1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
50 #include "insn-codes.h"
52 #include "diagnostic-core.h"
55 #include "dominance.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
65 #include "plugin-api.h"
72 #include "sched-int.h"
73 #include "target-def.h"
75 #include "langhooks.h"
82 #include "gimple-expr.h"
84 #include "tm-constrs.h"
/* Forward definitions of types.  */

/* Shorthand names for the minipool (constant-pool) node and fixup
   record types; the struct definitions are not visible in this chunk
   of the file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook that a language front end may install to emit language-specific
   object attributes (NOTE(review): may be NULL when no front end sets
   it -- confirm at the call sites).  */
void (*arm_lang_output_object_attributes_hook)(void);
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx
);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets
*arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
106 HOST_WIDE_INT
, rtx
, rtx
, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx
, int);
109 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
110 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
111 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
112 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
113 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
114 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
115 inline static int thumb1_index_register_rtx_p (rtx
, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx
, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
);
121 static void arm_print_operand (FILE *, rtx
, int);
122 static void arm_print_operand_address (FILE *, rtx
);
123 static bool arm_print_operand_punct_valid_p (unsigned char code
);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
125 static arm_cc
get_arm_condition_code (rtx
);
126 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
127 static const char *output_multi_immediate (rtx
*, const char *, const char *,
129 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
130 static struct machine_function
*arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
133 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
134 static Mnode
*add_minipool_forward_ref (Mfix
*);
135 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
136 static Mnode
*add_minipool_backward_ref (Mfix
*);
137 static void assign_minipool_offsets (Mfix
*);
138 static void arm_print_value (FILE *, rtx
);
139 static void dump_minipool (rtx_insn
*);
140 static int arm_barrier_cost (rtx
);
141 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
142 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
143 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree
);
150 static unsigned long arm_compute_func_type (void);
151 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
152 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
153 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
159 static int arm_comp_type_attributes (const_tree
, const_tree
);
160 static void arm_set_default_type_attributes (tree
);
161 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code
,
164 unsigned HOST_WIDE_INT val
,
165 struct four_ints
*return_sequence
);
166 static int optimal_immediate_sequence_1 (enum rtx_code code
,
167 unsigned HOST_WIDE_INT val
,
168 struct four_ints
*return_sequence
,
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree
, tree
);
172 static machine_mode
arm_promote_function_mode (const_tree
,
175 static bool arm_return_in_memory (const_tree
, const_tree
);
176 static rtx
arm_function_value (const_tree
, const_tree
, bool);
177 static rtx
arm_libcall_value_1 (machine_mode
);
178 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
185 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
186 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
187 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
188 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
191 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
192 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
193 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
194 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
195 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx
safe_vector_operand (rtx
, machine_mode
);
199 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
200 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
201 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, machine_mode
, int);
202 static tree
arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond
, rtx pattern
);
204 static rtx_insn
*emit_set_insn (rtx
, rtx
);
205 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
208 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
210 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
212 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
213 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
215 static rtx
aapcs_libcall_value (machine_mode
);
216 static int aapcs_select_return_coproc (const_tree
, const_tree
);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
220 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
223 static void arm_encode_section_info (tree
, rtx
, int);
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
231 static bool arm_pass_by_reference (cumulative_args_t
,
232 machine_mode
, const_tree
, bool);
233 static bool arm_promote_prototypes (const_tree
);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree
);
237 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
238 static bool arm_return_in_memory (const_tree
, const_tree
);
240 static void arm_unwind_emit (FILE *, rtx_insn
*);
241 static bool arm_output_ttype (rtx
);
242 static void arm_asm_emit_except_personality (rtx
);
243 static void arm_asm_init_sections (void);
245 static rtx
arm_dwarf_register_span (rtx
);
247 static tree
arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree
arm_get_cookie_size (tree
);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree
);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree
arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree
, rtx
);
259 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
262 static bool arm_cannot_copy_insn_p (rtx_insn
*);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
265 static bool arm_output_addr_const_extra (FILE *, rtx
);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree
);
268 static const char *arm_invalid_parameter_type (const_tree t
);
269 static const char *arm_invalid_return_type (const_tree t
);
270 static tree
arm_promoted_type (const_tree t
);
271 static tree
arm_convert_to_type (tree type
, tree expr
);
272 static bool arm_scalar_mode_supported_p (machine_mode
);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx
, tree
, rtx
);
277 static rtx
arm_trampoline_adjust_address (rtx
);
278 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
282 static bool arm_array_mode_supported_p (machine_mode
,
283 unsigned HOST_WIDE_INT
);
284 static machine_mode
arm_preferred_simd_mode (machine_mode
);
285 static bool arm_class_likely_spilled_p (reg_class_t
);
286 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
287 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
292 static void arm_conditional_register_usage (void);
293 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
300 const unsigned char *sel
);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
304 int misalign ATTRIBUTE_UNUSED
);
305 static unsigned arm_add_stmt_cost (void *data
, int count
,
306 enum vect_cost_for_stmt kind
,
307 struct _stmt_vec_info
*stmt_info
,
309 enum vect_cost_model_location where
);
311 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
312 bool op0_preserve_value
);
313 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table
[] =
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
   it may lie outside of the 26 bit addressing range of a normal function
   call.  */
323 { "long_call", 0, 0, false, true, true, NULL
, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
326 { "short_call", 0, 0, false, true, true, NULL
, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
338 /* ARM/PE has three new attributes:
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
343 Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
347 { "dllimport", 0, 0, true, false, false, NULL
, false },
348 { "dllexport", 0, 0, true, false, false, NULL
, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
357 { NULL
, 0, 0, false, false, false, NULL
, false }
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #define TARGET_LRA_P arm_lra_p
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
422 #undef TARGET_ENCODE_SECTION_INFO
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
600 /* The minimum is set such that the total size of the block
601 for a particular anchor is -4088 + 1 + 4095 bytes, which is
602 divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
/* The single global description of the target machine, assembled from
   the TARGET_xxx macros defined above via TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
/* First object allocated on minipool_obstack; presumably used to
   release the whole obstack back to its initial state in one call --
   NOTE(review): the freeing site is not visible in this chunk.  */
static char * minipool_startobj;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  Default is 5; NOTE(review): it
   may be retuned during option processing -- confirm elsewhere.  */
static int max_insns_skipped = 5;
/* The assembler output stream; defined outside this file.  */
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
/* NOTE(review): the variable described by the comment above is missing
   from this chunk of the file.  */

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines: one label per core register
   that can hold a call target.  */
rtx thumb_call_via_label[14];
/* Nonzero once any call_via label has been required, so the helpers
   must be emitted.  NOTE(review): the emission site is not visible in
   this chunk -- confirm.  */
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare	      (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
#define FL_CRC32      (1 << 25)       /* ARMv8 CRC32 instructions.  */
#define FL_SMALLMUL   (1 << 26)       /* Small multiply supported.  */
#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
/* Flags that only affect tuning, not available instructions.  The
   visible text of this macro lost its continuation line; restored so
   the parenthesis is balanced and the definition compiles.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
/* Per-architecture aggregates of the FL_* capability bits: each later
   architecture revision builds incrementally on its predecessor's
   mask.  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
/* v6-M is v6 minus the non-M-profile instructions.  */
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7VE	(FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A	(FL_FOR_ARCH7VE | FL_ARCH8)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
820 /* The following are used in the arm.md file as equivalents to bits
821 in the above two flag variables. */
823 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
826 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
829 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
832 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
835 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
838 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
841 /* Nonzero if this chip supports the ARM 6K extensions. */
844 /* Nonzero if instructions present in ARMv6-M can be used. */
847 /* Nonzero if this chip supports the ARM 7 extensions. */
850 /* Nonzero if instructions not present in the 'M' profile can be used. */
851 int arm_arch_notm
= 0;
853 /* Nonzero if instructions present in ARMv7E-M can be used. */
856 /* Nonzero if instructions present in ARMv8 can be used. */
859 /* Nonzero if this chip can benefit from load scheduling. */
860 int arm_ld_sched
= 0;
862 /* Nonzero if this chip is a StrongARM. */
863 int arm_tune_strongarm
= 0;
865 /* Nonzero if this chip supports Intel Wireless MMX technology. */
866 int arm_arch_iwmmxt
= 0;
868 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
869 int arm_arch_iwmmxt2
= 0;
871 /* Nonzero if this chip is an XScale. */
872 int arm_arch_xscale
= 0;
874 /* Nonzero if tuning for XScale.  */
875 int arm_tune_xscale
= 0;
877 /* Nonzero if we want to tune for stores that access the write-buffer.
878 This typically means an ARM6 or ARM7 with MMU or MPU. */
879 int arm_tune_wbuf
= 0;
881 /* Nonzero if tuning for Cortex-A9. */
882 int arm_tune_cortex_a9
= 0;
884 /* Nonzero if generating Thumb instructions. */
887 /* Nonzero if generating Thumb-1 instructions. */
890 /* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
892 XXX This is a bit of a hack, it's intended to help work around
893 problems in GLD which doesn't understand that armv5t code is
894 interworking clean. */
895 int arm_cpp_interwork
= 0;
897 /* Nonzero if chip supports Thumb 2. */
900 /* Nonzero if chip supports integer division instruction.  The two
   flags cover the ARM and Thumb instruction states separately.  */
901 int arm_arch_arm_hwdiv
;
902 int arm_arch_thumb_hwdiv
;
904 /* Nonzero if we should use Neon to handle 64-bit operations rather
905 than core registers. */
906 int prefer_neon_for_64bits
= 0;
908 /* Nonzero if we shouldn't use literal pools. */
909 bool arm_disable_literal_pool
= false;
911 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
912 we must report the mode of the memory reference from
913 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
914 machine_mode output_memory_reference_mode
;
916 /* The register number to be used for the PIC offset register. */
917 unsigned arm_pic_register
= INVALID_REGNUM
;
919 enum arm_pcs arm_pcs_default
;
921 /* For an explanation of these variables, see final_prescan_insn below. */
923 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
924 enum arm_cond_code arm_current_cc
;
927 int arm_target_label
;
928 /* The number of conditionally executed insns, including the current insn. */
929 int arm_condexec_count
= 0;
930 /* A bitmask specifying the patterns for the IT block.
931 Zero means do not output an IT block before this insn. */
932 int arm_condexec_mask
= 0;
933 /* The number of bits used in arm_condexec_mask. */
934 int arm_condexec_masklen
= 0;
936 /* Nonzero if chip supports the ARMv8 CRC instructions. */
937 int arm_arch_crc
= 0;
939 /* Nonzero if the core has a very small, high-latency, multiply unit. */
940 int arm_m_profile_small_mul
= 0;
942 /* The condition codes of the ARM, and the inverse function. */
943 static const char * const arm_condition_codes
[] =
945 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
946 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
949 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
950 int arm_regs_in_sequence
[] =
952 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Assembler mnemonic for logical shift left: unified syntax spells it
   "lsl", the older divided syntax "asl".  */
955 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
/* Readable string-equality test.  */
956 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Mask of the low registers (r0-r7) available as Thumb-2 work
   registers, excluding the hard frame pointer, SP, PC and the PIC
   offset register.  */
958 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
959 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
960 | (1 << PIC_OFFSET_TABLE_REGNUM)))
962 /* Initialization code. */
966 const char *const name
;
967 enum processor_type core
;
969 enum base_architecture base_arch
;
970 const unsigned long flags
;
971 const struct tune_params
*const tune
;
975 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
976 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
981 /* arm generic vectorizer costs. */
983 struct cpu_vec_costs arm_default_vec_cost
= {
984 1, /* scalar_stmt_cost. */
985 1, /* scalar load_cost. */
986 1, /* scalar_store_cost. */
987 1, /* vec_stmt_cost. */
988 1, /* vec_to_scalar_cost. */
989 1, /* scalar_to_vec_cost. */
990 1, /* vec_align_load_cost. */
991 1, /* vec_unalign_load_cost. */
992 1, /* vec_unalign_store_cost. */
993 1, /* vec_store_cost. */
994 3, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
998 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
999 #include "aarch-cost-tables.h"
1003 const struct cpu_cost_table cortexa9_extra_costs
=
1010 COSTS_N_INSNS (1), /* shift_reg. */
1011 COSTS_N_INSNS (1), /* arith_shift. */
1012 COSTS_N_INSNS (2), /* arith_shift_reg. */
1014 COSTS_N_INSNS (1), /* log_shift_reg. */
1015 COSTS_N_INSNS (1), /* extend. */
1016 COSTS_N_INSNS (2), /* extend_arith. */
1017 COSTS_N_INSNS (1), /* bfi. */
1018 COSTS_N_INSNS (1), /* bfx. */
1022 true /* non_exec_costs_exec. */
1027 COSTS_N_INSNS (3), /* simple. */
1028 COSTS_N_INSNS (3), /* flag_setting. */
1029 COSTS_N_INSNS (2), /* extend. */
1030 COSTS_N_INSNS (3), /* add. */
1031 COSTS_N_INSNS (2), /* extend_add. */
1032 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1036 0, /* simple (N/A). */
1037 0, /* flag_setting (N/A). */
1038 COSTS_N_INSNS (4), /* extend. */
1040 COSTS_N_INSNS (4), /* extend_add. */
1046 COSTS_N_INSNS (2), /* load. */
1047 COSTS_N_INSNS (2), /* load_sign_extend. */
1048 COSTS_N_INSNS (2), /* ldrd. */
1049 COSTS_N_INSNS (2), /* ldm_1st. */
1050 1, /* ldm_regs_per_insn_1st. */
1051 2, /* ldm_regs_per_insn_subsequent. */
1052 COSTS_N_INSNS (5), /* loadf. */
1053 COSTS_N_INSNS (5), /* loadd. */
1054 COSTS_N_INSNS (1), /* load_unaligned. */
1055 COSTS_N_INSNS (2), /* store. */
1056 COSTS_N_INSNS (2), /* strd. */
1057 COSTS_N_INSNS (2), /* stm_1st. */
1058 1, /* stm_regs_per_insn_1st. */
1059 2, /* stm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (1), /* storef. */
1061 COSTS_N_INSNS (1), /* stored. */
1062 COSTS_N_INSNS (1) /* store_unaligned. */
1067 COSTS_N_INSNS (14), /* div. */
1068 COSTS_N_INSNS (4), /* mult. */
1069 COSTS_N_INSNS (7), /* mult_addsub. */
1070 COSTS_N_INSNS (30), /* fma. */
1071 COSTS_N_INSNS (3), /* addsub. */
1072 COSTS_N_INSNS (1), /* fpconst. */
1073 COSTS_N_INSNS (1), /* neg. */
1074 COSTS_N_INSNS (3), /* compare. */
1075 COSTS_N_INSNS (3), /* widen. */
1076 COSTS_N_INSNS (3), /* narrow. */
1077 COSTS_N_INSNS (3), /* toint. */
1078 COSTS_N_INSNS (3), /* fromint. */
1079 COSTS_N_INSNS (3) /* roundint. */
1083 COSTS_N_INSNS (24), /* div. */
1084 COSTS_N_INSNS (5), /* mult. */
1085 COSTS_N_INSNS (8), /* mult_addsub. */
1086 COSTS_N_INSNS (30), /* fma. */
1087 COSTS_N_INSNS (3), /* addsub. */
1088 COSTS_N_INSNS (1), /* fpconst. */
1089 COSTS_N_INSNS (1), /* neg. */
1090 COSTS_N_INSNS (3), /* compare. */
1091 COSTS_N_INSNS (3), /* widen. */
1092 COSTS_N_INSNS (3), /* narrow. */
1093 COSTS_N_INSNS (3), /* toint. */
1094 COSTS_N_INSNS (3), /* fromint. */
1095 COSTS_N_INSNS (3) /* roundint. */
1100 COSTS_N_INSNS (1) /* alu. */
1104 const struct cpu_cost_table cortexa8_extra_costs
=
1110 COSTS_N_INSNS (1), /* shift. */
1112 COSTS_N_INSNS (1), /* arith_shift. */
1113 0, /* arith_shift_reg. */
1114 COSTS_N_INSNS (1), /* log_shift. */
1115 0, /* log_shift_reg. */
1117 0, /* extend_arith. */
1123 true /* non_exec_costs_exec. */
1128 COSTS_N_INSNS (1), /* simple. */
1129 COSTS_N_INSNS (1), /* flag_setting. */
1130 COSTS_N_INSNS (1), /* extend. */
1131 COSTS_N_INSNS (1), /* add. */
1132 COSTS_N_INSNS (1), /* extend_add. */
1133 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1137 0, /* simple (N/A). */
1138 0, /* flag_setting (N/A). */
1139 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (2), /* extend_add. */
1147 COSTS_N_INSNS (1), /* load. */
1148 COSTS_N_INSNS (1), /* load_sign_extend. */
1149 COSTS_N_INSNS (1), /* ldrd. */
1150 COSTS_N_INSNS (1), /* ldm_1st. */
1151 1, /* ldm_regs_per_insn_1st. */
1152 2, /* ldm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* loadf. */
1154 COSTS_N_INSNS (1), /* loadd. */
1155 COSTS_N_INSNS (1), /* load_unaligned. */
1156 COSTS_N_INSNS (1), /* store. */
1157 COSTS_N_INSNS (1), /* strd. */
1158 COSTS_N_INSNS (1), /* stm_1st. */
1159 1, /* stm_regs_per_insn_1st. */
1160 2, /* stm_regs_per_insn_subsequent. */
1161 COSTS_N_INSNS (1), /* storef. */
1162 COSTS_N_INSNS (1), /* stored. */
1163 COSTS_N_INSNS (1) /* store_unaligned. */
1168 COSTS_N_INSNS (36), /* div. */
1169 COSTS_N_INSNS (11), /* mult. */
1170 COSTS_N_INSNS (20), /* mult_addsub. */
1171 COSTS_N_INSNS (30), /* fma. */
1172 COSTS_N_INSNS (9), /* addsub. */
1173 COSTS_N_INSNS (3), /* fpconst. */
1174 COSTS_N_INSNS (3), /* neg. */
1175 COSTS_N_INSNS (6), /* compare. */
1176 COSTS_N_INSNS (4), /* widen. */
1177 COSTS_N_INSNS (4), /* narrow. */
1178 COSTS_N_INSNS (8), /* toint. */
1179 COSTS_N_INSNS (8), /* fromint. */
1180 COSTS_N_INSNS (8) /* roundint. */
1184 COSTS_N_INSNS (64), /* div. */
1185 COSTS_N_INSNS (16), /* mult. */
1186 COSTS_N_INSNS (25), /* mult_addsub. */
1187 COSTS_N_INSNS (30), /* fma. */
1188 COSTS_N_INSNS (9), /* addsub. */
1189 COSTS_N_INSNS (3), /* fpconst. */
1190 COSTS_N_INSNS (3), /* neg. */
1191 COSTS_N_INSNS (6), /* compare. */
1192 COSTS_N_INSNS (6), /* widen. */
1193 COSTS_N_INSNS (6), /* narrow. */
1194 COSTS_N_INSNS (8), /* toint. */
1195 COSTS_N_INSNS (8), /* fromint. */
1196 COSTS_N_INSNS (8) /* roundint. */
1201 COSTS_N_INSNS (1) /* alu. */
1205 const struct cpu_cost_table cortexa5_extra_costs
=
1211 COSTS_N_INSNS (1), /* shift. */
1212 COSTS_N_INSNS (1), /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 COSTS_N_INSNS (1), /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 COSTS_N_INSNS (1), /* log_shift_reg. */
1217 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (1), /* extend_arith. */
1219 COSTS_N_INSNS (1), /* bfi. */
1220 COSTS_N_INSNS (1), /* bfx. */
1221 COSTS_N_INSNS (1), /* clz. */
1222 COSTS_N_INSNS (1), /* rev. */
1224 true /* non_exec_costs_exec. */
1231 COSTS_N_INSNS (1), /* flag_setting. */
1232 COSTS_N_INSNS (1), /* extend. */
1233 COSTS_N_INSNS (1), /* add. */
1234 COSTS_N_INSNS (1), /* extend_add. */
1235 COSTS_N_INSNS (7) /* idiv. */
1239 0, /* simple (N/A). */
1240 0, /* flag_setting (N/A). */
1241 COSTS_N_INSNS (1), /* extend. */
1243 COSTS_N_INSNS (2), /* extend_add. */
1249 COSTS_N_INSNS (1), /* load. */
1250 COSTS_N_INSNS (1), /* load_sign_extend. */
1251 COSTS_N_INSNS (6), /* ldrd. */
1252 COSTS_N_INSNS (1), /* ldm_1st. */
1253 1, /* ldm_regs_per_insn_1st. */
1254 2, /* ldm_regs_per_insn_subsequent. */
1255 COSTS_N_INSNS (2), /* loadf. */
1256 COSTS_N_INSNS (4), /* loadd. */
1257 COSTS_N_INSNS (1), /* load_unaligned. */
1258 COSTS_N_INSNS (1), /* store. */
1259 COSTS_N_INSNS (3), /* strd. */
1260 COSTS_N_INSNS (1), /* stm_1st. */
1261 1, /* stm_regs_per_insn_1st. */
1262 2, /* stm_regs_per_insn_subsequent. */
1263 COSTS_N_INSNS (2), /* storef. */
1264 COSTS_N_INSNS (2), /* stored. */
1265 COSTS_N_INSNS (1) /* store_unaligned. */
1270 COSTS_N_INSNS (15), /* div. */
1271 COSTS_N_INSNS (3), /* mult. */
1272 COSTS_N_INSNS (7), /* mult_addsub. */
1273 COSTS_N_INSNS (7), /* fma. */
1274 COSTS_N_INSNS (3), /* addsub. */
1275 COSTS_N_INSNS (3), /* fpconst. */
1276 COSTS_N_INSNS (3), /* neg. */
1277 COSTS_N_INSNS (3), /* compare. */
1278 COSTS_N_INSNS (3), /* widen. */
1279 COSTS_N_INSNS (3), /* narrow. */
1280 COSTS_N_INSNS (3), /* toint. */
1281 COSTS_N_INSNS (3), /* fromint. */
1282 COSTS_N_INSNS (3) /* roundint. */
1286 COSTS_N_INSNS (30), /* div. */
1287 COSTS_N_INSNS (6), /* mult. */
1288 COSTS_N_INSNS (10), /* mult_addsub. */
1289 COSTS_N_INSNS (7), /* fma. */
1290 COSTS_N_INSNS (3), /* addsub. */
1291 COSTS_N_INSNS (3), /* fpconst. */
1292 COSTS_N_INSNS (3), /* neg. */
1293 COSTS_N_INSNS (3), /* compare. */
1294 COSTS_N_INSNS (3), /* widen. */
1295 COSTS_N_INSNS (3), /* narrow. */
1296 COSTS_N_INSNS (3), /* toint. */
1297 COSTS_N_INSNS (3), /* fromint. */
1298 COSTS_N_INSNS (3) /* roundint. */
1303 COSTS_N_INSNS (1) /* alu. */
1308 const struct cpu_cost_table cortexa7_extra_costs
=
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1327 true /* non_exec_costs_exec. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (3), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (2), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1) /* store_unaligned. */
1373 COSTS_N_INSNS (15), /* div. */
1374 COSTS_N_INSNS (3), /* mult. */
1375 COSTS_N_INSNS (7), /* mult_addsub. */
1376 COSTS_N_INSNS (7), /* fma. */
1377 COSTS_N_INSNS (3), /* addsub. */
1378 COSTS_N_INSNS (3), /* fpconst. */
1379 COSTS_N_INSNS (3), /* neg. */
1380 COSTS_N_INSNS (3), /* compare. */
1381 COSTS_N_INSNS (3), /* widen. */
1382 COSTS_N_INSNS (3), /* narrow. */
1383 COSTS_N_INSNS (3), /* toint. */
1384 COSTS_N_INSNS (3), /* fromint. */
1385 COSTS_N_INSNS (3) /* roundint. */
1389 COSTS_N_INSNS (30), /* div. */
1390 COSTS_N_INSNS (6), /* mult. */
1391 COSTS_N_INSNS (10), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1406 COSTS_N_INSNS (1) /* alu. */
1410 const struct cpu_cost_table cortexa12_extra_costs
=
1417 COSTS_N_INSNS (1), /* shift_reg. */
1418 COSTS_N_INSNS (1), /* arith_shift. */
1419 COSTS_N_INSNS (1), /* arith_shift_reg. */
1420 COSTS_N_INSNS (1), /* log_shift. */
1421 COSTS_N_INSNS (1), /* log_shift_reg. */
1423 COSTS_N_INSNS (1), /* extend_arith. */
1425 COSTS_N_INSNS (1), /* bfx. */
1426 COSTS_N_INSNS (1), /* clz. */
1427 COSTS_N_INSNS (1), /* rev. */
1429 true /* non_exec_costs_exec. */
1434 COSTS_N_INSNS (2), /* simple. */
1435 COSTS_N_INSNS (3), /* flag_setting. */
1436 COSTS_N_INSNS (2), /* extend. */
1437 COSTS_N_INSNS (3), /* add. */
1438 COSTS_N_INSNS (2), /* extend_add. */
1439 COSTS_N_INSNS (18) /* idiv. */
1443 0, /* simple (N/A). */
1444 0, /* flag_setting (N/A). */
1445 COSTS_N_INSNS (3), /* extend. */
1447 COSTS_N_INSNS (3), /* extend_add. */
1453 COSTS_N_INSNS (3), /* load. */
1454 COSTS_N_INSNS (3), /* load_sign_extend. */
1455 COSTS_N_INSNS (3), /* ldrd. */
1456 COSTS_N_INSNS (3), /* ldm_1st. */
1457 1, /* ldm_regs_per_insn_1st. */
1458 2, /* ldm_regs_per_insn_subsequent. */
1459 COSTS_N_INSNS (3), /* loadf. */
1460 COSTS_N_INSNS (3), /* loadd. */
1461 0, /* load_unaligned. */
1465 1, /* stm_regs_per_insn_1st. */
1466 2, /* stm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* storef. */
1468 COSTS_N_INSNS (2), /* stored. */
1469 0 /* store_unaligned. */
1474 COSTS_N_INSNS (17), /* div. */
1475 COSTS_N_INSNS (4), /* mult. */
1476 COSTS_N_INSNS (8), /* mult_addsub. */
1477 COSTS_N_INSNS (8), /* fma. */
1478 COSTS_N_INSNS (4), /* addsub. */
1479 COSTS_N_INSNS (2), /* fpconst. */
1480 COSTS_N_INSNS (2), /* neg. */
1481 COSTS_N_INSNS (2), /* compare. */
1482 COSTS_N_INSNS (4), /* widen. */
1483 COSTS_N_INSNS (4), /* narrow. */
1484 COSTS_N_INSNS (4), /* toint. */
1485 COSTS_N_INSNS (4), /* fromint. */
1486 COSTS_N_INSNS (4) /* roundint. */
1490 COSTS_N_INSNS (31), /* div. */
1491 COSTS_N_INSNS (4), /* mult. */
1492 COSTS_N_INSNS (8), /* mult_addsub. */
1493 COSTS_N_INSNS (8), /* fma. */
1494 COSTS_N_INSNS (4), /* addsub. */
1495 COSTS_N_INSNS (2), /* fpconst. */
1496 COSTS_N_INSNS (2), /* neg. */
1497 COSTS_N_INSNS (2), /* compare. */
1498 COSTS_N_INSNS (4), /* widen. */
1499 COSTS_N_INSNS (4), /* narrow. */
1500 COSTS_N_INSNS (4), /* toint. */
1501 COSTS_N_INSNS (4), /* fromint. */
1502 COSTS_N_INSNS (4) /* roundint. */
1507 COSTS_N_INSNS (1) /* alu. */
1511 const struct cpu_cost_table cortexa15_extra_costs
=
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 COSTS_N_INSNS (1), /* bfi. */
1530 true /* non_exec_costs_exec. */
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (2), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1548 COSTS_N_INSNS (3), /* extend_add. */
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (4), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (4), /* loadf. */
1561 COSTS_N_INSNS (4), /* loadd. */
1562 0, /* load_unaligned. */
1565 COSTS_N_INSNS (1), /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1570 0 /* store_unaligned. */
1575 COSTS_N_INSNS (17), /* div. */
1576 COSTS_N_INSNS (4), /* mult. */
1577 COSTS_N_INSNS (8), /* mult_addsub. */
1578 COSTS_N_INSNS (8), /* fma. */
1579 COSTS_N_INSNS (4), /* addsub. */
1580 COSTS_N_INSNS (2), /* fpconst. */
1581 COSTS_N_INSNS (2), /* neg. */
1582 COSTS_N_INSNS (5), /* compare. */
1583 COSTS_N_INSNS (4), /* widen. */
1584 COSTS_N_INSNS (4), /* narrow. */
1585 COSTS_N_INSNS (4), /* toint. */
1586 COSTS_N_INSNS (4), /* fromint. */
1587 COSTS_N_INSNS (4) /* roundint. */
1591 COSTS_N_INSNS (31), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1608 COSTS_N_INSNS (1) /* alu. */
1612 const struct cpu_cost_table v7m_extra_costs
=
1620 0, /* arith_shift. */
1621 COSTS_N_INSNS (1), /* arith_shift_reg. */
1623 COSTS_N_INSNS (1), /* log_shift_reg. */
1625 COSTS_N_INSNS (1), /* extend_arith. */
1630 COSTS_N_INSNS (1), /* non_exec. */
1631 false /* non_exec_costs_exec. */
1636 COSTS_N_INSNS (1), /* simple. */
1637 COSTS_N_INSNS (1), /* flag_setting. */
1638 COSTS_N_INSNS (2), /* extend. */
1639 COSTS_N_INSNS (1), /* add. */
1640 COSTS_N_INSNS (3), /* extend_add. */
1641 COSTS_N_INSNS (8) /* idiv. */
1645 0, /* simple (N/A). */
1646 0, /* flag_setting (N/A). */
1647 COSTS_N_INSNS (2), /* extend. */
1649 COSTS_N_INSNS (3), /* extend_add. */
1655 COSTS_N_INSNS (2), /* load. */
1656 0, /* load_sign_extend. */
1657 COSTS_N_INSNS (3), /* ldrd. */
1658 COSTS_N_INSNS (2), /* ldm_1st. */
1659 1, /* ldm_regs_per_insn_1st. */
1660 1, /* ldm_regs_per_insn_subsequent. */
1661 COSTS_N_INSNS (2), /* loadf. */
1662 COSTS_N_INSNS (3), /* loadd. */
1663 COSTS_N_INSNS (1), /* load_unaligned. */
1664 COSTS_N_INSNS (2), /* store. */
1665 COSTS_N_INSNS (3), /* strd. */
1666 COSTS_N_INSNS (2), /* stm_1st. */
1667 1, /* stm_regs_per_insn_1st. */
1668 1, /* stm_regs_per_insn_subsequent. */
1669 COSTS_N_INSNS (2), /* storef. */
1670 COSTS_N_INSNS (3), /* stored. */
1671 COSTS_N_INSNS (1) /* store_unaligned. */
1676 COSTS_N_INSNS (7), /* div. */
1677 COSTS_N_INSNS (2), /* mult. */
1678 COSTS_N_INSNS (5), /* mult_addsub. */
1679 COSTS_N_INSNS (3), /* fma. */
1680 COSTS_N_INSNS (1), /* addsub. */
1692 COSTS_N_INSNS (15), /* div. */
1693 COSTS_N_INSNS (5), /* mult. */
1694 COSTS_N_INSNS (7), /* mult_addsub. */
1695 COSTS_N_INSNS (7), /* fma. */
1696 COSTS_N_INSNS (3), /* addsub. */
1709 COSTS_N_INSNS (1) /* alu. */
1713 const struct tune_params arm_slowmul_tune
=
1715 arm_slowmul_rtx_costs
,
1717 NULL
, /* Sched adj cost. */
1718 3, /* Constant limit. */
1719 5, /* Max cond insns. */
1720 ARM_PREFETCH_NOT_BENEFICIAL
,
1721 true, /* Prefer constant pool. */
1722 arm_default_branch_cost
,
1723 false, /* Prefer LDRD/STRD. */
1724 {true, true}, /* Prefer non short circuit. */
1725 &arm_default_vec_cost
, /* Vectorizer costs. */
1726 false, /* Prefer Neon for 64-bits bitops. */
1727 false, false, /* Prefer 32-bit encodings. */
1728 false, /* Prefer Neon for stringops. */
1729 8 /* Maximum insns to inline memset. */
1732 const struct tune_params arm_fastmul_tune
=
1734 arm_fastmul_rtx_costs
,
1736 NULL
, /* Sched adj cost. */
1737 1, /* Constant limit. */
1738 5, /* Max cond insns. */
1739 ARM_PREFETCH_NOT_BENEFICIAL
,
1740 true, /* Prefer constant pool. */
1741 arm_default_branch_cost
,
1742 false, /* Prefer LDRD/STRD. */
1743 {true, true}, /* Prefer non short circuit. */
1744 &arm_default_vec_cost
, /* Vectorizer costs. */
1745 false, /* Prefer Neon for 64-bits bitops. */
1746 false, false, /* Prefer 32-bit encodings. */
1747 false, /* Prefer Neon for stringops. */
1748 8 /* Maximum insns to inline memset. */
1751 /* StrongARM has early execution of branches, so a sequence that is worth
1752 skipping is shorter. Set max_insns_skipped to a lower value. */
1754 const struct tune_params arm_strongarm_tune
=
1756 arm_fastmul_rtx_costs
,
1758 NULL
, /* Sched adj cost. */
1759 1, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 ARM_PREFETCH_NOT_BENEFICIAL
,
1762 true, /* Prefer constant pool. */
1763 arm_default_branch_cost
,
1764 false, /* Prefer LDRD/STRD. */
1765 {true, true}, /* Prefer non short circuit. */
1766 &arm_default_vec_cost
, /* Vectorizer costs. */
1767 false, /* Prefer Neon for 64-bits bitops. */
1768 false, false, /* Prefer 32-bit encodings. */
1769 false, /* Prefer Neon for stringops. */
1770 8 /* Maximum insns to inline memset. */
1773 const struct tune_params arm_xscale_tune
=
1775 arm_xscale_rtx_costs
,
1777 xscale_sched_adjust_cost
,
1778 2, /* Constant limit. */
1779 3, /* Max cond insns. */
1780 ARM_PREFETCH_NOT_BENEFICIAL
,
1781 true, /* Prefer constant pool. */
1782 arm_default_branch_cost
,
1783 false, /* Prefer LDRD/STRD. */
1784 {true, true}, /* Prefer non short circuit. */
1785 &arm_default_vec_cost
, /* Vectorizer costs. */
1786 false, /* Prefer Neon for 64-bits bitops. */
1787 false, false, /* Prefer 32-bit encodings. */
1788 false, /* Prefer Neon for stringops. */
1789 8 /* Maximum insns to inline memset. */
1792 const struct tune_params arm_9e_tune
=
1796 NULL
, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL
,
1800 true, /* Prefer constant pool. */
1801 arm_default_branch_cost
,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost
, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8 /* Maximum insns to inline memset. */
1811 const struct tune_params arm_v6t2_tune
=
1815 NULL
, /* Sched adj cost. */
1816 1, /* Constant limit. */
1817 5, /* Max cond insns. */
1818 ARM_PREFETCH_NOT_BENEFICIAL
,
1819 false, /* Prefer constant pool. */
1820 arm_default_branch_cost
,
1821 false, /* Prefer LDRD/STRD. */
1822 {true, true}, /* Prefer non short circuit. */
1823 &arm_default_vec_cost
, /* Vectorizer costs. */
1824 false, /* Prefer Neon for 64-bits bitops. */
1825 false, false, /* Prefer 32-bit encodings. */
1826 false, /* Prefer Neon for stringops. */
1827 8 /* Maximum insns to inline memset. */
1830 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1831 const struct tune_params arm_cortex_tune
=
1834 &generic_extra_costs
,
1835 NULL
, /* Sched adj cost. */
1836 1, /* Constant limit. */
1837 5, /* Max cond insns. */
1838 ARM_PREFETCH_NOT_BENEFICIAL
,
1839 false, /* Prefer constant pool. */
1840 arm_default_branch_cost
,
1841 false, /* Prefer LDRD/STRD. */
1842 {true, true}, /* Prefer non short circuit. */
1843 &arm_default_vec_cost
, /* Vectorizer costs. */
1844 false, /* Prefer Neon for 64-bits bitops. */
1845 false, false, /* Prefer 32-bit encodings. */
1846 false, /* Prefer Neon for stringops. */
1847 8 /* Maximum insns to inline memset. */
1850 const struct tune_params arm_cortex_a8_tune
=
1853 &cortexa8_extra_costs
,
1854 NULL
, /* Sched adj cost. */
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 ARM_PREFETCH_NOT_BENEFICIAL
,
1858 false, /* Prefer constant pool. */
1859 arm_default_branch_cost
,
1860 false, /* Prefer LDRD/STRD. */
1861 {true, true}, /* Prefer non short circuit. */
1862 &arm_default_vec_cost
, /* Vectorizer costs. */
1863 false, /* Prefer Neon for 64-bits bitops. */
1864 false, false, /* Prefer 32-bit encodings. */
1865 true, /* Prefer Neon for stringops. */
1866 8 /* Maximum insns to inline memset. */
1869 const struct tune_params arm_cortex_a7_tune
=
1872 &cortexa7_extra_costs
,
1874 1, /* Constant limit. */
1875 5, /* Max cond insns. */
1876 ARM_PREFETCH_NOT_BENEFICIAL
,
1877 false, /* Prefer constant pool. */
1878 arm_default_branch_cost
,
1879 false, /* Prefer LDRD/STRD. */
1880 {true, true}, /* Prefer non short circuit. */
1881 &arm_default_vec_cost
, /* Vectorizer costs. */
1882 false, /* Prefer Neon for 64-bits bitops. */
1883 false, false, /* Prefer 32-bit encodings. */
1884 true, /* Prefer Neon for stringops. */
1885 8 /* Maximum insns to inline memset. */
1888 const struct tune_params arm_cortex_a15_tune
=
1891 &cortexa15_extra_costs
,
1892 NULL
, /* Sched adj cost. */
1893 1, /* Constant limit. */
1894 2, /* Max cond insns. */
1895 ARM_PREFETCH_NOT_BENEFICIAL
,
1896 false, /* Prefer constant pool. */
1897 arm_default_branch_cost
,
1898 true, /* Prefer LDRD/STRD. */
1899 {true, true}, /* Prefer non short circuit. */
1900 &arm_default_vec_cost
, /* Vectorizer costs. */
1901 false, /* Prefer Neon for 64-bits bitops. */
1902 true, true, /* Prefer 32-bit encodings. */
1903 true, /* Prefer Neon for stringops. */
1904 8 /* Maximum insns to inline memset. */
1907 const struct tune_params arm_cortex_a53_tune
=
1910 &cortexa53_extra_costs
,
1911 NULL
, /* Scheduler cost adjustment. */
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 ARM_PREFETCH_NOT_BENEFICIAL
,
1915 false, /* Prefer constant pool. */
1916 arm_default_branch_cost
,
1917 false, /* Prefer LDRD/STRD. */
1918 {true, true}, /* Prefer non short circuit. */
1919 &arm_default_vec_cost
, /* Vectorizer costs. */
1920 false, /* Prefer Neon for 64-bits bitops. */
1921 false, false, /* Prefer 32-bit encodings. */
1922 false, /* Prefer Neon for stringops. */
1923 8 /* Maximum insns to inline memset. */
1926 const struct tune_params arm_cortex_a57_tune
=
1929 &cortexa57_extra_costs
,
1930 NULL
, /* Scheduler cost adjustment. */
1931 1, /* Constant limit. */
1932 2, /* Max cond insns. */
1933 ARM_PREFETCH_NOT_BENEFICIAL
,
1934 false, /* Prefer constant pool. */
1935 arm_default_branch_cost
,
1936 true, /* Prefer LDRD/STRD. */
1937 {true, true}, /* Prefer non short circuit. */
1938 &arm_default_vec_cost
, /* Vectorizer costs. */
1939 false, /* Prefer Neon for 64-bits bitops. */
1940 true, true, /* Prefer 32-bit encodings. */
1941 false, /* Prefer Neon for stringops. */
1942 8 /* Maximum insns to inline memset. */
1945 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1946 less appealing. Set max_insns_skipped to a low value. */
1948 const struct tune_params arm_cortex_a5_tune
=
1951 &cortexa5_extra_costs
,
1952 NULL
, /* Sched adj cost. */
1953 1, /* Constant limit. */
1954 1, /* Max cond insns. */
1955 ARM_PREFETCH_NOT_BENEFICIAL
,
1956 false, /* Prefer constant pool. */
1957 arm_cortex_a5_branch_cost
,
1958 false, /* Prefer LDRD/STRD. */
1959 {false, false}, /* Prefer non short circuit. */
1960 &arm_default_vec_cost
, /* Vectorizer costs. */
1961 false, /* Prefer Neon for 64-bits bitops. */
1962 false, false, /* Prefer 32-bit encodings. */
1963 true, /* Prefer Neon for stringops. */
1964 8 /* Maximum insns to inline memset. */
1967 const struct tune_params arm_cortex_a9_tune
=
1970 &cortexa9_extra_costs
,
1971 cortex_a9_sched_adjust_cost
,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 ARM_PREFETCH_BENEFICIAL(4,32,32),
1975 false, /* Prefer constant pool. */
1976 arm_default_branch_cost
,
1977 false, /* Prefer LDRD/STRD. */
1978 {true, true}, /* Prefer non short circuit. */
1979 &arm_default_vec_cost
, /* Vectorizer costs. */
1980 false, /* Prefer Neon for 64-bits bitops. */
1981 false, false, /* Prefer 32-bit encodings. */
1982 false, /* Prefer Neon for stringops. */
1983 8 /* Maximum insns to inline memset. */
1986 const struct tune_params arm_cortex_a12_tune
=
1989 &cortexa12_extra_costs
,
1991 1, /* Constant limit. */
1992 5, /* Max cond insns. */
1993 ARM_PREFETCH_BENEFICIAL(4,32,32),
1994 false, /* Prefer constant pool. */
1995 arm_default_branch_cost
,
1996 true, /* Prefer LDRD/STRD. */
1997 {true, true}, /* Prefer non short circuit. */
1998 &arm_default_vec_cost
, /* Vectorizer costs. */
1999 false, /* Prefer Neon for 64-bits bitops. */
2000 false, false, /* Prefer 32-bit encodings. */
2001 true, /* Prefer Neon for stringops. */
2002 8 /* Maximum insns to inline memset. */
2005 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2006 cycle to execute each. An LDR from the constant pool also takes two cycles
2007 to execute, but mildly increases pipelining opportunity (consecutive
2008 loads/stores can be pipelined together, saving one cycle), and may also
2009 improve icache utilisation. Hence we prefer the constant pool for such
2012 const struct tune_params arm_v7m_tune
=
2016 NULL
, /* Sched adj cost. */
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL
,
2020 true, /* Prefer constant pool. */
2021 arm_cortex_m_branch_cost
,
2022 false, /* Prefer LDRD/STRD. */
2023 {false, false}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost
, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2031 /* Cortex-M7 tuning. */
2033 const struct tune_params arm_cortex_m7_tune
=
2037 NULL
, /* Sched adj cost. */
2038 0, /* Constant limit. */
2039 0, /* Max cond insns. */
2040 ARM_PREFETCH_NOT_BENEFICIAL
,
2041 true, /* Prefer constant pool. */
2042 arm_cortex_m_branch_cost
,
2043 false, /* Prefer LDRD/STRD. */
2044 {true, true}, /* Prefer non short circuit. */
2045 &arm_default_vec_cost
, /* Vectorizer costs. */
2046 false, /* Prefer Neon for 64-bits bitops. */
2047 false, false, /* Prefer 32-bit encodings. */
2048 false, /* Prefer Neon for stringops. */
2049 8 /* Maximum insns to inline memset. */
2052 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2053 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2054 const struct tune_params arm_v6m_tune
=
2058 NULL
, /* Sched adj cost. */
2059 1, /* Constant limit. */
2060 5, /* Max cond insns. */
2061 ARM_PREFETCH_NOT_BENEFICIAL
,
2062 false, /* Prefer constant pool. */
2063 arm_default_branch_cost
,
2064 false, /* Prefer LDRD/STRD. */
2065 {false, false}, /* Prefer non short circuit. */
2066 &arm_default_vec_cost
, /* Vectorizer costs. */
2067 false, /* Prefer Neon for 64-bits bitops. */
2068 false, false, /* Prefer 32-bit encodings. */
2069 false, /* Prefer Neon for stringops. */
2070 8 /* Maximum insns to inline memset. */
2073 const struct tune_params arm_fa726te_tune
=
2077 fa726te_sched_adjust_cost
,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL
,
2081 true, /* Prefer constant pool. */
2082 arm_default_branch_cost
,
2083 false, /* Prefer LDRD/STRD. */
2084 {true, true}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost
, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8 /* Maximum insns to inline memset. */
2093 /* Not all of these give usefully different compilation alternatives,
2094 but there is no simple way of generalizing them. */
2095 static const struct processors all_cores
[] =
2098 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2099 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2100 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2101 #include "arm-cores.def"
2103 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2106 static const struct processors all_architectures
[] =
2108 /* ARM Architectures */
2109 /* We don't specify tuning costs here as it will be figured out
2112 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2113 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2114 #include "arm-arches.def"
2116 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  They point into all_architectures/all_cores above
   and are consumed by arm_option_override.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.
   Overwritten (via sprintf) once the real architecture is selected in
   arm_option_override; the initializer fixes the buffer size.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2130 /* Available values for -mfpu=. */
2132 static const struct arm_fpu_desc all_fpus
[] =
2134 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2135 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2136 #include "arm-fpus.def"
2141 /* Supported TLS relocations. */
2149 TLS_DESCSEQ
/* GNU scheme */
2152 /* The maximum number of insns to be used when loading a constant. */
2154 arm_constant_limit (bool size_p
)
2156 return size_p
? 1 : current_tune
->constant_limit
;
2159 /* Emit an insn that's a simple single-set. Both the operands must be known
2161 inline static rtx_insn
*
2162 emit_set_insn (rtx x
, rtx y
)
2164 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick:
   each iteration clears the least-significant set bit, so the loop
   runs once per set bit rather than once per bit position.  */
static unsigned long
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1; /* Clear the least-significant set bit.  */
    }

  return count;
}
2186 } arm_fixed_mode_set
;
2188 /* A small helper for setting fixed-point library libfuncs. */
2191 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2192 const char *funcname
, const char *modename
,
2197 if (num_suffix
== 0)
2198 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2200 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2202 set_optab_libfunc (optable
, mode
, buffer
);
2206 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2207 machine_mode from
, const char *funcname
,
2208 const char *toname
, const char *fromname
)
2211 const char *maybe_suffix_2
= "";
2213 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2214 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2215 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2216 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2217 maybe_suffix_2
= "2";
2219 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2222 set_conv_libfunc (optable
, to
, from
, buffer
);
2225 /* Set up library functions unique to ARM. */
2228 arm_init_libfuncs (void)
2230 /* For Linux, we have access to kernel support for atomic operations. */
2231 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2232 init_sync_libfuncs (2 * UNITS_PER_WORD
);
2234 /* There are no special library functions unless we are using the
2239 /* The functions below are described in Section 4 of the "Run-Time
2240 ABI for the ARM architecture", Version 1.0. */
2242 /* Double-precision floating-point arithmetic. Table 2. */
2243 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2244 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2245 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2246 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2247 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2249 /* Double-precision comparisons. Table 3. */
2250 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2251 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2252 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2253 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2254 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2255 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2256 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2258 /* Single-precision floating-point arithmetic. Table 4. */
2259 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2260 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2261 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2262 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2263 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2265 /* Single-precision comparisons. Table 5. */
2266 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2267 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2268 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2269 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2270 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2271 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2272 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2274 /* Floating-point to integer conversions. Table 6. */
2275 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2276 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2277 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2278 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2279 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2280 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2281 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2282 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2284 /* Conversions between floating types. Table 7. */
2285 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2286 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2288 /* Integer to floating-point conversions. Table 8. */
2289 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2290 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2291 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2292 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2293 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2294 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2295 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2296 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2298 /* Long long. Table 9. */
2299 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2300 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2301 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2302 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2303 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2304 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2305 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2306 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2308 /* Integer (32/32->32) division. \S 4.3.1. */
2309 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2310 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2312 /* The divmod functions are designed so that they can be used for
2313 plain division, even though they return both the quotient and the
2314 remainder. The quotient is returned in the usual location (i.e.,
2315 r0 for SImode, {r0, r1} for DImode), just as would be expected
2316 for an ordinary division routine. Because the AAPCS calling
2317 conventions specify that all of { r0, r1, r2, r3 } are
2318 callee-saved registers, there is no need to tell the compiler
2319 explicitly that those registers are clobbered by these
2321 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2322 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2324 /* For SImode division the ABI provides div-without-mod routines,
2325 which are faster. */
2326 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2327 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2329 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2330 divmod libcalls instead. */
2331 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2332 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2333 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2334 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2336 /* Half-precision float operations. The compiler handles all operations
2337 with NULL libfuncs by converting the SFmode. */
2338 switch (arm_fp16_format
)
2340 case ARM_FP16_FORMAT_IEEE
:
2341 case ARM_FP16_FORMAT_ALTERNATIVE
:
2344 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2345 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2347 : "__gnu_f2h_alternative"));
2348 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2349 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2351 : "__gnu_h2f_alternative"));
2354 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2355 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2356 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2357 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2358 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2361 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2362 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2363 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2364 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2365 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2366 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2367 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2374 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2376 const arm_fixed_mode_set fixed_arith_modes
[] =
2397 const arm_fixed_mode_set fixed_conv_modes
[] =
2427 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2429 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2430 "add", fixed_arith_modes
[i
].name
, 3);
2431 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2432 "ssadd", fixed_arith_modes
[i
].name
, 3);
2433 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2434 "usadd", fixed_arith_modes
[i
].name
, 3);
2435 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2436 "sub", fixed_arith_modes
[i
].name
, 3);
2437 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2438 "sssub", fixed_arith_modes
[i
].name
, 3);
2439 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2440 "ussub", fixed_arith_modes
[i
].name
, 3);
2441 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2442 "mul", fixed_arith_modes
[i
].name
, 3);
2443 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2444 "ssmul", fixed_arith_modes
[i
].name
, 3);
2445 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2446 "usmul", fixed_arith_modes
[i
].name
, 3);
2447 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2448 "div", fixed_arith_modes
[i
].name
, 3);
2449 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2450 "udiv", fixed_arith_modes
[i
].name
, 3);
2451 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2452 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2453 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2454 "usdiv", fixed_arith_modes
[i
].name
, 3);
2455 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2456 "neg", fixed_arith_modes
[i
].name
, 2);
2457 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2458 "ssneg", fixed_arith_modes
[i
].name
, 2);
2459 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2460 "usneg", fixed_arith_modes
[i
].name
, 2);
2461 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2462 "ashl", fixed_arith_modes
[i
].name
, 3);
2463 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2464 "ashr", fixed_arith_modes
[i
].name
, 3);
2465 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2466 "lshr", fixed_arith_modes
[i
].name
, 3);
2467 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2468 "ssashl", fixed_arith_modes
[i
].name
, 3);
2469 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2470 "usashl", fixed_arith_modes
[i
].name
, 3);
2471 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2472 "cmp", fixed_arith_modes
[i
].name
, 2);
2475 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2476 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2479 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2480 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2483 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2484 fixed_conv_modes
[j
].mode
, "fract",
2485 fixed_conv_modes
[i
].name
,
2486 fixed_conv_modes
[j
].name
);
2487 arm_set_fixed_conv_libfunc (satfract_optab
,
2488 fixed_conv_modes
[i
].mode
,
2489 fixed_conv_modes
[j
].mode
, "satfract",
2490 fixed_conv_modes
[i
].name
,
2491 fixed_conv_modes
[j
].name
);
2492 arm_set_fixed_conv_libfunc (fractuns_optab
,
2493 fixed_conv_modes
[i
].mode
,
2494 fixed_conv_modes
[j
].mode
, "fractuns",
2495 fixed_conv_modes
[i
].name
,
2496 fixed_conv_modes
[j
].name
);
2497 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2498 fixed_conv_modes
[i
].mode
,
2499 fixed_conv_modes
[j
].mode
, "satfractuns",
2500 fixed_conv_modes
[i
].name
,
2501 fixed_conv_modes
[j
].name
);
2505 if (TARGET_AAPCS_BASED
)
2506 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2509 /* On AAPCS systems, this is the "struct __va_list". */
2510 static GTY(()) tree va_list_type
;
2512 /* Return the type to use as __builtin_va_list. */
2514 arm_build_builtin_va_list (void)
2519 if (!TARGET_AAPCS_BASED
)
2520 return std_build_builtin_va_list ();
2522 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2530 The C Library ABI further reinforces this definition in \S
2533 We must follow this definition exactly. The structure tag
2534 name is visible in C++ mangled names, and thus forms a part
2535 of the ABI. The field name may be used by people who
2536 #include <stdarg.h>. */
2537 /* Create the type. */
2538 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2539 /* Give it the required name. */
2540 va_list_name
= build_decl (BUILTINS_LOCATION
,
2542 get_identifier ("__va_list"),
2544 DECL_ARTIFICIAL (va_list_name
) = 1;
2545 TYPE_NAME (va_list_type
) = va_list_name
;
2546 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2547 /* Create the __ap field. */
2548 ap_field
= build_decl (BUILTINS_LOCATION
,
2550 get_identifier ("__ap"),
2552 DECL_ARTIFICIAL (ap_field
) = 1;
2553 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2554 TYPE_FIELDS (va_list_type
) = ap_field
;
2555 /* Compute its layout. */
2556 layout_type (va_list_type
);
2558 return va_list_type
;
2561 /* Return an expression of type "void *" pointing to the next
2562 available argument in a variable-argument list. VALIST is the
2563 user-level va_list object, of type __builtin_va_list. */
2565 arm_extract_valist_ptr (tree valist
)
2567 if (TREE_TYPE (valist
) == error_mark_node
)
2568 return error_mark_node
;
2570 /* On an AAPCS target, the pointer is stored within "struct
2572 if (TARGET_AAPCS_BASED
)
2574 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2575 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2576 valist
, ap_field
, NULL_TREE
);
2582 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2584 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2586 valist
= arm_extract_valist_ptr (valist
);
2587 std_expand_builtin_va_start (valist
, nextarg
);
2590 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2592 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2595 valist
= arm_extract_valist_ptr (valist
);
2596 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2599 /* Fix up any incompatible options that the user has specified. */
2601 arm_option_override (void)
2603 if (global_options_set
.x_arm_arch_option
)
2604 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2606 if (global_options_set
.x_arm_cpu_option
)
2608 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2609 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2612 if (global_options_set
.x_arm_tune_option
)
2613 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2615 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2616 SUBTARGET_OVERRIDE_OPTIONS
;
2619 if (arm_selected_arch
)
2621 if (arm_selected_cpu
)
2623 /* Check for conflict between mcpu and march. */
2624 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2626 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2627 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2628 /* -march wins for code generation.
2629 -mcpu wins for default tuning. */
2630 if (!arm_selected_tune
)
2631 arm_selected_tune
= arm_selected_cpu
;
2633 arm_selected_cpu
= arm_selected_arch
;
2637 arm_selected_arch
= NULL
;
2640 /* Pick a CPU based on the architecture. */
2641 arm_selected_cpu
= arm_selected_arch
;
2644 /* If the user did not specify a processor, choose one for them. */
2645 if (!arm_selected_cpu
)
2647 const struct processors
* sel
;
2648 unsigned int sought
;
2650 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2651 if (!arm_selected_cpu
->name
)
2653 #ifdef SUBTARGET_CPU_DEFAULT
2654 /* Use the subtarget default CPU if none was specified by
2656 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2658 /* Default to ARM6. */
2659 if (!arm_selected_cpu
->name
)
2660 arm_selected_cpu
= &all_cores
[arm6
];
2663 sel
= arm_selected_cpu
;
2664 insn_flags
= sel
->flags
;
2666 /* Now check to see if the user has specified some command line
2667 switch that require certain abilities from the cpu. */
2670 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2672 sought
|= (FL_THUMB
| FL_MODE32
);
2674 /* There are no ARM processors that support both APCS-26 and
2675 interworking. Therefore we force FL_MODE26 to be removed
2676 from insn_flags here (if it was set), so that the search
2677 below will always be able to find a compatible processor. */
2678 insn_flags
&= ~FL_MODE26
;
2681 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2683 /* Try to locate a CPU type that supports all of the abilities
2684 of the default CPU, plus the extra abilities requested by
2686 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2687 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2690 if (sel
->name
== NULL
)
2692 unsigned current_bit_count
= 0;
2693 const struct processors
* best_fit
= NULL
;
2695 /* Ideally we would like to issue an error message here
2696 saying that it was not possible to find a CPU compatible
2697 with the default CPU, but which also supports the command
2698 line options specified by the programmer, and so they
2699 ought to use the -mcpu=<name> command line option to
2700 override the default CPU type.
2702 If we cannot find a cpu that has both the
2703 characteristics of the default cpu and the given
2704 command line options we scan the array again looking
2705 for a best match. */
2706 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2707 if ((sel
->flags
& sought
) == sought
)
2711 count
= bit_count (sel
->flags
& insn_flags
);
2713 if (count
>= current_bit_count
)
2716 current_bit_count
= count
;
2720 gcc_assert (best_fit
);
2724 arm_selected_cpu
= sel
;
2728 gcc_assert (arm_selected_cpu
);
2729 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2730 if (!arm_selected_tune
)
2731 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2733 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2734 insn_flags
= arm_selected_cpu
->flags
;
2735 arm_base_arch
= arm_selected_cpu
->base_arch
;
2737 arm_tune
= arm_selected_tune
->core
;
2738 tune_flags
= arm_selected_tune
->flags
;
2739 current_tune
= arm_selected_tune
->tune
;
2741 /* Make sure that the processor choice does not conflict with any of the
2742 other command line choices. */
2743 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2744 error ("target CPU does not support ARM mode");
2746 /* BPABI targets use linker tricks to allow interworking on cores
2747 without thumb support. */
2748 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2750 warning (0, "target CPU does not support interworking" );
2751 target_flags
&= ~MASK_INTERWORK
;
2754 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2756 warning (0, "target CPU does not support THUMB instructions");
2757 target_flags
&= ~MASK_THUMB
;
2760 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2762 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2763 target_flags
&= ~MASK_APCS_FRAME
;
2766 /* Callee super interworking implies thumb interworking. Adding
2767 this to the flags here simplifies the logic elsewhere. */
2768 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2769 target_flags
|= MASK_INTERWORK
;
2771 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2772 from here where no function is being compiled currently. */
2773 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2774 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2776 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2777 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2779 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2781 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2782 target_flags
|= MASK_APCS_FRAME
;
2785 if (TARGET_POKE_FUNCTION_NAME
)
2786 target_flags
|= MASK_APCS_FRAME
;
2788 if (TARGET_APCS_REENT
&& flag_pic
)
2789 error ("-fpic and -mapcs-reent are incompatible");
2791 if (TARGET_APCS_REENT
)
2792 warning (0, "APCS reentrant code not supported. Ignored");
2794 /* If this target is normally configured to use APCS frames, warn if they
2795 are turned off and debugging is turned on. */
2797 && write_symbols
!= NO_DEBUG
2798 && !TARGET_APCS_FRAME
2799 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2800 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2802 if (TARGET_APCS_FLOAT
)
2803 warning (0, "passing floating point arguments in fp regs not yet supported");
2805 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2806 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2807 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2808 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2809 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2810 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2811 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2812 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2813 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2814 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2815 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2816 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2817 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2818 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2819 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2821 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2822 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2823 thumb_code
= TARGET_ARM
== 0;
2824 thumb1_code
= TARGET_THUMB1
!= 0;
2825 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2826 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2827 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2828 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2829 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2830 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2831 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2832 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2833 arm_m_profile_small_mul
= (insn_flags
& FL_SMALLMUL
) != 0;
2834 if (arm_restrict_it
== 2)
2835 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2838 arm_restrict_it
= 0;
2840 /* If we are not using the default (ARM mode) section anchor offset
2841 ranges, then set the correct ranges now. */
2844 /* Thumb-1 LDR instructions cannot have negative offsets.
2845 Permissible positive offset ranges are 5-bit (for byte loads),
2846 6-bit (for halfword loads), or 7-bit (for word loads).
2847 Empirical results suggest a 7-bit anchor range gives the best
2848 overall code size. */
2849 targetm
.min_anchor_offset
= 0;
2850 targetm
.max_anchor_offset
= 127;
2852 else if (TARGET_THUMB2
)
2854 /* The minimum is set such that the total size of the block
2855 for a particular anchor is 248 + 1 + 4095 bytes, which is
2856 divisible by eight, ensuring natural spacing of anchors. */
2857 targetm
.min_anchor_offset
= -248;
2858 targetm
.max_anchor_offset
= 4095;
2861 /* V5 code we generate is completely interworking capable, so we turn off
2862 TARGET_INTERWORK here to avoid many tests later on. */
2864 /* XXX However, we must pass the right pre-processor defines to CPP
2865 or GLD can get confused. This is a hack. */
2866 if (TARGET_INTERWORK
)
2867 arm_cpp_interwork
= 1;
2870 target_flags
&= ~MASK_INTERWORK
;
2872 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2873 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2875 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2876 error ("iwmmxt abi requires an iwmmxt capable cpu");
2878 if (!global_options_set
.x_arm_fpu_index
)
2880 const char *target_fpu_name
;
2883 #ifdef FPUTYPE_DEFAULT
2884 target_fpu_name
= FPUTYPE_DEFAULT
;
2886 target_fpu_name
= "vfp";
2889 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2894 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2896 if (TARGET_NEON
&& !arm_arch7
)
2897 error ("target CPU does not support NEON");
2899 switch (arm_fpu_desc
->model
)
2901 case ARM_FP_MODEL_VFP
:
2902 arm_fpu_attr
= FPU_VFP
;
2909 if (TARGET_AAPCS_BASED
)
2911 if (TARGET_CALLER_INTERWORKING
)
2912 error ("AAPCS does not support -mcaller-super-interworking");
2914 if (TARGET_CALLEE_INTERWORKING
)
2915 error ("AAPCS does not support -mcallee-super-interworking");
2918 /* iWMMXt and NEON are incompatible. */
2919 if (TARGET_IWMMXT
&& TARGET_NEON
)
2920 error ("iWMMXt and NEON are incompatible");
2922 /* iWMMXt unsupported under Thumb mode. */
2923 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2924 error ("iWMMXt unsupported under Thumb mode");
2926 /* __fp16 support currently assumes the core has ldrh. */
2927 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2928 sorry ("__fp16 and no ldrh");
2930 /* If soft-float is specified then don't use FPU. */
2931 if (TARGET_SOFT_FLOAT
)
2932 arm_fpu_attr
= FPU_NONE
;
2934 if (TARGET_AAPCS_BASED
)
2936 if (arm_abi
== ARM_ABI_IWMMXT
)
2937 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2938 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2939 && TARGET_HARD_FLOAT
2941 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2943 arm_pcs_default
= ARM_PCS_AAPCS
;
2947 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2948 sorry ("-mfloat-abi=hard and VFP");
2950 if (arm_abi
== ARM_ABI_APCS
)
2951 arm_pcs_default
= ARM_PCS_APCS
;
2953 arm_pcs_default
= ARM_PCS_ATPCS
;
2956 /* For arm2/3 there is no need to do any scheduling if we are doing
2957 software floating-point. */
2958 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2959 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2961 /* Use the cp15 method if it is available. */
2962 if (target_thread_pointer
== TP_AUTO
)
2964 if (arm_arch6k
&& !TARGET_THUMB1
)
2965 target_thread_pointer
= TP_CP15
;
2967 target_thread_pointer
= TP_SOFT
;
2970 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2971 error ("can not use -mtp=cp15 with 16-bit Thumb");
2973 /* Override the default structure alignment for AAPCS ABI. */
2974 if (!global_options_set
.x_arm_structure_size_boundary
)
2976 if (TARGET_AAPCS_BASED
)
2977 arm_structure_size_boundary
= 8;
2981 if (arm_structure_size_boundary
!= 8
2982 && arm_structure_size_boundary
!= 32
2983 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2985 if (ARM_DOUBLEWORD_ALIGN
)
2987 "structure size boundary can only be set to 8, 32 or 64");
2989 warning (0, "structure size boundary can only be set to 8 or 32");
2990 arm_structure_size_boundary
2991 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2995 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2997 error ("RTP PIC is incompatible with Thumb");
3001 /* If stack checking is disabled, we can use r10 as the PIC register,
3002 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3003 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3005 if (TARGET_VXWORKS_RTP
)
3006 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3007 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3010 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3011 arm_pic_register
= 9;
3013 if (arm_pic_register_string
!= NULL
)
3015 int pic_register
= decode_reg_name (arm_pic_register_string
);
3018 warning (0, "-mpic-register= is useless without -fpic");
3020 /* Prevent the user from choosing an obviously stupid PIC register. */
3021 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3022 || pic_register
== HARD_FRAME_POINTER_REGNUM
3023 || pic_register
== STACK_POINTER_REGNUM
3024 || pic_register
>= PC_REGNUM
3025 || (TARGET_VXWORKS_RTP
3026 && (unsigned int) pic_register
!= arm_pic_register
))
3027 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3029 arm_pic_register
= pic_register
;
3032 if (TARGET_VXWORKS_RTP
3033 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3034 arm_pic_data_is_text_relative
= 0;
3036 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3037 if (fix_cm3_ldrd
== 2)
3039 if (arm_selected_cpu
->core
== cortexm3
)
3045 /* Enable -munaligned-access by default for
3046 - all ARMv6 architecture-based processors
3047 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3048 - ARMv8 architecture-base processors.
3050 Disable -munaligned-access by default for
3051 - all pre-ARMv6 architecture-based processors
3052 - ARMv6-M architecture-based processors. */
3054 if (unaligned_access
== 2)
3056 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
3057 unaligned_access
= 1;
3059 unaligned_access
= 0;
3061 else if (unaligned_access
== 1
3062 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3064 warning (0, "target CPU does not support unaligned accesses");
3065 unaligned_access
= 0;
3068 if (TARGET_THUMB1
&& flag_schedule_insns
)
3070 /* Don't warn since it's on by default in -O2. */
3071 flag_schedule_insns
= 0;
3076 /* If optimizing for size, bump the number of instructions that we
3077 are prepared to conditionally execute (even on a StrongARM). */
3078 max_insns_skipped
= 6;
3080 /* For THUMB2, we limit the conditional sequence to one IT block. */
3082 max_insns_skipped
= MAX_INSN_PER_IT_BLOCK
;
3085 max_insns_skipped
= current_tune
->max_insns_skipped
;
3087 /* Hot/Cold partitioning is not currently supported, since we can't
3088 handle literal pool placement in that case. */
3089 if (flag_reorder_blocks_and_partition
)
3091 inform (input_location
,
3092 "-freorder-blocks-and-partition not supported on this architecture");
3093 flag_reorder_blocks_and_partition
= 0;
3094 flag_reorder_blocks
= 1;
3098 /* Hoisting PIC address calculations more aggressively provides a small,
3099 but measurable, size reduction for PIC code. Therefore, we decrease
3100 the bar for unrestricted expression hoisting to the cost of PIC address
3101 calculation, which is 2 instructions. */
3102 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3103 global_options
.x_param_values
,
3104 global_options_set
.x_param_values
);
3106 /* ARM EABI defaults to strict volatile bitfields. */
3107 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3108 && abi_version_at_least(2))
3109 flag_strict_volatile_bitfields
= 1;
3111 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
3112 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
3113 if (flag_prefetch_loop_arrays
< 0
3116 && current_tune
->num_prefetch_slots
> 0)
3117 flag_prefetch_loop_arrays
= 1;
3119 /* Set up parameters to be used in prefetching algorithm. Do not override the
3120 defaults unless we are tuning for a core we have researched values for. */
3121 if (current_tune
->num_prefetch_slots
> 0)
3122 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3123 current_tune
->num_prefetch_slots
,
3124 global_options
.x_param_values
,
3125 global_options_set
.x_param_values
);
3126 if (current_tune
->l1_cache_line_size
>= 0)
3127 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3128 current_tune
->l1_cache_line_size
,
3129 global_options
.x_param_values
,
3130 global_options_set
.x_param_values
);
3131 if (current_tune
->l1_cache_size
>= 0)
3132 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3133 current_tune
->l1_cache_size
,
3134 global_options
.x_param_values
,
3135 global_options_set
.x_param_values
);
3137 /* Use Neon to perform 64-bits operations rather than core
3139 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3140 if (use_neon_for_64bits
== 1)
3141 prefer_neon_for_64bits
= true;
3143 /* Use the alternative scheduling-pressure algorithm by default. */
3144 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3145 global_options
.x_param_values
,
3146 global_options_set
.x_param_values
);
3148 /* Disable shrink-wrap when optimizing function for size, since it tends to
3149 generate additional returns. */
3150 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
3151 flag_shrink_wrap
= false;
3152 /* TBD: Dwarf info for apcs frame is not handled yet. */
3153 if (TARGET_APCS_FRAME
)
3154 flag_shrink_wrap
= false;
3156 /* We only support -mslow-flash-data on armv7-m targets. */
3157 if (target_slow_flash_data
3158 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
3159 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
3160 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3162 /* Currently, for slow flash data, we just disable literal pools. */
3163 if (target_slow_flash_data
)
3164 arm_disable_literal_pool
= true;
3166 /* Thumb2 inline assembly code should always use unified syntax.
3167 This will apply to ARM and Thumb1 eventually. */
3169 inline_asm_unified
= 1;
3171 /* Register global variables with the garbage collector. */
3172 arm_add_gc_roots ();
/* Initialize the obstack used for minipool (constant pool) nodes and
   remember its base object so the pool can be freed wholesale later.
   NOTE(review): this extraction has lost surrounding lines (return type,
   braces, and the ggc registration this function's name implies) — verify
   against the full file.  */
3176 arm_add_gc_roots (void)
3178 gcc_obstack_init(&minipool_obstack
);
3179 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3182 /* A table of known ARM exception types.
3183 For use with the interrupt function attribute. */
/* Attribute-argument string as written by the user.  */
3187 const char *const arg
;
/* The ARM_FT_* flag value returned for that argument.  */
3188 const unsigned long return_value
;
/* Table scanned linearly by arm_isr_value; terminated by the NULL entry.
   The duplicate "ABORT"/"abort" rows are harmless — lookup stops at the
   first match.  */
3192 static const isr_attribute_arg isr_attribute_args
[] =
3194 { "IRQ", ARM_FT_ISR
},
3195 { "irq", ARM_FT_ISR
},
3196 { "FIQ", ARM_FT_FIQ
},
3197 { "fiq", ARM_FT_FIQ
},
3198 { "ABORT", ARM_FT_ISR
},
3199 { "abort", ARM_FT_ISR
},
3200 { "ABORT", ARM_FT_ISR
},
3201 { "abort", ARM_FT_ISR
},
3202 { "UNDEF", ARM_FT_EXCEPTION
},
3203 { "undef", ARM_FT_EXCEPTION
},
3204 { "SWI", ARM_FT_EXCEPTION
},
3205 { "swi", ARM_FT_EXCEPTION
},
3206 { NULL
, ARM_FT_NORMAL
}
3209 /* Returns the (interrupt) function type of the current
3210 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3212 static unsigned long
3213 arm_isr_value (tree argument
)
3215 const isr_attribute_arg
* ptr
;
/* NOTE(review): the guarding condition for this early return (presumably
   a TARGET_THUMB1-style check) was lost in extraction — confirm against
   the full file.  */
3219 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3221 /* No argument - default to IRQ. */
3222 if (argument
== NULL_TREE
)
3225 /* Get the value of the argument. */
/* Reject a missing or non-string attribute argument.  */
3226 if (TREE_VALUE (argument
) == NULL_TREE
3227 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3228 return ARM_FT_UNKNOWN
;
3230 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3232 /* Check it against the list of known arguments. */
3233 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3234 if (streq (arg
, ptr
->arg
))
3235 return ptr
->return_value
;
3237 /* An unrecognized interrupt type. */
3238 return ARM_FT_UNKNOWN
;
3241 /* Computes the type of the current function.  Accumulates ARM_FT_*
     flags (volatile, nested, naked, interrupt kind) by inspecting
     current_function_decl and its attributes.  */
3243 static unsigned long
3244 arm_compute_func_type (void)
3246 unsigned long type
= ARM_FT_UNKNOWN
;
3250 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3252 /* Decide if the current function is volatile. Such functions
3253 never return, and many memory cycles can be saved by not storing
3254 register values that will never be needed again. This optimization
3255 was added to speed up context switching in a kernel application. */
3257 && (TREE_NOTHROW (current_function_decl
)
3258 || !(flag_unwind_tables
3260 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3261 && TREE_THIS_VOLATILE (current_function_decl
))
3262 type
|= ARM_FT_VOLATILE
;
/* Nested functions receive a static chain.  */
3264 if (cfun
->static_chain_decl
!= NULL
)
3265 type
|= ARM_FT_NESTED
;
3267 attr
= DECL_ATTRIBUTES (current_function_decl
);
3269 a
= lookup_attribute ("naked", attr
);
3271 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonymous attribute spellings; try both.  */
3273 a
= lookup_attribute ("isr", attr
);
3275 a
= lookup_attribute ("interrupt", attr
);
/* No interrupt attribute: plain function, possibly interworked.  */
3278 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3280 type
|= arm_isr_value (TREE_VALUE (a
));
3285 /* Returns the type of the current function.  The result is computed
     lazily by arm_compute_func_type and cached in cfun->machine.  */
3288 arm_current_func_type (void)
3290 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3291 cfun
->machine
->func_type
= arm_compute_func_type ();
3293 return cfun
->machine
->func_type
;
/* Target hook: whether incoming arguments may be spilled to stack slots.  */
3297 arm_allocate_stack_slots_for_args (void)
3299 /* Naked functions should not allocate stack slots for arguments. */
3300 return !IS_NAKED (arm_current_func_type ());
/* Target hook: whether to warn about a missing return statement in DECL.  */
3304 arm_warn_func_return (tree decl
)
3306 /* Naked functions are implemented entirely in assembly, including the
3307 return sequence, so suppress warnings about this. */
3308 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3312 /* Output assembler code for a block containing the constant parts
3313 of a trampoline, leaving space for the variable parts.
3315 On the ARM, (if r8 is the static chain regnum, and remembering that
3316 referencing pc adds an offset of 8) the trampoline looks like:
3319 .word static chain value
3320 .word function's address
3321 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3324 arm_asm_trampoline_template (FILE *f
)
/* ARM-mode variant: two pc-relative loads (static chain, then target pc).
   NOTE(review): the guarding "if (TARGET_ARM)" line is not visible in this
   extraction.  */
3328 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3329 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3331 else if (TARGET_THUMB2
)
3333 /* The Thumb-2 trampoline is similar to the arm implementation.
3334 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3335 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3336 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3337 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* 16-bit Thumb variant: no pc-relative ldr into arbitrary registers, so
   shuffle through r0 on the stack and return via pop {pc}.  */
3341 ASM_OUTPUT_ALIGN (f
, 2);
3342 fprintf (f
, "\t.code\t16\n");
3343 fprintf (f
, ".Ltrampoline_start:\n");
3344 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3345 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3346 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3347 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3348 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3349 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Reserve the two variable words (chain value and target address) that
   arm_trampoline_init fills in.  */
3351 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3352 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3355 /* Emit RTL insns to initialize the variable parts of a trampoline.
     M_TRAMP is the memory block, FNDECL the nested function, CHAIN_VALUE
     the static chain to pass.  */
3358 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3360 rtx fnaddr
, mem
, a_tramp
;
/* Copy the constant template emitted by arm_asm_trampoline_template.  */
3362 emit_block_move (m_tramp
, assemble_trampoline_template (),
3363 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* Store the static chain word; its offset differs between the 32-bit
   and 16-bit Thumb templates.  */
3365 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3366 emit_move_insn (mem
, chain_value
);
/* Store the target function's address in the following word.  */
3368 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3369 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3370 emit_move_insn (mem
, fnaddr
);
/* The trampoline is executed as code, so flush the range from the
   instruction cache.  */
3372 a_tramp
= XEXP (m_tramp
, 0);
3373 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3374 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3375 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3378 /* Thumb trampolines should be entered in thumb mode, so set
3379 the bottom bit of the address. */
3382 arm_trampoline_adjust_address (rtx addr
)
/* NOTE(review): the "if (TARGET_THUMB)" guard and the return statement
   are not visible in this extraction.  */
3385 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3386 NULL
, 0, OPTAB_LIB_WIDEN
);
3390 /* Return 1 if it is possible to return using a single instruction.
3391 If SIBLING is non-null, this is a test for a return before a sibling
3392 call. SIBLING is the call insn, so we can examine its register usage. */
3395 use_return_insn (int iscond
, rtx sibling
)
3398 unsigned int func_type
;
3399 unsigned long saved_int_regs
;
3400 unsigned HOST_WIDE_INT stack_adjust
;
3401 arm_stack_offsets
*offsets
;
3403 /* Never use a return instruction before reload has run. */
3404 if (!reload_completed
)
3407 func_type
= arm_current_func_type ();
3409 /* Naked, volatile and stack alignment functions need special
3411 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3414 /* So do interrupt functions that use the frame pointer and Thumb
3415 interrupt functions. */
3416 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
/* When tuning prefers ldrd/strd and we are not optimizing for size,
   epilogue generation is deferred; don't claim a single-insn return.  */
3419 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3420 && !optimize_function_for_size_p (cfun
))
3423 offsets
= arm_get_frame_offsets ();
3424 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3426 /* As do variadic functions. */
3427 if (crtl
->args
.pretend_args_size
3428 || cfun
->machine
->uses_anonymous_args
3429 /* Or if the function calls __builtin_eh_return () */
3430 || crtl
->calls_eh_return
3431 /* Or if the function calls alloca */
3432 || cfun
->calls_alloca
3433 /* Or if there is a stack adjustment. However, if the stack pointer
3434 is saved on the stack, we can use a pre-incrementing stack load. */
3435 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3436 && stack_adjust
== 4)))
3439 saved_int_regs
= offsets
->saved_regs_mask
;
3441 /* Unfortunately, the insn
3443 ldmib sp, {..., sp, ...}
3445 triggers a bug on most SA-110 based devices, such that the stack
3446 pointer won't be correctly restored if the instruction takes a
3447 page fault. We work around this problem by popping r3 along with
3448 the other registers, since that is never slower than executing
3449 another instruction.
3451 We test for !arm_arch5 here, because code for any architecture
3452 less than this could potentially be run on one of the buggy
3454 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3456 /* Validate that r3 is a call-clobbered register (always true in
3457 the default abi) ... */
3458 if (!call_used_regs
[3])
3461 /* ... that it isn't being used for a return value ... */
3462 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3465 /* ... or for a tail-call argument ... */
3468 gcc_assert (CALL_P (sibling
));
3470 if (find_regno_fusage (sibling
, USE
, 3))
3474 /* ... and that there are no call-saved registers in r0-r2
3475 (always true in the default ABI). */
3476 if (saved_int_regs
& 0x7)
3480 /* Can't be done if interworking with Thumb, and any registers have been
3482 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3485 /* On StrongARM, conditional returns are expensive if they aren't
3486 taken and multiple registers have been stacked. */
3487 if (iscond
&& arm_tune_strongarm
)
3489 /* Conditional return when just the LR is stored is a simple
3490 conditional-load instruction, that's not expensive. */
3491 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
/* NOTE(review): the first conjunct of this condition (a flag_pic-style
   test) was lost in extraction — confirm against the full file.  */
3495 && arm_pic_register
!= INVALID_REGNUM
3496 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3500 /* If there are saved registers but the LR isn't saved, then we need
3501 two instructions for the return. */
3502 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3505 /* Can't be done if any of the VFP regs are pushed,
3506 since this also requires an insn. */
3507 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3508 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3509 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
/* Likewise for iWMMXt registers: any live call-saved one needs its own
   restore insn.  */
3512 if (TARGET_REALLY_IWMMXT
)
3513 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3514 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3520 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3521 shrink-wrapping if possible. This is the case if we need to emit a
3522 prologue, which we can test by looking at the offsets. */
3524 use_simple_return_p (void)
3526 arm_stack_offsets
*offsets
;
3528 offsets
= arm_get_frame_offsets ();
3529 return offsets
->outgoing_args
!= 0;
3532 /* Return TRUE if int I is a valid immediate ARM constant, i.e. one
     representable by the ARM/Thumb-2 modified-immediate encodings.  */
3535 const_ok_for_arm (HOST_WIDE_INT i
)
3539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3540 be all zero, or all one. */
3541 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3542 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3543 != ((~(unsigned HOST_WIDE_INT
) 0)
3544 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
/* Work on the low 32 bits only from here on.  */
3547 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3549 /* Fast return for 0 and small values. We must do this for zero, since
3550 the code below can't handle that one case. */
3551 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3554 /* Get the number of trailing zeros. */
3555 lowbit
= ffs((int) i
) - 1;
3557 /* Only even shifts are allowed in ARM mode so round down to the
3558 nearest even number. */
/* An 8-bit value shifted left by LOWBIT covers the constant.  */
3562 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3567 /* Allow rotated constants in ARM mode. */
3569 && ((i
& ~0xc000003f) == 0
3570 || (i
& ~0xf000000f) == 0
3571 || (i
& ~0xfc000003) == 0))
3578 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3581 if (i
== v
|| i
== (v
| (v
<< 8)))
3584 /* Allow repeated pattern 0xXY00XY00. */
3594 /* Return true if I is a valid constant for the operation CODE.
     NOTE(review): the switch statement and its case labels (SET, PLUS,
     IOR/XOR, AND, etc.) were lost in extraction; the fragments below are
     the per-case bodies — confirm ordering against the full file.  */
3596 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3598 if (const_ok_for_arm (i
))
3604 /* See if we can use movw. */
3605 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3608 /* Otherwise, try mvn. */
3609 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3612 /* See if we can use addw or subw. */
3614 && ((i
& 0xfffff000) == 0
3615 || ((-i
) & 0xfffff000) == 0))
3617 /* else fall through. */
3637 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3639 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3645 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3649 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3656 /* Return true if I is a valid di mode constant for the operation CODE.
     Splits I into its high and low 32-bit halves and checks each.
     NOTE(review): (i >> 32) assumes HOST_WIDE_INT is 64 bits wide, which
     holds for GCC's HWI definition.  */
3658 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3660 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3661 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3662 rtx hi
= GEN_INT (hi_val
);
3663 rtx lo
= GEN_INT (lo_val
);
/* Each half must be usable directly, or be all-ones (handled by mvn).  */
3673 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3674 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3676 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3683 /* Emit a sequence of insns to handle a large constant.
3684 CODE is the code of the operation required, it can be any of SET, PLUS,
3685 IOR, AND, XOR, MINUS;
3686 MODE is the mode in which the operation is being performed;
3687 VAL is the integer to operate on;
3688 SOURCE is the other operand (a register, or a null-pointer for SET);
3689 SUBTARGETS means it is safe to create scratch registers if that will
3690 either produce a simpler sequence, or we will want to cse the values.
3691 Return value is the number of insns emitted. */
3693 /* ??? Tweak this for thumb2. */
3695 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
3696 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* If INSN is conditionally executed, every emitted insn must carry the
   same condition.  */
3700 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3701 cond
= COND_EXEC_TEST (PATTERN (insn
));
3705 if (subtargets
|| code
== SET
3706 || (REG_P (target
) && REG_P (source
)
3707 && REGNO (target
) != REGNO (source
)))
3709 /* After arm_reorg has been called, we can't fix up expensive
3710 constants by pushing them into memory so we must synthesize
3711 them in-line, regardless of the cost. This is only likely to
3712 be more costly on chips that have load delay slots and we are
3713 compiling without running the scheduler (so no splitting
3714 occurred before the final instruction emission).
3716 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3718 if (!cfun
->machine
->after_arm_reorg
3720 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3722 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3727 /* Currently SET is the only monadic value for CODE, all
3728 the rest are diadic. */
3729 if (TARGET_USE_MOVT
)
3730 arm_emit_movpair (target
, GEN_INT (val
))
;
3732 emit_set_insn (target
, GEN_INT (val
));
/* Diadic case: materialize VAL into a temporary, then apply CODE.  */
3738 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3740 if (TARGET_USE_MOVT
)
3741 arm_emit_movpair (temp
, GEN_INT (val
));
3743 emit_set_insn (temp
, GEN_INT (val
));
3745 /* For MINUS, the value is subtracted from, since we never
3746 have subtraction of a constant. */
3748 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3750 emit_set_insn (target
,
3751 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Fall back to full in-line synthesis of the constant.  */
3757 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3761 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3762 ARM/THUMB2 immediates, and add up to VAL.
3763 The function return value gives the number of insns required. */
3765 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3766 struct four_ints
*return_sequence
)
3768 int best_consecutive_zeros
= 0;
3772 struct four_ints tmp_sequence
;
3774 /* If we aren't targeting ARM, the best place to start is always at
3775 the bottom, otherwise look more closely. */
/* Scan 2-bit-aligned positions for the longest run of zero bits; start
   emitting just past it so each 8-bit chunk lands on useful bits.  */
3778 for (i
= 0; i
< 32; i
+= 2)
3780 int consecutive_zeros
= 0;
3782 if (!(val
& (3 << i
)))
3784 while ((i
< 32) && !(val
& (3 << i
)))
3786 consecutive_zeros
+= 2;
3789 if (consecutive_zeros
> best_consecutive_zeros
)
3791 best_consecutive_zeros
= consecutive_zeros
;
3792 best_start
= i
- consecutive_zeros
;
3799 /* So long as it won't require any more insns to do so, it's
3800 desirable to emit a small constant (in bits 0...9) in the last
3801 insn. This way there is more chance that it can be combined with
3802 a later addressing insn to form a pre-indexed load or store
3803 operation. Consider:
3805 *((volatile int *)0xe0000100) = 1;
3806 *((volatile int *)0xe0000110) = 2;
3808 We want this to wind up as:
3812 str rB, [rA, #0x100]
3814 str rB, [rA, #0x110]
3816 rather than having to synthesize both large constants from scratch.
3818 Therefore, we calculate how many insns would be required to emit
3819 the constant starting from `best_start', and also starting from
3820 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3821 yield a shorter sequence, we may as well use zero. */
3822 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3824 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3826 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
/* Prefer the start-at-zero sequence on a tie (see comment above).  */
3827 if (insns2
<= insns1
)
3829 *return_sequence
= tmp_sequence
;
3837 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3839 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3840 struct four_ints
*return_sequence
, int i
)
3842 int remainder
= val
& 0xffffffff;
3845 /* Try and find a way of doing the job in either two or three
3848 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3849 location. We start at position I. This may be the MSB, or
3850 optimal_immediate_sequence may have positioned it at the largest block
3851 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3852 wrapping around to the top of the word when we drop off the bottom.
3853 In the worst case this code should produce no more than four insns.
3855 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3856 constants, shifted to any arbitrary location. We should always start
3861 unsigned int b1
, b2
, b3
, b4
;
3862 unsigned HOST_WIDE_INT result
;
3865 gcc_assert (insns
< 4);
3870 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3871 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3874 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3875 /* We can use addw/subw for the last 12 bits. */
3879 /* Use an 8-bit shifted/rotated immediate. */
3883 result
= remainder
& ((0x0ff << end
)
3884 | ((i
< end
) ? (0xff >> (32 - end
))
/* Advance to the next candidate bit position.  */
3891 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3892 arbitrary shifts. */
3893 i
-= TARGET_ARM
? 2 : 1;
3897 /* Next, see if we can do a better job with a thumb2 replicated
3900 We do it this way around to catch the cases like 0x01F001E0 where
3901 two 8-bit immediates would work, but a replicated constant would
3904 TODO: 16-bit constants that don't clear all the bits, but still win.
3905 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
/* Split the remaining value into its four bytes.  */
3908 b1
= (remainder
& 0xff000000) >> 24;
3909 b2
= (remainder
& 0x00ff0000) >> 16;
3910 b3
= (remainder
& 0x0000ff00) >> 8;
3911 b4
= remainder
& 0xff;
3915 /* The 8-bit immediate already found clears b1 (and maybe b2),
3916 but must leave b3 and b4 alone. */
3918 /* First try to find a 32-bit replicated constant that clears
3919 almost everything. We can assume that we can't do it in one,
3920 or else we wouldn't be here. */
3921 unsigned int tmp
= b1
& b2
& b3
& b4
;
3922 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3924 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3925 + (tmp
== b3
) + (tmp
== b4
);
3927 && (matching_bytes
>= 3
3928 || (matching_bytes
== 2
3929 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3931 /* At least 3 of the bytes match, and the fourth has at
3932 least as many bits set, or two of the bytes match
3933 and it will only require one more insn to finish. */
3941 /* Second, try to find a 16-bit replicated constant that can
3942 leave three of the bytes clear. If b2 or b4 is already
3943 zero, then we can. If the 8-bit from above would not
3944 clear b2 anyway, then we still win. */
3945 else if (b1
== b3
&& (!b2
|| !b4
3946 || (remainder
& 0x00ff0000 & ~result
)))
3948 result
= remainder
& 0xff00ff00;
3954 /* The 8-bit immediate already found clears b2 (and maybe b3)
3955 and we don't get here unless b1 is already clear, but it will
3956 leave b4 unchanged. */
3958 /* If we can clear b2 and b4 at once, then we win, since the
3959 8-bits couldn't possibly reach that far. */
3962 result
= remainder
& 0x00ff00ff;
/* Record this immediate and strip its bits from the remainder.  */
3968 return_sequence
->i
[insns
++] = result
;
3969 remainder
&= ~result
;
/* For SET and MINUS, subsequent chunks are merged differently.
   NOTE(review): the loop/termination structure around this point was
   lost in extraction — confirm against the full file.  */
3971 if (code
== SET
|| code
== MINUS
)
3979 /* Emit an instruction with the indicated PATTERN. If COND is
3980 non-NULL, conditionalize the execution of the instruction on COND
3984 emit_constant_insn (rtx cond
, rtx pattern
)
/* Wrap the pattern in a COND_EXEC; COND is copied because it may be
   reused for several emitted insns.  */
3987 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3988 emit_insn (pattern
);
3991 /* As above, but extra parameter GENERATE which, if clear, suppresses
3995 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
3996 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
4001 int final_invert
= 0;
4003 int set_sign_bit_copies
= 0;
4004 int clear_sign_bit_copies
= 0;
4005 int clear_zero_bit_copies
= 0;
4006 int set_zero_bit_copies
= 0;
4007 int insns
= 0, neg_insns
, inv_insns
;
4008 unsigned HOST_WIDE_INT temp1
, temp2
;
4009 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4010 struct four_ints
*immediates
;
4011 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4013 /* Find out which operations are safe for a given CODE. Also do a quick
4014 check for degenerate cases; these can occur when DImode operations
4027 if (remainder
== 0xffffffff)
4030 emit_constant_insn (cond
,
4031 gen_rtx_SET (VOIDmode
, target
,
4032 GEN_INT (ARM_SIGN_EXTEND (val
))));
4038 if (reload_completed
&& rtx_equal_p (target
, source
))
4042 emit_constant_insn (cond
,
4043 gen_rtx_SET (VOIDmode
, target
, source
));
4052 emit_constant_insn (cond
,
4053 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
4056 if (remainder
== 0xffffffff)
4058 if (reload_completed
&& rtx_equal_p (target
, source
))
4061 emit_constant_insn (cond
,
4062 gen_rtx_SET (VOIDmode
, target
, source
));
4071 if (reload_completed
&& rtx_equal_p (target
, source
))
4074 emit_constant_insn (cond
,
4075 gen_rtx_SET (VOIDmode
, target
, source
));
4079 if (remainder
== 0xffffffff)
4082 emit_constant_insn (cond
,
4083 gen_rtx_SET (VOIDmode
, target
,
4084 gen_rtx_NOT (mode
, source
)));
4091 /* We treat MINUS as (val - source), since (source - val) is always
4092 passed as (source + (-val)). */
4096 emit_constant_insn (cond
,
4097 gen_rtx_SET (VOIDmode
, target
,
4098 gen_rtx_NEG (mode
, source
)));
4101 if (const_ok_for_arm (val
))
4104 emit_constant_insn (cond
,
4105 gen_rtx_SET (VOIDmode
, target
,
4106 gen_rtx_MINUS (mode
, GEN_INT (val
),
4117 /* If we can do it in one insn get out quickly. */
4118 if (const_ok_for_op (val
, code
))
4121 emit_constant_insn (cond
,
4122 gen_rtx_SET (VOIDmode
, target
,
4124 ? gen_rtx_fmt_ee (code
, mode
, source
,
4130 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4132 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4133 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4137 if (mode
== SImode
&& i
== 16)
4138 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4140 emit_constant_insn (cond
,
4141 gen_zero_extendhisi2
4142 (target
, gen_lowpart (HImode
, source
)));
4144 /* Extz only supports SImode, but we can coerce the operands
4146 emit_constant_insn (cond
,
4147 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4148 gen_lowpart (SImode
, source
),
4149 GEN_INT (i
), const0_rtx
));
4155 /* Calculate a few attributes that may be useful for specific
4157 /* Count number of leading zeros. */
4158 for (i
= 31; i
>= 0; i
--)
4160 if ((remainder
& (1 << i
)) == 0)
4161 clear_sign_bit_copies
++;
4166 /* Count number of leading 1's. */
4167 for (i
= 31; i
>= 0; i
--)
4169 if ((remainder
& (1 << i
)) != 0)
4170 set_sign_bit_copies
++;
4175 /* Count number of trailing zero's. */
4176 for (i
= 0; i
<= 31; i
++)
4178 if ((remainder
& (1 << i
)) == 0)
4179 clear_zero_bit_copies
++;
4184 /* Count number of trailing 1's. */
4185 for (i
= 0; i
<= 31; i
++)
4187 if ((remainder
& (1 << i
)) != 0)
4188 set_zero_bit_copies
++;
4196 /* See if we can do this by sign_extending a constant that is known
4197 to be negative. This is a good, way of doing it, since the shift
4198 may well merge into a subsequent insn. */
4199 if (set_sign_bit_copies
> 1)
4201 if (const_ok_for_arm
4202 (temp1
= ARM_SIGN_EXTEND (remainder
4203 << (set_sign_bit_copies
- 1))))
4207 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4208 emit_constant_insn (cond
,
4209 gen_rtx_SET (VOIDmode
, new_src
,
4211 emit_constant_insn (cond
,
4212 gen_ashrsi3 (target
, new_src
,
4213 GEN_INT (set_sign_bit_copies
- 1)));
4217 /* For an inverted constant, we will need to set the low bits,
4218 these will be shifted out of harm's way. */
4219 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4220 if (const_ok_for_arm (~temp1
))
4224 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4225 emit_constant_insn (cond
,
4226 gen_rtx_SET (VOIDmode
, new_src
,
4228 emit_constant_insn (cond
,
4229 gen_ashrsi3 (target
, new_src
,
4230 GEN_INT (set_sign_bit_copies
- 1)));
4236 /* See if we can calculate the value as the difference between two
4237 valid immediates. */
4238 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4240 int topshift
= clear_sign_bit_copies
& ~1;
4242 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4243 & (0xff000000 >> topshift
));
4245 /* If temp1 is zero, then that means the 9 most significant
4246 bits of remainder were 1 and we've caused it to overflow.
4247 When topshift is 0 we don't need to do anything since we
4248 can borrow from 'bit 32'. */
4249 if (temp1
== 0 && topshift
!= 0)
4250 temp1
= 0x80000000 >> (topshift
- 1);
4252 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4254 if (const_ok_for_arm (temp2
))
4258 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4259 emit_constant_insn (cond
,
4260 gen_rtx_SET (VOIDmode
, new_src
,
4262 emit_constant_insn (cond
,
4263 gen_addsi3 (target
, new_src
,
4271 /* See if we can generate this by setting the bottom (or the top)
4272 16 bits, and then shifting these into the other half of the
4273 word. We only look for the simplest cases, to do more would cost
4274 too much. Be careful, however, not to generate this when the
4275 alternative would take fewer insns. */
4276 if (val
& 0xffff0000)
4278 temp1
= remainder
& 0xffff0000;
4279 temp2
= remainder
& 0x0000ffff;
4281 /* Overlaps outside this range are best done using other methods. */
4282 for (i
= 9; i
< 24; i
++)
4284 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4285 && !const_ok_for_arm (temp2
))
4287 rtx new_src
= (subtargets
4288 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4290 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4291 source
, subtargets
, generate
);
4299 gen_rtx_ASHIFT (mode
, source
,
4306 /* Don't duplicate cases already considered. */
4307 for (i
= 17; i
< 24; i
++)
4309 if (((temp1
| (temp1
>> i
)) == remainder
)
4310 && !const_ok_for_arm (temp1
))
4312 rtx new_src
= (subtargets
4313 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4315 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4316 source
, subtargets
, generate
);
4321 gen_rtx_SET (VOIDmode
, target
,
4324 gen_rtx_LSHIFTRT (mode
, source
,
4335 /* If we have IOR or XOR, and the constant can be loaded in a
4336 single instruction, and we can find a temporary to put it in,
4337 then this can be done in two instructions instead of 3-4. */
4339 /* TARGET can't be NULL if SUBTARGETS is 0 */
4340 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4342 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4346 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4348 emit_constant_insn (cond
,
4349 gen_rtx_SET (VOIDmode
, sub
,
4351 emit_constant_insn (cond
,
4352 gen_rtx_SET (VOIDmode
, target
,
4353 gen_rtx_fmt_ee (code
, mode
,
4364 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4365 and the remainder 0s for e.g. 0xfff00000)
4366 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4368 This can be done in 2 instructions by using shifts with mov or mvn.
4373 mvn r0, r0, lsr #12 */
4374 if (set_sign_bit_copies
> 8
4375 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4379 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4380 rtx shift
= GEN_INT (set_sign_bit_copies
);
4384 gen_rtx_SET (VOIDmode
, sub
,
4386 gen_rtx_ASHIFT (mode
,
4391 gen_rtx_SET (VOIDmode
, target
,
4393 gen_rtx_LSHIFTRT (mode
, sub
,
4400 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4402 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4404 For eg. r0 = r0 | 0xfff
4409 if (set_zero_bit_copies
> 8
4410 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4414 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4415 rtx shift
= GEN_INT (set_zero_bit_copies
);
4419 gen_rtx_SET (VOIDmode
, sub
,
4421 gen_rtx_LSHIFTRT (mode
,
4426 gen_rtx_SET (VOIDmode
, target
,
4428 gen_rtx_ASHIFT (mode
, sub
,
4434 /* This will never be reached for Thumb2 because orn is a valid
4435 instruction. This is for Thumb1 and the ARM 32 bit cases.
4437 x = y | constant (such that ~constant is a valid constant)
4439 x = ~(~y & ~constant).
4441 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4445 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4446 emit_constant_insn (cond
,
4447 gen_rtx_SET (VOIDmode
, sub
,
4448 gen_rtx_NOT (mode
, source
)));
4451 sub
= gen_reg_rtx (mode
);
4452 emit_constant_insn (cond
,
4453 gen_rtx_SET (VOIDmode
, sub
,
4454 gen_rtx_AND (mode
, source
,
4456 emit_constant_insn (cond
,
4457 gen_rtx_SET (VOIDmode
, target
,
4458 gen_rtx_NOT (mode
, sub
)));
4465 /* See if two shifts will do 2 or more insn's worth of work. */
4466 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4468 HOST_WIDE_INT shift_mask
= ((0xffffffff
4469 << (32 - clear_sign_bit_copies
))
4472 if ((remainder
| shift_mask
) != 0xffffffff)
4476 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4477 insns
= arm_gen_constant (AND
, mode
, cond
,
4478 remainder
| shift_mask
,
4479 new_src
, source
, subtargets
, 1);
4484 rtx targ
= subtargets
? NULL_RTX
: target
;
4485 insns
= arm_gen_constant (AND
, mode
, cond
,
4486 remainder
| shift_mask
,
4487 targ
, source
, subtargets
, 0);
4493 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4494 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4496 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4497 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4503 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4505 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4507 if ((remainder
| shift_mask
) != 0xffffffff)
4511 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4513 insns
= arm_gen_constant (AND
, mode
, cond
,
4514 remainder
| shift_mask
,
4515 new_src
, source
, subtargets
, 1);
4520 rtx targ
= subtargets
? NULL_RTX
: target
;
4522 insns
= arm_gen_constant (AND
, mode
, cond
,
4523 remainder
| shift_mask
,
4524 targ
, source
, subtargets
, 0);
4530 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4531 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4533 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4534 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4546 /* Calculate what the instruction sequences would be if we generated it
4547 normally, negated, or inverted. */
4549 /* AND cannot be split into multiple insns, so invert and use BIC. */
4552 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4555 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4560 if (can_invert
|| final_invert
)
4561 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4566 immediates
= &pos_immediates
;
4568 /* Is the negated immediate sequence more efficient? */
4569 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4572 immediates
= &neg_immediates
;
4577 /* Is the inverted immediate sequence more efficient?
4578 We must allow for an extra NOT instruction for XOR operations, although
4579 there is some chance that the final 'mvn' will get optimized later. */
4580 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4583 immediates
= &inv_immediates
;
4591 /* Now output the chosen sequence as instructions. */
4594 for (i
= 0; i
< insns
; i
++)
4596 rtx new_src
, temp1_rtx
;
4598 temp1
= immediates
->i
[i
];
4600 if (code
== SET
|| code
== MINUS
)
4601 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4602 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4603 new_src
= gen_reg_rtx (mode
);
4609 else if (can_negate
)
4612 temp1
= trunc_int_for_mode (temp1
, mode
);
4613 temp1_rtx
= GEN_INT (temp1
);
4617 else if (code
== MINUS
)
4618 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4620 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4622 emit_constant_insn (cond
,
4623 gen_rtx_SET (VOIDmode
, new_src
,
4629 can_negate
= can_invert
;
4633 else if (code
== MINUS
)
4641 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4642 gen_rtx_NOT (mode
, source
)));
4649 /* Canonicalize a comparison so that we are more likely to recognize it.
4650 This can be done for a few constant compares, where we can make the
4651 immediate value easier to load. */
4654 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4655 bool op0_preserve_value
)
4658 unsigned HOST_WIDE_INT i
, maxval
;
4660 mode
= GET_MODE (*op0
);
4661 if (mode
== VOIDmode
)
4662 mode
= GET_MODE (*op1
);
4664 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4666 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4667 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4668 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4669 for GTU/LEU in Thumb mode. */
4674 if (*code
== GT
|| *code
== LE
4675 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4677 /* Missing comparison. First try to use an available
4679 if (CONST_INT_P (*op1
))
4687 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4689 *op1
= GEN_INT (i
+ 1);
4690 *code
= *code
== GT
? GE
: LT
;
4696 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4697 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4699 *op1
= GEN_INT (i
+ 1);
4700 *code
= *code
== GTU
? GEU
: LTU
;
4709 /* If that did not work, reverse the condition. */
4710 if (!op0_preserve_value
)
4715 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4721 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4722 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4723 to facilitate possible combining with a cmp into 'ands'. */
4725 && GET_CODE (*op0
) == ZERO_EXTEND
4726 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4727 && GET_MODE (XEXP (*op0
, 0)) == QImode
4728 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4729 && subreg_lowpart_p (XEXP (*op0
, 0))
4730 && *op1
== const0_rtx
)
4731 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4734 /* Comparisons smaller than DImode. Only adjust comparisons against
4735 an out-of-range constant. */
4736 if (!CONST_INT_P (*op1
)
4737 || const_ok_for_arm (INTVAL (*op1
))
4738 || const_ok_for_arm (- INTVAL (*op1
)))
4752 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4754 *op1
= GEN_INT (i
+ 1);
4755 *code
= *code
== GT
? GE
: LT
;
4763 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4765 *op1
= GEN_INT (i
- 1);
4766 *code
= *code
== GE
? GT
: LE
;
4773 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4774 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4776 *op1
= GEN_INT (i
+ 1);
4777 *code
= *code
== GTU
? GEU
: LTU
;
4785 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4787 *op1
= GEN_INT (i
- 1);
4788 *code
= *code
== GEU
? GTU
: LEU
;
4799 /* Define how to find the value returned by a function. */
4802 arm_function_value(const_tree type
, const_tree func
,
4803 bool outgoing ATTRIBUTE_UNUSED
)
4806 int unsignedp ATTRIBUTE_UNUSED
;
4807 rtx r ATTRIBUTE_UNUSED
;
4809 mode
= TYPE_MODE (type
);
4811 if (TARGET_AAPCS_BASED
)
4812 return aapcs_allocate_return_reg (mode
, type
, func
);
4814 /* Promote integer types. */
4815 if (INTEGRAL_TYPE_P (type
))
4816 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4818 /* Promotes small structs returned in a register to full-word size
4819 for big-endian AAPCS. */
4820 if (arm_return_in_msb (type
))
4822 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4823 if (size
% UNITS_PER_WORD
!= 0)
4825 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4826 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4830 return arm_libcall_value_1 (mode
);
4833 /* libcall hashtable helpers. */
4835 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4837 typedef rtx_def value_type
;
4838 typedef rtx_def compare_type
;
4839 static inline hashval_t
hash (const value_type
*);
4840 static inline bool equal (const value_type
*, const compare_type
*);
4841 static inline void remove (value_type
*);
4845 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4847 return rtx_equal_p (p1
, p2
);
4851 libcall_hasher::hash (const value_type
*p1
)
4853 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4856 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4859 add_libcall (libcall_table_type
*htab
, rtx libcall
)
4861 *htab
->find_slot (libcall
, INSERT
) = libcall
;
4865 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4867 static bool init_done
= false;
4868 static libcall_table_type
*libcall_htab
= NULL
;
4874 libcall_htab
= new libcall_table_type (31);
4875 add_libcall (libcall_htab
,
4876 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4877 add_libcall (libcall_htab
,
4878 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4879 add_libcall (libcall_htab
,
4880 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4881 add_libcall (libcall_htab
,
4882 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4884 add_libcall (libcall_htab
,
4885 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4886 add_libcall (libcall_htab
,
4887 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4888 add_libcall (libcall_htab
,
4889 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4890 add_libcall (libcall_htab
,
4891 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4893 add_libcall (libcall_htab
,
4894 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4895 add_libcall (libcall_htab
,
4896 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4897 add_libcall (libcall_htab
,
4898 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4899 add_libcall (libcall_htab
,
4900 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4901 add_libcall (libcall_htab
,
4902 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4903 add_libcall (libcall_htab
,
4904 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4905 add_libcall (libcall_htab
,
4906 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4907 add_libcall (libcall_htab
,
4908 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4910 /* Values from double-precision helper functions are returned in core
4911 registers if the selected core only supports single-precision
4912 arithmetic, even if we are using the hard-float ABI. The same is
4913 true for single-precision helpers, but we will never be using the
4914 hard-float ABI on a CPU which doesn't support single-precision
4915 operations in hardware. */
4916 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4917 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4918 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4919 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4920 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4921 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4922 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4923 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4924 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4925 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4926 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4927 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4929 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4933 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
4937 arm_libcall_value_1 (machine_mode mode
)
4939 if (TARGET_AAPCS_BASED
)
4940 return aapcs_libcall_value (mode
);
4941 else if (TARGET_IWMMXT_ABI
4942 && arm_vector_mode_supported_p (mode
))
4943 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4945 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4948 /* Define how to find the value returned by a library function
4949 assuming the value has mode MODE. */
4952 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
4954 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4955 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4957 /* The following libcalls return their result in integer registers,
4958 even though they return a floating point value. */
4959 if (arm_libcall_uses_aapcs_base (libcall
))
4960 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4964 return arm_libcall_value_1 (mode
);
4967 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4970 arm_function_value_regno_p (const unsigned int regno
)
4972 if (regno
== ARG_REGISTER (1)
4974 && TARGET_AAPCS_BASED
4976 && TARGET_HARD_FLOAT
4977 && regno
== FIRST_VFP_REGNUM
)
4978 || (TARGET_IWMMXT_ABI
4979 && regno
== FIRST_IWMMXT_REGNUM
))
4985 /* Determine the amount of memory needed to store the possible return
4986 registers of an untyped call. */
4988 arm_apply_result_size (void)
4994 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4996 if (TARGET_IWMMXT_ABI
)
5003 /* Decide whether TYPE should be returned in memory (true)
5004 or in a register (false). FNTYPE is the type of the function making
5007 arm_return_in_memory (const_tree type
, const_tree fntype
)
5011 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5013 if (TARGET_AAPCS_BASED
)
5015 /* Simple, non-aggregate types (ie not including vectors and
5016 complex) are always returned in a register (or registers).
5017 We don't care about which register here, so we can short-cut
5018 some of the detail. */
5019 if (!AGGREGATE_TYPE_P (type
)
5020 && TREE_CODE (type
) != VECTOR_TYPE
5021 && TREE_CODE (type
) != COMPLEX_TYPE
)
5024 /* Any return value that is no larger than one word can be
5026 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5029 /* Check any available co-processors to see if they accept the
5030 type as a register candidate (VFP, for example, can return
5031 some aggregates in consecutive registers). These aren't
5032 available if the call is variadic. */
5033 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5036 /* Vector values should be returned using ARM registers, not
5037 memory (unless they're over 16 bytes, which will break since
5038 we only have four call-clobbered registers to play with). */
5039 if (TREE_CODE (type
) == VECTOR_TYPE
)
5040 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5042 /* The rest go in memory. */
5046 if (TREE_CODE (type
) == VECTOR_TYPE
)
5047 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5049 if (!AGGREGATE_TYPE_P (type
) &&
5050 (TREE_CODE (type
) != VECTOR_TYPE
))
5051 /* All simple types are returned in registers. */
5054 if (arm_abi
!= ARM_ABI_APCS
)
5056 /* ATPCS and later return aggregate types in memory only if they are
5057 larger than a word (or are variable size). */
5058 return (size
< 0 || size
> UNITS_PER_WORD
);
5061 /* For the arm-wince targets we choose to be compatible with Microsoft's
5062 ARM and Thumb compilers, which always return aggregates in memory. */
5064 /* All structures/unions bigger than one word are returned in memory.
5065 Also catch the case where int_size_in_bytes returns -1. In this case
5066 the aggregate is either huge or of variable size, and in either case
5067 we will want to return it via memory and not in a register. */
5068 if (size
< 0 || size
> UNITS_PER_WORD
)
5071 if (TREE_CODE (type
) == RECORD_TYPE
)
5075 /* For a struct the APCS says that we only return in a register
5076 if the type is 'integer like' and every addressable element
5077 has an offset of zero. For practical purposes this means
5078 that the structure can have at most one non bit-field element
5079 and that this element must be the first one in the structure. */
5081 /* Find the first field, ignoring non FIELD_DECL things which will
5082 have been created by C++. */
5083 for (field
= TYPE_FIELDS (type
);
5084 field
&& TREE_CODE (field
) != FIELD_DECL
;
5085 field
= DECL_CHAIN (field
))
5089 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5091 /* Check that the first field is valid for returning in a register. */
5093 /* ... Floats are not allowed */
5094 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5097 /* ... Aggregates that are not themselves valid for returning in
5098 a register are not allowed. */
5099 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5102 /* Now check the remaining fields, if any. Only bitfields are allowed,
5103 since they are not addressable. */
5104 for (field
= DECL_CHAIN (field
);
5106 field
= DECL_CHAIN (field
))
5108 if (TREE_CODE (field
) != FIELD_DECL
)
5111 if (!DECL_BIT_FIELD_TYPE (field
))
5118 if (TREE_CODE (type
) == UNION_TYPE
)
5122 /* Unions can be returned in registers if every element is
5123 integral, or can be returned in an integer register. */
5124 for (field
= TYPE_FIELDS (type
);
5126 field
= DECL_CHAIN (field
))
5128 if (TREE_CODE (field
) != FIELD_DECL
)
5131 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5134 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5140 #endif /* not ARM_WINCE */
5142 /* Return all other types in memory. */
5146 const struct pcs_attribute_arg
5150 } pcs_attribute_args
[] =
5152 {"aapcs", ARM_PCS_AAPCS
},
5153 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5155 /* We could recognize these, but changes would be needed elsewhere
5156 * to implement them. */
5157 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5158 {"atpcs", ARM_PCS_ATPCS
},
5159 {"apcs", ARM_PCS_APCS
},
5161 {NULL
, ARM_PCS_UNKNOWN
}
5165 arm_pcs_from_attribute (tree attr
)
5167 const struct pcs_attribute_arg
*ptr
;
5170 /* Get the value of the argument. */
5171 if (TREE_VALUE (attr
) == NULL_TREE
5172 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5173 return ARM_PCS_UNKNOWN
;
5175 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5177 /* Check it against the list of known arguments. */
5178 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5179 if (streq (arg
, ptr
->arg
))
5182 /* An unrecognized interrupt type. */
5183 return ARM_PCS_UNKNOWN
;
5186 /* Get the PCS variant to use for this call. TYPE is the function's type
5187 specification, DECL is the specific declartion. DECL may be null if
5188 the call could be indirect or if this is a library call. */
5190 arm_get_pcs_model (const_tree type
, const_tree decl
)
5192 bool user_convention
= false;
5193 enum arm_pcs user_pcs
= arm_pcs_default
;
5198 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5201 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5202 user_convention
= true;
5205 if (TARGET_AAPCS_BASED
)
5207 /* Detect varargs functions. These always use the base rules
5208 (no argument is ever a candidate for a co-processor
5210 bool base_rules
= stdarg_p (type
);
5212 if (user_convention
)
5214 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5215 sorry ("non-AAPCS derived PCS variant");
5216 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5217 error ("variadic functions must use the base AAPCS variant");
5221 return ARM_PCS_AAPCS
;
5222 else if (user_convention
)
5224 else if (decl
&& flag_unit_at_a_time
)
5226 /* Local functions never leak outside this compilation unit,
5227 so we are free to use whatever conventions are
5229 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5230 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5232 return ARM_PCS_AAPCS_LOCAL
;
5235 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5236 sorry ("PCS variant");
5238 /* For everything else we use the target's default. */
5239 return arm_pcs_default
;
5244 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5245 const_tree fntype ATTRIBUTE_UNUSED
,
5246 rtx libcall ATTRIBUTE_UNUSED
,
5247 const_tree fndecl ATTRIBUTE_UNUSED
)
5249 /* Record the unallocated VFP registers. */
5250 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5251 pcum
->aapcs_vfp_reg_alloc
= 0;
5254 /* Walk down the type tree of TYPE counting consecutive base elements.
5255 If *MODEP is VOIDmode, then set it to the first valid floating point
5256 type. If a non-floating point type is found, or if a floating point
5257 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5258 otherwise return the count in the sub-tree. */
5260 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5265 switch (TREE_CODE (type
))
5268 mode
= TYPE_MODE (type
);
5269 if (mode
!= DFmode
&& mode
!= SFmode
)
5272 if (*modep
== VOIDmode
)
5281 mode
= TYPE_MODE (TREE_TYPE (type
));
5282 if (mode
!= DFmode
&& mode
!= SFmode
)
5285 if (*modep
== VOIDmode
)
5294 /* Use V2SImode and V4SImode as representatives of all 64-bit
5295 and 128-bit vector types, whether or not those modes are
5296 supported with the present options. */
5297 size
= int_size_in_bytes (type
);
5310 if (*modep
== VOIDmode
)
5313 /* Vector modes are considered to be opaque: two vectors are
5314 equivalent for the purposes of being homogeneous aggregates
5315 if they are the same size. */
5324 tree index
= TYPE_DOMAIN (type
);
5326 /* Can't handle incomplete types nor sizes that are not
5328 if (!COMPLETE_TYPE_P (type
)
5329 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5332 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5335 || !TYPE_MAX_VALUE (index
)
5336 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5337 || !TYPE_MIN_VALUE (index
)
5338 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5342 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5343 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5345 /* There must be no padding. */
5346 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5358 /* Can't handle incomplete types nor sizes that are not
5360 if (!COMPLETE_TYPE_P (type
)
5361 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5364 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5366 if (TREE_CODE (field
) != FIELD_DECL
)
5369 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5375 /* There must be no padding. */
5376 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5383 case QUAL_UNION_TYPE
:
5385 /* These aren't very interesting except in a degenerate case. */
5390 /* Can't handle incomplete types nor sizes that are not
5392 if (!COMPLETE_TYPE_P (type
)
5393 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5396 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5398 if (TREE_CODE (field
) != FIELD_DECL
)
5401 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5404 count
= count
> sub_count
? count
: sub_count
;
5407 /* There must be no padding. */
5408 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5421 /* Return true if PCS_VARIANT should use VFP registers. */
5423 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5425 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5427 static bool seen_thumb1_vfp
= false;
5429 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5431 sorry ("Thumb-1 hard-float VFP ABI");
5432 /* sorry() is not immediately fatal, so only display this once. */
5433 seen_thumb1_vfp
= true;
5439 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5442 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5443 (TARGET_VFP_DOUBLE
|| !is_double
));
5446 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5447 suitable for passing or returning in VFP registers for the PCS
5448 variant selected. If it is, then *BASE_MODE is updated to contain
5449 a machine mode describing each element of the argument's type and
5450 *COUNT to hold the number of such elements. */
5452 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5453 machine_mode mode
, const_tree type
,
5454 machine_mode
*base_mode
, int *count
)
5456 machine_mode new_mode
= VOIDmode
;
5458 /* If we have the type information, prefer that to working things
5459 out from the mode. */
5462 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5464 if (ag_count
> 0 && ag_count
<= 4)
5469 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5470 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5471 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5476 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5479 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5485 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5488 *base_mode
= new_mode
;
5493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5494 machine_mode mode
, const_tree type
)
5496 int count ATTRIBUTE_UNUSED
;
5497 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5499 if (!use_vfp_abi (pcs_variant
, false))
5501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5509 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5512 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5513 &pcum
->aapcs_vfp_rmode
,
5514 &pcum
->aapcs_vfp_rcount
);
5518 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5519 const_tree type ATTRIBUTE_UNUSED
)
5521 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5522 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5525 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5526 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5528 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5530 || (mode
== TImode
&& ! TARGET_NEON
)
5531 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5534 int rcount
= pcum
->aapcs_vfp_rcount
;
5536 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5540 /* Avoid using unsupported vector modes. */
5541 if (rmode
== V2SImode
)
5543 else if (rmode
== V4SImode
)
5550 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5551 for (i
= 0; i
< rcount
; i
++)
5553 rtx tmp
= gen_rtx_REG (rmode
,
5554 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5555 tmp
= gen_rtx_EXPR_LIST
5557 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5558 XVECEXP (par
, 0, i
) = tmp
;
5561 pcum
->aapcs_reg
= par
;
5564 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5571 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5573 const_tree type ATTRIBUTE_UNUSED
)
5575 if (!use_vfp_abi (pcs_variant
, false))
5578 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5581 machine_mode ag_mode
;
5586 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5591 if (ag_mode
== V2SImode
)
5593 else if (ag_mode
== V4SImode
)
5599 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5600 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5601 for (i
= 0; i
< count
; i
++)
5603 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5604 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5605 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5606 XVECEXP (par
, 0, i
) = tmp
;
5612 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5616 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5617 machine_mode mode ATTRIBUTE_UNUSED
,
5618 const_tree type ATTRIBUTE_UNUSED
)
5620 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5621 pcum
->aapcs_vfp_reg_alloc
= 0;
5625 #define AAPCS_CP(X) \
5627 aapcs_ ## X ## _cum_init, \
5628 aapcs_ ## X ## _is_call_candidate, \
5629 aapcs_ ## X ## _allocate, \
5630 aapcs_ ## X ## _is_return_candidate, \
5631 aapcs_ ## X ## _allocate_return_reg, \
5632 aapcs_ ## X ## _advance \
5635 /* Table of co-processors that can be used to pass arguments in
5636 registers. Idealy no arugment should be a candidate for more than
5637 one co-processor table entry, but the table is processed in order
5638 and stops after the first match. If that entry then fails to put
5639 the argument into a co-processor register, the argument will go on
5643 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5644 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5646 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5647 BLKmode) is a candidate for this co-processor's registers; this
5648 function should ignore any position-dependent state in
5649 CUMULATIVE_ARGS and only use call-type dependent information. */
5650 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5652 /* Return true if the argument does get a co-processor register; it
5653 should set aapcs_reg to an RTX of the register allocated as is
5654 required for a return from FUNCTION_ARG. */
5655 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5657 /* Return true if a result of mode MODE (or type TYPE if MODE is
5658 BLKmode) is can be returned in this co-processor's registers. */
5659 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5661 /* Allocate and return an RTX element to hold the return type of a
5662 call, this routine must not fail and will only be called if
5663 is_return_candidate returned true with the same parameters. */
5664 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5666 /* Finish processing this argument and prepare to start processing
5668 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5669 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5677 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5682 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5683 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5690 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5692 /* We aren't passed a decl, so we can't check that a call is local.
5693 However, it isn't clear that that would be a win anyway, since it
5694 might limit some tail-calling opportunities. */
5695 enum arm_pcs pcs_variant
;
5699 const_tree fndecl
= NULL_TREE
;
5701 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5704 fntype
= TREE_TYPE (fntype
);
5707 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5710 pcs_variant
= arm_pcs_default
;
5712 if (pcs_variant
!= ARM_PCS_AAPCS
)
5716 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5717 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
/* [Extraction note] Partial extraction with missing physical lines.
   Visible logic: pick the PCS variant, promote small integral return
   types, give co-processor slots first refusal via is_return_candidate /
   allocate_return_reg, widen small big-endian struct returns to a whole
   word, and finally fall back to returning in r0 (R0_REGNUM).  */
5726 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
5729 /* We aren't passed a decl, so we can't check that a call is local.
5730 However, it isn't clear that that would be a win anyway, since it
5731 might limit some tail-calling opportunities. */
5732 enum arm_pcs pcs_variant
;
5733 int unsignedp ATTRIBUTE_UNUSED
;
5737 const_tree fndecl
= NULL_TREE
;
5739 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5742 fntype
= TREE_TYPE (fntype
);
5745 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5748 pcs_variant
= arm_pcs_default
;
5750 /* Promote integer types. */
5751 if (type
&& INTEGRAL_TYPE_P (type
))
5752 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5754 if (pcs_variant
!= ARM_PCS_AAPCS
)
5758 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5759 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5761 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5765 /* Promotes small structs returned in a register to full-word size
5766 for big-endian AAPCS. */
5767 if (type
&& arm_return_in_msb (type
))
5769 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5770 if (size
% UNITS_PER_WORD
!= 0)
5772 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5773 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5777 return gen_rtx_REG (mode
, R0_REGNUM
);
/* [Extraction note] Partial extraction; the statement executed when the
   big-endian fixed-point condition holds is missing (presumably a mode
   adjustment — TODO confirm upstream).  Otherwise this simply delegates
   to aapcs_allocate_return_reg with no type information (libcall).  */
5781 aapcs_libcall_value (machine_mode mode
)
5783 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5784 && GET_MODE_SIZE (mode
) <= 4)
5787 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
/* [Extraction note] Partial extraction with missing physical lines; the
   rule-letter comments (C1..C8) refer to the AAPCS argument-marshalling
   rules and are preserved from the original.  */
5790 /* Lay out a function argument using the AAPCS rules. The rule
5791 numbers referred to here are those in the AAPCS. */
5793 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5794 const_tree type
, bool named
)
5799 /* We only need to do this once per argument. */
5800 if (pcum
->aapcs_arg_processed
)
5803 pcum
->aapcs_arg_processed
= true;
5805 /* Special case: if named is false then we are handling an incoming
5806 anonymous argument which is on the stack. */
5810 /* Is this a potential co-processor register candidate? */
5811 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5813 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5814 pcum
->aapcs_cprc_slot
= slot
;
5816 /* We don't have to apply any of the rules from part B of the
5817 preparation phase, these are handled elsewhere in the
5822 /* A Co-processor register candidate goes either in its own
5823 class of registers or on the stack. */
5824 if (!pcum
->aapcs_cprc_failed
[slot
])
5826 /* C1.cp - Try to allocate the argument to co-processor
5828 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5831 /* C2.cp - Put the argument on the stack and note that we
5832 can't assign any more candidates in this slot. We also
5833 need to note that we have allocated stack space, so that
5834 we won't later try to split a non-cprc candidate between
5835 core registers and the stack. */
5836 pcum
->aapcs_cprc_failed
[slot
] = true;
5837 pcum
->can_split
= false;
5840 /* We didn't get a register, so this argument goes on the
5842 gcc_assert (pcum
->can_split
== false);
5847 /* C3 - For double-word aligned arguments, round the NCRN up to the
5848 next even number. */
5849 ncrn
= pcum
->aapcs_ncrn
;
5850 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5853 nregs
= ARM_NUM_REGS2(mode
, type
);
5855 /* Sigh, this test should really assert that nregs > 0, but a GCC
5856 extension allows empty structs and then gives them empty size; it
5857 then allows such a structure to be passed by value. For some of
5858 the code below we have to pretend that such an argument has
5859 non-zero size so that we 'locate' it correctly either in
5860 registers or on the stack. */
5861 gcc_assert (nregs
>= 0);
5863 nregs2
= nregs
? nregs
: 1;
5865 /* C4 - Argument fits entirely in core registers. */
5866 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5868 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5869 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5873 /* C5 - Some core registers left and there are no arguments already
5874 on the stack: split this argument between the remaining core
5875 registers and the stack. */
5876 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5878 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5879 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5880 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5884 /* C6 - NCRN is set to 4. */
5885 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5887 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
/* [Extraction note] Partial extraction with missing physical lines.
   Visible logic: reset all AAPCS bookkeeping in *pcum (next core register
   number, partial bytes, cprc slot, split flag), run each co-processor
   slot's cum_init hook, then initialize the legacy/iWMMXt fields,
   counting named arguments when TARGET_REALLY_IWMMXT.  */
5891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5892 for a call to a function whose data type is FNTYPE.
5893 For a library call, FNTYPE is NULL. */
5895 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5897 tree fndecl ATTRIBUTE_UNUSED
)
5899 /* Long call handling. */
5901 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5903 pcum
->pcs_variant
= arm_pcs_default
;
5905 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5907 if (arm_libcall_uses_aapcs_base (libname
))
5908 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5910 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5911 pcum
->aapcs_reg
= NULL_RTX
;
5912 pcum
->aapcs_partial
= 0;
5913 pcum
->aapcs_arg_processed
= false;
5914 pcum
->aapcs_cprc_slot
= -1;
5915 pcum
->can_split
= true;
5917 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5921 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5923 pcum
->aapcs_cprc_failed
[i
] = false;
5924 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5932 /* On the ARM, the offset starts at 0. */
5934 pcum
->iwmmxt_nregs
= 0;
5935 pcum
->can_split
= true;
5937 /* Varargs vectors are treated the same as long long.
5938 named_count avoids having to change the way arm handles 'named' */
5939 pcum
->named_count
= 0;
5942 if (TARGET_REALLY_IWMMXT
&& fntype
)
5946 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5948 fn_arg
= TREE_CHAIN (fn_arg
))
5949 pcum
->named_count
+= 1;
/* An unprototyped function gets an effectively unbounded named count.  */
5951 if (! pcum
->named_count
)
5952 pcum
->named_count
= INT_MAX
;
/* [Extraction note] Body fragment of arm_lra_p (signature lines missing
   from the extraction; HEAD's forward declaration shows
   `static bool arm_lra_p (void)`).  Simply reports the arm_lra_flag
   option: whether LRA replaces the classic reload pass.  */
5956 /* Return true if we use LRA instead of reload pass. */
5960 return arm_lra_flag
;
/* [Extraction note] Only the brace lines are missing from this fragment.
   True when either the mode's natural alignment or the (optional) type's
   alignment exceeds PARM_BOUNDARY, i.e. the argument needs an even
   register pair / 8-byte stack slot.  */
5963 /* Return true if mode/type need doubleword alignment. */
5965 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
5967 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5968 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
/* [Extraction note] Partial extraction with missing physical lines
   (several return paths and braces absent).  AAPCS calls delegate to
   aapcs_layout_arg; iWMMXt vector varargs get co-processor registers;
   otherwise the classic r0-r3 allocation is used.  */
5972 /* Determine where to put an argument to a function.
5973 Value is zero to push the argument on the stack,
5974 or a hard register in which to store the argument.
5976 MODE is the argument's machine mode.
5977 TYPE is the data type of the argument (as a tree).
5978 This is null for libcalls where that information may
5980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5981 the preceding args and about the function being called.
5982 NAMED is nonzero if this argument is a named parameter
5983 (otherwise it is an extra parameter matching an ellipsis).
5985 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5986 other arguments are passed on the stack. If (NAMED == 0) (which happens
5987 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5988 defined), say it is passed in the stack (function_prologue will
5989 indeed make it pass in the stack if necessary). */
5992 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
5993 const_tree type
, bool named
)
5995 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5998 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5999 a call insn (op3 of a call_value insn). */
6000 if (mode
== VOIDmode
)
6003 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6005 aapcs_layout_arg (pcum
, mode
, type
, named
);
6006 return pcum
->aapcs_reg
;
6009 /* Varargs vectors are treated the same as long long.
6010 named_count avoids having to change the way arm handles 'named' */
6011 if (TARGET_IWMMXT_ABI
6012 && arm_vector_mode_supported_p (mode
)
6013 && pcum
->named_count
> pcum
->nargs
+ 1)
6015 if (pcum
->iwmmxt_nregs
<= 9)
6016 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6019 pcum
->can_split
= false;
6024 /* Put doubleword aligned quantities in even register pairs. */
6026 && ARM_DOUBLEWORD_ALIGN
6027 && arm_needs_doubleword_align (mode
, type
))
6030 /* Only allow splitting an arg between regs and memory if all preceding
6031 args were allocated to regs. For args passed by reference we only count
6032 the reference pointer. */
6033 if (pcum
->can_split
)
6036 nregs
= ARM_NUM_REGS2 (mode
, type
);
6038 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6041 return gen_rtx_REG (mode
, pcum
->nregs
);
/* [Extraction note] Partial fragment; the else-branch of the ternary
   (presumably PARM_BOUNDARY — TODO confirm upstream) is missing.
   Returns the stack/register alignment required for an argument of
   MODE/TYPE: doubleword when arm_needs_doubleword_align says so.  */
6045 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6047 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6048 ? DOUBLEWORD_ALIGNMENT
/* [Extraction note] Partial extraction with missing physical lines.
   Returns how many bytes of the argument are passed in registers when it
   is split between registers and the stack: AAPCS variants report the
   precomputed aapcs_partial; the legacy ABI computes the straddle over
   NUM_ARG_REGS directly.  */
6053 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6054 tree type
, bool named
)
6056 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6057 int nregs
= pcum
->nregs
;
6059 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6061 aapcs_layout_arg (pcum
, mode
, type
, named
);
6062 return pcum
->aapcs_partial
;
6065 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6068 if (NUM_ARG_REGS
> nregs
6069 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6071 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
/* [Extraction note] Partial extraction with missing physical lines.
   For AAPCS: finish the current argument (running any pending
   co-processor slot's advance hook), then roll the per-argument state
   forward (ncrn <- next_ncrn, clear reg/partial/processed).  For the
   legacy ABI: bump iwmmxt_nregs or nregs as appropriate.  */
6076 /* Update the data in PCUM to advance over an argument
6077 of mode MODE and data type TYPE.
6078 (TYPE is null for libcalls where that information may not be available.) */
6081 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6082 const_tree type
, bool named
)
6084 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6086 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6088 aapcs_layout_arg (pcum
, mode
, type
, named
);
6090 if (pcum
->aapcs_cprc_slot
>= 0)
6092 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6094 pcum
->aapcs_cprc_slot
= -1;
6097 /* Generic stuff. */
6098 pcum
->aapcs_arg_processed
= false;
6099 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6100 pcum
->aapcs_reg
= NULL_RTX
;
6101 pcum
->aapcs_partial
= 0;
6106 if (arm_vector_mode_supported_p (mode
)
6107 && pcum
->named_count
> pcum
->nargs
6108 && TARGET_IWMMXT_ABI
)
6109 pcum
->iwmmxt_nregs
+= 1;
6111 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
/* [Extraction note] Essentially complete apart from braces.  An argument
   is passed by reference exactly when its type exists and its size is
   not a compile-time constant (variable-sized type).  */
6115 /* Variable sized types are passed by reference. This is a GCC
6116 extension to the ARM ABI. */
6119 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6120 machine_mode mode ATTRIBUTE_UNUSED
,
6121 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6123 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* [Extraction note] Enum declaration lines are partially missing; the
   three pragma callbacks below each just set the file-scope state that
   arm_set_default_type_attributes later consults.  */
6126 /* Encode the current state of the #pragma [no_]long_calls. */
6129 OFF
, /* No #pragma [no_]long_calls is in effect. */
6130 LONG
, /* #pragma long_calls is in effect. */
6131 SHORT
/* #pragma no_long_calls is in effect. */
6134 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
/* #pragma long_calls: force long calls from here on.  */
6137 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6139 arm_pragma_long_calls
= LONG
;
/* #pragma no_long_calls: force short calls from here on.  */
6143 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6145 arm_pragma_long_calls
= SHORT
;
/* #pragma long_calls_off: restore the default behavior.  */
6149 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6151 arm_pragma_long_calls
= OFF
;
/* [Extraction note] Partial extraction; the warning's `name` argument
   and the trailing return are on missing lines.  Rejects (with a
   -Wattributes warning) any use of the attribute on a non-function.  */
6154 /* Handle an attribute requiring a FUNCTION_DECL;
6155 arguments as in struct attribute_spec.handler. */
6157 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6158 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6160 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6162 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6164 *no_add_attrs
= true;
/* [Extraction note] Partial extraction with missing physical lines.
   Decl case: only FUNCTION_DECLs may carry "interrupt"/"isr".  Type
   case: validate the ISR kind via arm_isr_value; for a pointer to
   function type, rebuild a variant type carrying the attribute; may also
   forward the attribute from type to decl via the ATTR_FLAG_*_NEXT
   flags.  */
6170 /* Handle an "interrupt" or "isr" attribute;
6171 arguments as in struct attribute_spec.handler. */
6173 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6178 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6180 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6182 *no_add_attrs
= true;
6184 /* FIXME: the argument if any is checked for type attributes;
6185 should it be checked for decl ones? */
6189 if (TREE_CODE (*node
) == FUNCTION_TYPE
6190 || TREE_CODE (*node
) == METHOD_TYPE
)
6192 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6194 warning (OPT_Wattributes
, "%qE attribute ignored",
6196 *no_add_attrs
= true;
6199 else if (TREE_CODE (*node
) == POINTER_TYPE
6200 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6201 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6202 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6204 *node
= build_variant_type_copy (*node
);
6205 TREE_TYPE (*node
) = build_type_attribute_variant
6207 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6208 *no_add_attrs
= true;
6212 /* Possibly pass this attribute on from the type to a decl. */
6213 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6214 | (int) ATTR_FLAG_FUNCTION_NEXT
6215 | (int) ATTR_FLAG_ARRAY_NEXT
))
6217 *no_add_attrs
= true;
6218 return tree_cons (name
, args
, NULL_TREE
);
6222 warning (OPT_Wattributes
, "%qE attribute ignored",
/* [Extraction note] Nearly complete; the trailing return (presumably
   NULL_TREE — TODO confirm upstream) is on a missing line.  Warns and
   refuses to attach "pcs" when its argument names no known PCS variant.  */
6231 /* Handle a "pcs" attribute; arguments as in struct
6232 attribute_spec.handler. */
6234 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6235 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6237 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6239 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6240 *no_add_attrs
= true;
/* [Extraction note] Partial extraction (guard conditions around the
   visibility assignment are missing).  Implements "notshared" by forcing
   hidden visibility on the type's TYPE_NAME decl; *no_add_attrs is
   cleared so the attribute itself is kept.  Compiled only when
   TARGET_DLLIMPORT_DECL_ATTRIBUTES.  */
6245 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6246 /* Handle the "notshared" attribute. This attribute is another way of
6247 requesting hidden visibility. ARM's compiler supports
6248 "__declspec(notshared)"; we support the same thing via an
6252 arm_handle_notshared_attribute (tree
*node
,
6253 tree name ATTRIBUTE_UNUSED
,
6254 tree args ATTRIBUTE_UNUSED
,
6255 int flags ATTRIBUTE_UNUSED
,
6258 tree decl
= TYPE_NAME (*node
);
6262 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6263 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6264 *no_add_attrs
= false;
/* [Extraction note] Partial extraction with missing physical lines
   (return statements absent).  Compares long_call/short_call and
   isr/interrupt attributes between two function types.  */
6270 /* Return 0 if the attributes for two types are incompatible, 1 if they
6271 are compatible, and 2 if they are nearly compatible (which causes a
6272 warning to be generated). */
6274 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6278 /* Check for mismatch of non-default calling convention. */
6279 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6282 /* Check for mismatched call attributes. */
6283 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6284 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6285 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6286 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6288 /* Only bother to check if an attribute is defined. */
6289 if (l1
| l2
| s1
| s2
)
6291 /* If one type has an attribute, the other must have the same attribute. */
6292 if ((l1
!= l2
) || (s1
!= s2
))
6295 /* Disallow mixed attributes. */
6296 if ((l1
& s2
) || (l2
& s1
))
6300 /* Check for mismatched ISR attribute. */
6301 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6303 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6304 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
/* NOTE(review): the next statement assigns l1 but inspects type2 — by
   symmetry with the type1 lines above this looks like it should assign
   l2; verify against the pristine source (may be extraction damage).  */
6306 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
/* [Extraction note] Partial extraction; the guard that attaches
   attr_name only when a pragma is active sits on missing lines.
   Adds long_call/short_call to function/method types created while the
   corresponding #pragma (tracked in arm_pragma_long_calls) is active.  */
6313 /* Assigns default attributes to newly defined type. This is used to
6314 set short_call/long_call attributes for function types of
6315 functions defined inside corresponding #pragma scopes. */
6317 arm_set_default_type_attributes (tree type
)
6319 /* Add __attribute__ ((long_call)) to all functions, when
6320 inside #pragma long_calls or __attribute__ ((short_call)),
6321 when inside #pragma no_long_calls. */
6322 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6324 tree type_attr_list
, attr_name
;
6325 type_attr_list
= TYPE_ATTRIBUTES (type
);
6327 if (arm_pragma_long_calls
== LONG
)
6328 attr_name
= get_identifier ("long_call");
6329 else if (arm_pragma_long_calls
== SHORT
)
6330 attr_name
= get_identifier ("short_call");
6334 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6335 TYPE_ATTRIBUTES (type
) = type_attr_list
;
/* [Extraction note] Partial extraction; the early-return values after
   each guard are on missing lines.  Conservatively decides whether DECL
   is certain to be emitted into SECTION: requires a static, locally
   binding definition whose section cannot be made unique.  */
6339 /* Return true if DECL is known to be linked into section SECTION. */
6342 arm_function_in_section_p (tree decl
, section
*section
)
6344 /* We can only be certain about functions defined in the same
6345 compilation unit. */
6346 if (!TREE_STATIC (decl
))
6349 /* Make sure that SYMBOL always binds to the definition in this
6350 compilation unit. */
6351 if (!targetm
.binds_local_p (decl
))
6354 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6355 if (!DECL_SECTION_NAME (decl
))
6357 /* Make sure that we will not create a unique section for DECL. */
6358 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6362 return function_section (decl
) == section
;
/* [Extraction note] Partial extraction; several early returns are on
   missing lines.  Implements the a-f policy described in the original
   comment: short_call attribute and same-section definitions (case f)
   suppress long calls; long_call attribute or -mlong-calls force them.  */
6365 /* Return nonzero if a 32-bit "long_call" should be generated for
6366 a call from the current function to DECL. We generate a long_call
6369 a. has an __attribute__((long call))
6370 or b. is within the scope of a #pragma long_calls
6371 or c. the -mlong-calls command line switch has been specified
6373 However we do not generate a long call if the function:
6375 d. has an __attribute__ ((short_call))
6376 or e. is inside the scope of a #pragma no_long_calls
6377 or f. is defined in the same section as the current function. */
6380 arm_is_long_call_p (tree decl
)
6385 return TARGET_LONG_CALLS
;
6387 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6388 if (lookup_attribute ("short_call", attrs
))
6391 /* For "f", be conservative, and only cater for cases in which the
6392 whole of the current function is placed in the same section. */
6393 if (!flag_reorder_blocks_and_partition
6394 && TREE_CODE (decl
) == FUNCTION_DECL
6395 && arm_function_in_section_p (decl
, current_function_section ()))
6398 if (lookup_attribute ("long_call", attrs
))
6401 return TARGET_LONG_CALLS
;
/* [Extraction note] Partial extraction; the `return false;` after each
   disqualifying test and the final `return true;` are on missing lines.
   Each visible guard names one reason a sibcall must be refused
   (explicit block flag, Thumb-1, VxWorks PIC PLT, interworking to an
   unseen public symbol, ISR, mismatched return-value location, stack
   realignment, bare-metal weak reference).  */
6404 /* Return nonzero if it is ok to make a tail-call to DECL. */
6406 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6408 unsigned long func_type
;
6410 if (cfun
->machine
->sibcall_blocked
)
6413 /* Never tailcall something if we are generating code for Thumb-1. */
6417 /* The PIC register is live on entry to VxWorks PLT entries, so we
6418 must make the call before restoring the PIC register. */
6419 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6422 /* If we are interworking and the function is not declared static
6423 then we can't tail-call it unless we know that it exists in this
6424 compilation unit (since it might be a Thumb routine). */
6425 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6426 && !TREE_ASM_WRITTEN (decl
))
6429 func_type
= arm_current_func_type ();
6430 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6431 if (IS_INTERRUPT (func_type
))
6434 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6436 /* Check that the return value locations are the same. For
6437 example that we aren't returning a value from the sibling in
6438 a VFP register but then need to transfer it to a core
6442 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6443 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6445 if (!rtx_equal_p (a
, b
))
6449 /* Never tailcall if function may be called with a misaligned SP. */
6450 if (IS_STACKALIGN (func_type
))
6453 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6454 references should become a NOP. Don't convert such calls into
6456 if (TARGET_AAPCS_BASED
6457 && arm_abi
== ARM_ABI_AAPCS
6459 && DECL_WEAK (decl
))
6462 /* Everything else is ok. */
/* [Extraction note] Partial fragment; the return values are on missing
   lines.  The visible condition matches a bare SYMBOL_REF or a
   (const (plus (symbol_ref) ...)) — forms that are NOT directly
   legitimate under PIC and must go through the GOT.  */
6467 /* Addressing mode support functions. */
6469 /* Return nonzero if X is a legitimate immediate operand when compiling
6470 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6472 legitimate_pic_operand_p (rtx x
)
6474 if (GET_CODE (x
) == SYMBOL_REF
6475 || (GET_CODE (x
) == CONST
6476 && GET_CODE (XEXP (x
, 0)) == PLUS
6477 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
/* [Extraction note] Partial extraction with missing physical lines
   (branch structure and the start_sequence/get_insns/end_sequence
   bracketing around the emitted setup code are not all visible).
   Two regimes are visible: a fixed arm_pic_register (just materialize
   the hard reg), or a pseudo PIC reg whose initialization sequence is
   emitted and queued on the function's entry edge.  */
6483 /* Record that the current function needs a PIC register. Initialize
6484 cfun->machine->pic_reg if we have not already done so. */
6487 require_pic_register (void)
6489 /* A lot of the logic here is made obscure by the fact that this
6490 routine gets called as part of the rtx cost estimation process.
6491 We don't want those calls to affect any assumptions about the real
6492 function; and further, we can't call entry_of_function() until we
6493 start the real expansion process. */
6494 if (!crtl
->uses_pic_offset_table
)
6496 gcc_assert (can_create_pseudo_p ());
6497 if (arm_pic_register
!= INVALID_REGNUM
6498 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6500 if (!cfun
->machine
->pic_reg
)
6501 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6503 /* Play games to avoid marking the function as needing pic
6504 if we are being called as part of the cost-estimation
6506 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6507 crtl
->uses_pic_offset_table
= 1;
6511 rtx_insn
*seq
, *insn
;
6513 if (!cfun
->machine
->pic_reg
)
6514 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6516 /* Play games to avoid marking the function as needing pic
6517 if we are being called as part of the cost-estimation
6519 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6521 crtl
->uses_pic_offset_table
= 1;
6524 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6525 && arm_pic_register
> LAST_LO_REGNUM
)
6526 emit_move_insn (cfun
->machine
->pic_reg
,
6527 gen_rtx_REG (Pmode
, arm_pic_register
));
6529 arm_load_pic_register (0UL);
6534 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6536 INSN_LOCATION (insn
) = prologue_location
;
6538 /* We can be called during expansion of PHI nodes, where
6539 we can't yet emit instructions directly in the final
6540 insn stream. Queue the insns on the entry edge, they will
6541 be committed after everything else is expanded. */
6542 insert_insn_on_edge (seq
,
6543 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
/* [Extraction note] Partial extraction with missing physical lines
   (several guards, returns and local declarations absent).  Visible
   structure: SYMBOL_REF/LABEL_REF operands either take the
   text-relative fast path (arm_pic_static_addr) or are loaded through
   the GOT via gen_calculate_pic_address with a REG_EQUAL note; CONST
   operands are decomposed recursively (with special cases for TLS
   unspecs) and recombined as plus_constant / addsi3 / PLUS.  */
6550 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6552 if (GET_CODE (orig
) == SYMBOL_REF
6553 || GET_CODE (orig
) == LABEL_REF
)
6559 gcc_assert (can_create_pseudo_p ());
6560 reg
= gen_reg_rtx (Pmode
);
6563 /* VxWorks does not impose a fixed gap between segments; the run-time
6564 gap can be different from the object-file gap. We therefore can't
6565 use GOTOFF unless we are absolutely sure that the symbol is in the
6566 same segment as the GOT. Unfortunately, the flexibility of linker
6567 scripts means that we can't be sure of that in general, so assume
6568 that GOTOFF is never valid on VxWorks. */
6569 if ((GET_CODE (orig
) == LABEL_REF
6570 || (GET_CODE (orig
) == SYMBOL_REF
&&
6571 SYMBOL_REF_LOCAL_P (orig
)))
6573 && arm_pic_data_is_text_relative
)
6574 insn
= arm_pic_static_addr (orig
, reg
);
6580 /* If this function doesn't have a pic register, create one now. */
6581 require_pic_register ();
6583 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6585 /* Make the MEM as close to a constant as possible. */
6586 mem
= SET_SRC (pat
);
6587 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6588 MEM_READONLY_P (mem
) = 1;
6589 MEM_NOTRAP_P (mem
) = 1;
6591 insn
= emit_insn (pat
);
6594 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6596 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6600 else if (GET_CODE (orig
) == CONST
)
6604 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6605 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6608 /* Handle the case where we have: const (UNSPEC_TLS). */
6609 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6610 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6613 /* Handle the case where we have:
6614 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6616 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6617 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6618 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6620 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6626 gcc_assert (can_create_pseudo_p ());
6627 reg
= gen_reg_rtx (Pmode
);
6630 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6632 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6633 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6634 base
== reg
? 0 : reg
);
6636 if (CONST_INT_P (offset
))
6638 /* The base register doesn't really matter, we only want to
6639 test the index for the appropriate mode. */
6640 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6642 gcc_assert (can_create_pseudo_p ());
6643 offset
= force_reg (Pmode
, offset
);
6646 if (CONST_INT_P (offset
))
6647 return plus_constant (Pmode
, base
, INTVAL (offset
));
6650 if (GET_MODE_SIZE (mode
) > 4
6651 && (GET_MODE_CLASS (mode
) == MODE_INT
6652 || TARGET_SOFT_FLOAT
))
6654 emit_insn (gen_addsi3 (reg
, base
, offset
));
6658 return gen_rtx_PLUS (Pmode
, base
, offset
);
/* [Extraction note] Partial extraction; the `return reg;` inside each
   loop and the final gcc_unreachable-style failure are on missing
   lines.  Search order visible here: dead argument registers (r3..r0),
   the two r3-is-free heuristics, pushed call-saved low registers, then
   (Thumb-2) pushed high registers.  */
6665 /* Find a spare register to use during the prolog of a function. */
6668 thumb_find_work_register (unsigned long pushed_regs_mask
)
6672 /* Check the argument registers first as these are call-used. The
6673 register allocation order means that sometimes r3 might be used
6674 but earlier argument registers might not, so check them all. */
6675 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6676 if (!df_regs_ever_live_p (reg
))
6679 /* Before going on to check the call-saved registers we can try a couple
6680 more ways of deducing that r3 is available. The first is when we are
6681 pushing anonymous arguments onto the stack and we have less than 4
6682 registers worth of fixed arguments(*). In this case r3 will be part of
6683 the variable argument list and so we can be sure that it will be
6684 pushed right at the start of the function. Hence it will be available
6685 for the rest of the prologue.
6686 (*): ie crtl->args.pretend_args_size is greater than 0. */
6687 if (cfun
->machine
->uses_anonymous_args
6688 && crtl
->args
.pretend_args_size
> 0)
6689 return LAST_ARG_REGNUM
;
6691 /* The other case is when we have fixed arguments but less than 4 registers
6692 worth. In this case r3 might be used in the body of the function, but
6693 it is not being used to convey an argument into the function. In theory
6694 we could just check crtl->args.size to see how many bytes are
6695 being passed in argument registers, but it seems that it is unreliable.
6696 Sometimes it will have the value 0 when in fact arguments are being
6697 passed. (See testcase execute/20021111-1.c for an example). So we also
6698 check the args_info.nregs field as well. The problem with this field is
6699 that it makes no allowances for arguments that are passed to the
6700 function but which are not used. Hence we could miss an opportunity
6701 when a function has an unused argument in r3. But it is better to be
6702 safe than to be sorry. */
6703 if (! cfun
->machine
->uses_anonymous_args
6704 && crtl
->args
.size
>= 0
6705 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6706 && (TARGET_AAPCS_BASED
6707 ? crtl
->args
.info
.aapcs_ncrn
< 4
6708 : crtl
->args
.info
.nregs
< 4))
6709 return LAST_ARG_REGNUM
;
6711 /* Otherwise look for a call-saved register that is going to be pushed. */
6712 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6713 if (pushed_regs_mask
& (1 << reg
))
6718 /* Thumb-2 can use high regs. */
6719 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6720 if (pushed_regs_mask
& (1 << reg
))
6723 /* Something went wrong - thumb_compute_save_reg_mask()
6724 should have arranged for a suitable register to be pushed. */
/* Monotonic counter used to generate unique PIC label unspecs; GTY so it
   survives garbage collection between functions.  */
6728 static GTY(()) int pic_labelno
;
/* [Extraction note] Partial extraction with missing physical lines.
   Visible structure: no-op unless the function uses the PIC offset
   table; VxWorks RTP computes the GOT base from GOTT_BASE/GOTT_INDEX;
   otherwise a pc-relative UNSPEC_PIC_LABEL constant is built ('dot + 8'
   ARM / 'dot + 4' Thumb) and loaded, with three Thumb-1 sub-cases
   depending on whether the PIC register is a high register.  */
6730 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6734 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6736 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6738 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6741 gcc_assert (flag_pic
);
6743 pic_reg
= cfun
->machine
->pic_reg
;
6744 if (TARGET_VXWORKS_RTP
)
6746 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6747 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6748 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6750 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6752 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6753 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6757 /* We use an UNSPEC rather than a LABEL_REF because this label
6758 never appears in the code stream. */
6760 labelno
= GEN_INT (pic_labelno
++);
6761 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6762 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6764 /* On the ARM the PC register contains 'dot + 8' at the time of the
6765 addition, on the Thumb it is 'dot + 4'. */
6766 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6767 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6769 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6773 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6775 else /* TARGET_THUMB1 */
6777 if (arm_pic_register
!= INVALID_REGNUM
6778 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6780 /* We will have pushed the pic register, so we should always be
6781 able to find a work register. */
6782 pic_tmp
= gen_rtx_REG (SImode
,
6783 thumb_find_work_register (saved_regs
));
6784 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6785 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6786 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6788 else if (arm_pic_register
!= INVALID_REGNUM
6789 && arm_pic_register
> LAST_LO_REGNUM
6790 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6792 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6793 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6794 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6797 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6801 /* Need to emit this whether or not we obey regdecls,
6802 since setjmp/longjmp can cause life info to screw up. */
/* [Extraction note] Partial extraction; only the trailing return
   (presumably `return insn;` — TODO confirm upstream) is absent.
   Builds const(unspec [ORIG, pc_label+delta] UNSPEC_SYMBOL_OFFSET) and
   emits a unified pc-relative address load of it into REG.  */
6806 /* Generate code to load the address of a static var when flag_pic is set. */
6808 arm_pic_static_addr (rtx orig
, rtx reg
)
6810 rtx l1
, labelno
, offset_rtx
, insn
;
6812 gcc_assert (flag_pic
);
6814 /* We use an UNSPEC rather than a LABEL_REF because this label
6815 never appears in the code stream. */
6816 labelno
= GEN_INT (pic_labelno
++);
6817 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6818 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6820 /* On the ARM the PC register contains 'dot + 8' at the time of the
6821 addition, on the Thumb it is 'dot + 4'. */
6822 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6823 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6824 UNSPEC_SYMBOL_OFFSET
);
6825 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6827 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
/* [Extraction note] Partial fragment; the REG_P check, regno extraction
   and the strict/non-strict branch are on missing lines.  Visible
   result: strict mode uses ARM_REGNO_OK_FOR_BASE_P; non-strict mode
   also accepts pseudos and the frame/arg pointers.  */
6831 /* Return nonzero if X is valid as an ARM state addressing register. */
6833 arm_address_register_rtx_p (rtx x
, int strict_p
)
6843 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6845 return (regno
<= LAST_ARM_REGNUM
6846 || regno
>= FIRST_PSEUDO_REGISTER
6847 || regno
== FRAME_POINTER_REGNUM
6848 || regno
== ARG_POINTER_REGNUM
);
/* [Extraction note] Nearly complete; the fall-through return for
   non-MINUS codes is on a missing line.  MINUS of a symbol and a label
   reduces to a pc-relative relocation and needs no GOT indirection.  */
6851 /* Return TRUE if this rtx is the difference of a symbol and a label,
6852 and will reduce to a PC-relative relocation in the object file.
6853 Expressions like this can be left alone when generating PIC, rather
6854 than forced through the GOT. */
6856 pcrel_constant_p (rtx x
)
6858 if (GET_CODE (x
) == MINUS
)
6859 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
/* [Extraction note] Nearly complete (comment continuation and braces
   missing).  True for an UNSPEC_PIC_SYM, which the calculate_pic_address
   pattern in arm.md later splits into a register, so a non-strict
   address check may treat it as a future index register.  */
6864 /* Return true if X will surely end up in an index register after next
6867 will_be_in_index_register (const_rtx x
)
6869 /* arm.md: calculate_pic_address will split this into a register. */
6870 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6873 /* Return nonzero if X is a valid ARM state address operand. */
6875 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
6879 enum rtx_code code
= GET_CODE (x
);
6881 if (arm_address_register_rtx_p (x
, strict_p
))
6884 use_ldrd
= (TARGET_LDRD
6886 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6888 if (code
== POST_INC
|| code
== PRE_DEC
6889 || ((code
== PRE_INC
|| code
== POST_DEC
)
6890 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6891 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6893 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6894 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6895 && GET_CODE (XEXP (x
, 1)) == PLUS
6896 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6898 rtx addend
= XEXP (XEXP (x
, 1), 1);
6900 /* Don't allow ldrd post increment by register because it's hard
6901 to fixup invalid register choices. */
6903 && GET_CODE (x
) == POST_MODIFY
6907 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6908 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6911 /* After reload constants split into minipools will have addresses
6912 from a LABEL_REF. */
6913 else if (reload_completed
6914 && (code
== LABEL_REF
6916 && GET_CODE (XEXP (x
, 0)) == PLUS
6917 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6918 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6921 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6924 else if (code
== PLUS
)
6926 rtx xop0
= XEXP (x
, 0);
6927 rtx xop1
= XEXP (x
, 1);
6929 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6930 && ((CONST_INT_P (xop1
)
6931 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6932 || (!strict_p
&& will_be_in_index_register (xop1
))))
6933 || (arm_address_register_rtx_p (xop1
, strict_p
)
6934 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6938 /* Reload currently can't handle MINUS, so disable this for now */
6939 else if (GET_CODE (x
) == MINUS
)
6941 rtx xop0
= XEXP (x
, 0);
6942 rtx xop1
= XEXP (x
, 1);
6944 return (arm_address_register_rtx_p (xop0
, strict_p
)
6945 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6949 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6950 && code
== SYMBOL_REF
6951 && CONSTANT_POOL_ADDRESS_P (x
)
6953 && symbol_mentioned_p (get_pool_constant (x
))
6954 && ! pcrel_constant_p (get_pool_constant (x
))))
6960 /* Return nonzero if X is a valid Thumb-2 address operand. */
6962 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
6965 enum rtx_code code
= GET_CODE (x
);
6967 if (arm_address_register_rtx_p (x
, strict_p
))
6970 use_ldrd
= (TARGET_LDRD
6972 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6974 if (code
== POST_INC
|| code
== PRE_DEC
6975 || ((code
== PRE_INC
|| code
== POST_DEC
)
6976 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6977 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6979 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6980 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6981 && GET_CODE (XEXP (x
, 1)) == PLUS
6982 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6984 /* Thumb-2 only has autoincrement by constant. */
6985 rtx addend
= XEXP (XEXP (x
, 1), 1);
6986 HOST_WIDE_INT offset
;
6988 if (!CONST_INT_P (addend
))
6991 offset
= INTVAL(addend
);
6992 if (GET_MODE_SIZE (mode
) <= 4)
6993 return (offset
> -256 && offset
< 256);
6995 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6996 && (offset
& 3) == 0);
6999 /* After reload constants split into minipools will have addresses
7000 from a LABEL_REF. */
7001 else if (reload_completed
7002 && (code
== LABEL_REF
7004 && GET_CODE (XEXP (x
, 0)) == PLUS
7005 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7006 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7009 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7012 else if (code
== PLUS
)
7014 rtx xop0
= XEXP (x
, 0);
7015 rtx xop1
= XEXP (x
, 1);
7017 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7018 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7019 || (!strict_p
&& will_be_in_index_register (xop1
))))
7020 || (arm_address_register_rtx_p (xop1
, strict_p
)
7021 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7024 /* Normally we can assign constant values to target registers without
7025 the help of constant pool. But there are cases we have to use constant
7027 1) assign a label to register.
7028 2) sign-extend a 8bit value to 32bit and then assign to register.
7030 Constant pool access in format:
7031 (set (reg r0) (mem (symbol_ref (".LC0"))))
7032 will cause the use of literal pool (later in function arm_reorg).
7033 So here we mark such format as an invalid format, then the compiler
7034 will adjust it into:
7035 (set (reg r0) (symbol_ref (".LC0")))
7036 (set (reg r0) (mem (reg r0))).
7037 No extra register is required, and (mem (reg r0)) won't cause the use
7038 of literal pools. */
7039 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7040 && CONSTANT_POOL_ADDRESS_P (x
))
7043 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7044 && code
== SYMBOL_REF
7045 && CONSTANT_POOL_ADDRESS_P (x
)
7047 && symbol_mentioned_p (get_pool_constant (x
))
7048 && ! pcrel_constant_p (get_pool_constant (x
))))
7054 /* Return nonzero if INDEX is valid for an address index operand in
7057 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7060 HOST_WIDE_INT range
;
7061 enum rtx_code code
= GET_CODE (index
);
7063 /* Standard coprocessor addressing modes. */
7064 if (TARGET_HARD_FLOAT
7066 && (mode
== SFmode
|| mode
== DFmode
))
7067 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7068 && INTVAL (index
) > -1024
7069 && (INTVAL (index
) & 3) == 0);
7071 /* For quad modes, we restrict the constant offset to be slightly less
7072 than what the instruction format permits. We do this because for
7073 quad mode moves, we will actually decompose them into two separate
7074 double-mode reads or writes. INDEX must therefore be a valid
7075 (double-mode) offset and so should INDEX+8. */
7076 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7077 return (code
== CONST_INT
7078 && INTVAL (index
) < 1016
7079 && INTVAL (index
) > -1024
7080 && (INTVAL (index
) & 3) == 0);
7082 /* We have no such constraint on double mode offsets, so we permit the
7083 full range of the instruction format. */
7084 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7085 return (code
== CONST_INT
7086 && INTVAL (index
) < 1024
7087 && INTVAL (index
) > -1024
7088 && (INTVAL (index
) & 3) == 0);
7090 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7091 return (code
== CONST_INT
7092 && INTVAL (index
) < 1024
7093 && INTVAL (index
) > -1024
7094 && (INTVAL (index
) & 3) == 0);
7096 if (arm_address_register_rtx_p (index
, strict_p
)
7097 && (GET_MODE_SIZE (mode
) <= 4))
7100 if (mode
== DImode
|| mode
== DFmode
)
7102 if (code
== CONST_INT
)
7104 HOST_WIDE_INT val
= INTVAL (index
);
7107 return val
> -256 && val
< 256;
7109 return val
> -4096 && val
< 4092;
7112 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7115 if (GET_MODE_SIZE (mode
) <= 4
7119 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7123 rtx xiop0
= XEXP (index
, 0);
7124 rtx xiop1
= XEXP (index
, 1);
7126 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7127 && power_of_two_operand (xiop1
, SImode
))
7128 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7129 && power_of_two_operand (xiop0
, SImode
)));
7131 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7132 || code
== ASHIFT
|| code
== ROTATERT
)
7134 rtx op
= XEXP (index
, 1);
7136 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7139 && INTVAL (op
) <= 31);
7143 /* For ARM v4 we may be doing a sign-extend operation during the
7149 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7155 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7157 return (code
== CONST_INT
7158 && INTVAL (index
) < range
7159 && INTVAL (index
) > -range
);
7162 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7163 index operand. i.e. 1, 2, 4 or 8. */
7165 thumb2_index_mul_operand (rtx op
)
7169 if (!CONST_INT_P (op
))
7173 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7176 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7178 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7180 enum rtx_code code
= GET_CODE (index
);
7182 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7183 /* Standard coprocessor addressing modes. */
7184 if (TARGET_HARD_FLOAT
7186 && (mode
== SFmode
|| mode
== DFmode
))
7187 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7188 /* Thumb-2 allows only > -256 index range for it's core register
7189 load/stores. Since we allow SF/DF in core registers, we have
7190 to use the intersection between -256~4096 (core) and -1024~1024
7192 && INTVAL (index
) > -256
7193 && (INTVAL (index
) & 3) == 0);
7195 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7197 /* For DImode assume values will usually live in core regs
7198 and only allow LDRD addressing modes. */
7199 if (!TARGET_LDRD
|| mode
!= DImode
)
7200 return (code
== CONST_INT
7201 && INTVAL (index
) < 1024
7202 && INTVAL (index
) > -1024
7203 && (INTVAL (index
) & 3) == 0);
7206 /* For quad modes, we restrict the constant offset to be slightly less
7207 than what the instruction format permits. We do this because for
7208 quad mode moves, we will actually decompose them into two separate
7209 double-mode reads or writes. INDEX must therefore be a valid
7210 (double-mode) offset and so should INDEX+8. */
7211 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7212 return (code
== CONST_INT
7213 && INTVAL (index
) < 1016
7214 && INTVAL (index
) > -1024
7215 && (INTVAL (index
) & 3) == 0);
7217 /* We have no such constraint on double mode offsets, so we permit the
7218 full range of the instruction format. */
7219 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7220 return (code
== CONST_INT
7221 && INTVAL (index
) < 1024
7222 && INTVAL (index
) > -1024
7223 && (INTVAL (index
) & 3) == 0);
7225 if (arm_address_register_rtx_p (index
, strict_p
)
7226 && (GET_MODE_SIZE (mode
) <= 4))
7229 if (mode
== DImode
|| mode
== DFmode
)
7231 if (code
== CONST_INT
)
7233 HOST_WIDE_INT val
= INTVAL (index
);
7234 /* ??? Can we assume ldrd for thumb2? */
7235 /* Thumb-2 ldrd only has reg+const addressing modes. */
7236 /* ldrd supports offsets of +-1020.
7237 However the ldr fallback does not. */
7238 return val
> -256 && val
< 256 && (val
& 3) == 0;
7246 rtx xiop0
= XEXP (index
, 0);
7247 rtx xiop1
= XEXP (index
, 1);
7249 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7250 && thumb2_index_mul_operand (xiop1
))
7251 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7252 && thumb2_index_mul_operand (xiop0
)));
7254 else if (code
== ASHIFT
)
7256 rtx op
= XEXP (index
, 1);
7258 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7261 && INTVAL (op
) <= 3);
7264 return (code
== CONST_INT
7265 && INTVAL (index
) < 4096
7266 && INTVAL (index
) > -256);
7269 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7271 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7281 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7283 return (regno
<= LAST_LO_REGNUM
7284 || regno
> LAST_VIRTUAL_REGISTER
7285 || regno
== FRAME_POINTER_REGNUM
7286 || (GET_MODE_SIZE (mode
) >= 4
7287 && (regno
== STACK_POINTER_REGNUM
7288 || regno
>= FIRST_PSEUDO_REGISTER
7289 || x
== hard_frame_pointer_rtx
7290 || x
== arg_pointer_rtx
)));
7293 /* Return nonzero if x is a legitimate index register. This is the case
7294 for any base register that can access a QImode object. */
7296 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7298 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7301 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7303 The AP may be eliminated to either the SP or the FP, so we use the
7304 least common denominator, e.g. SImode, and offsets from 0 to 64.
7306 ??? Verify whether the above is the right approach.
7308 ??? Also, the FP may be eliminated to the SP, so perhaps that
7309 needs special handling also.
7311 ??? Look at how the mips16 port solves this problem. It probably uses
7312 better ways to solve some of these problems.
7314 Although it is not incorrect, we don't accept QImode and HImode
7315 addresses based on the frame pointer or arg pointer until the
7316 reload pass starts. This is so that eliminating such addresses
7317 into stack based ones won't produce impossible code. */
7319 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7321 /* ??? Not clear if this is right. Experiment. */
7322 if (GET_MODE_SIZE (mode
) < 4
7323 && !(reload_in_progress
|| reload_completed
)
7324 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7325 || reg_mentioned_p (arg_pointer_rtx
, x
)
7326 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7327 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7328 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7329 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7332 /* Accept any base register. SP only in SImode or larger. */
7333 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7336 /* This is PC relative data before arm_reorg runs. */
7337 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7338 && GET_CODE (x
) == SYMBOL_REF
7339 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7342 /* This is PC relative data after arm_reorg runs. */
7343 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7345 && (GET_CODE (x
) == LABEL_REF
7346 || (GET_CODE (x
) == CONST
7347 && GET_CODE (XEXP (x
, 0)) == PLUS
7348 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7349 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7352 /* Post-inc indexing only supported for SImode and larger. */
7353 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7354 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7357 else if (GET_CODE (x
) == PLUS
)
7359 /* REG+REG address can be any two index registers. */
7360 /* We disallow FRAME+REG addressing since we know that FRAME
7361 will be replaced with STACK, and SP relative addressing only
7362 permits SP+OFFSET. */
7363 if (GET_MODE_SIZE (mode
) <= 4
7364 && XEXP (x
, 0) != frame_pointer_rtx
7365 && XEXP (x
, 1) != frame_pointer_rtx
7366 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7367 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7368 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7371 /* REG+const has 5-7 bit offset for non-SP registers. */
7372 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7373 || XEXP (x
, 0) == arg_pointer_rtx
)
7374 && CONST_INT_P (XEXP (x
, 1))
7375 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7378 /* REG+const has 10-bit offset for SP, but only SImode and
7379 larger is supported. */
7380 /* ??? Should probably check for DI/DFmode overflow here
7381 just like GO_IF_LEGITIMATE_OFFSET does. */
7382 else if (REG_P (XEXP (x
, 0))
7383 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7384 && GET_MODE_SIZE (mode
) >= 4
7385 && CONST_INT_P (XEXP (x
, 1))
7386 && INTVAL (XEXP (x
, 1)) >= 0
7387 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7388 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7391 else if (REG_P (XEXP (x
, 0))
7392 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7393 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7394 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7395 && REGNO (XEXP (x
, 0))
7396 <= LAST_VIRTUAL_POINTER_REGISTER
))
7397 && GET_MODE_SIZE (mode
) >= 4
7398 && CONST_INT_P (XEXP (x
, 1))
7399 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7403 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7404 && GET_MODE_SIZE (mode
) == 4
7405 && GET_CODE (x
) == SYMBOL_REF
7406 && CONSTANT_POOL_ADDRESS_P (x
)
7408 && symbol_mentioned_p (get_pool_constant (x
))
7409 && ! pcrel_constant_p (get_pool_constant (x
))))
7415 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7416 instruction of mode MODE. */
7418 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7420 switch (GET_MODE_SIZE (mode
))
7423 return val
>= 0 && val
< 32;
7426 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7430 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7436 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7439 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7440 else if (TARGET_THUMB2
)
7441 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7442 else /* if (TARGET_THUMB1) */
7443 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7446 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7448 Given an rtx X being reloaded into a reg required to be
7449 in class CLASS, return the class of reg to actually use.
7450 In general this is just CLASS, but for the Thumb core registers and
7451 immediate constants we prefer a LO_REGS class or a subset. */
7454 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7460 if (rclass
== GENERAL_REGS
)
7467 /* Build the SYMBOL_REF for __tls_get_addr. */
7469 static GTY(()) rtx tls_get_addr_libfunc
;
7472 get_tls_get_addr (void)
7474 if (!tls_get_addr_libfunc
)
7475 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7476 return tls_get_addr_libfunc
;
7480 arm_load_tp (rtx target
)
7483 target
= gen_reg_rtx (SImode
);
7487 /* Can return in any reg. */
7488 emit_insn (gen_load_tp_hard (target
));
7492 /* Always returned in r0. Immediately copy the result into a pseudo,
7493 otherwise other uses of r0 (e.g. setting up function arguments) may
7494 clobber the value. */
7498 emit_insn (gen_load_tp_soft ());
7500 tmp
= gen_rtx_REG (SImode
, 0);
7501 emit_move_insn (target
, tmp
);
7507 load_tls_operand (rtx x
, rtx reg
)
7511 if (reg
== NULL_RTX
)
7512 reg
= gen_reg_rtx (SImode
);
7514 tmp
= gen_rtx_CONST (SImode
, x
);
7516 emit_move_insn (reg
, tmp
);
7522 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7524 rtx insns
, label
, labelno
, sum
;
7526 gcc_assert (reloc
!= TLS_DESCSEQ
);
7529 labelno
= GEN_INT (pic_labelno
++);
7530 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7531 label
= gen_rtx_CONST (VOIDmode
, label
);
7533 sum
= gen_rtx_UNSPEC (Pmode
,
7534 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7535 GEN_INT (TARGET_ARM
? 8 : 4)),
7537 reg
= load_tls_operand (sum
, reg
);
7540 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7542 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7544 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7545 LCT_PURE
, /* LCT_CONST? */
7546 Pmode
, 1, reg
, Pmode
);
7548 insns
= get_insns ();
7555 arm_tls_descseq_addr (rtx x
, rtx reg
)
7557 rtx labelno
= GEN_INT (pic_labelno
++);
7558 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7559 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7560 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7561 gen_rtx_CONST (VOIDmode
, label
),
7562 GEN_INT (!TARGET_ARM
)),
7564 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7566 emit_insn (gen_tlscall (x
, labelno
));
7568 reg
= gen_reg_rtx (SImode
);
7570 gcc_assert (REGNO (reg
) != 0);
7572 emit_move_insn (reg
, reg0
);
7578 legitimize_tls_address (rtx x
, rtx reg
)
7580 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7581 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7585 case TLS_MODEL_GLOBAL_DYNAMIC
:
7586 if (TARGET_GNU2_TLS
)
7588 reg
= arm_tls_descseq_addr (x
, reg
);
7590 tp
= arm_load_tp (NULL_RTX
);
7592 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7596 /* Original scheme */
7597 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7598 dest
= gen_reg_rtx (Pmode
);
7599 emit_libcall_block (insns
, dest
, ret
, x
);
7603 case TLS_MODEL_LOCAL_DYNAMIC
:
7604 if (TARGET_GNU2_TLS
)
7606 reg
= arm_tls_descseq_addr (x
, reg
);
7608 tp
= arm_load_tp (NULL_RTX
);
7610 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7614 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7616 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7617 share the LDM result with other LD model accesses. */
7618 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7620 dest
= gen_reg_rtx (Pmode
);
7621 emit_libcall_block (insns
, dest
, ret
, eqv
);
7623 /* Load the addend. */
7624 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7625 GEN_INT (TLS_LDO32
)),
7627 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7628 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7632 case TLS_MODEL_INITIAL_EXEC
:
7633 labelno
= GEN_INT (pic_labelno
++);
7634 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7635 label
= gen_rtx_CONST (VOIDmode
, label
);
7636 sum
= gen_rtx_UNSPEC (Pmode
,
7637 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7638 GEN_INT (TARGET_ARM
? 8 : 4)),
7640 reg
= load_tls_operand (sum
, reg
);
7643 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7644 else if (TARGET_THUMB2
)
7645 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7648 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7649 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7652 tp
= arm_load_tp (NULL_RTX
);
7654 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7656 case TLS_MODEL_LOCAL_EXEC
:
7657 tp
= arm_load_tp (NULL_RTX
);
7659 reg
= gen_rtx_UNSPEC (Pmode
,
7660 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7662 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7664 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7671 /* Try machine-dependent ways of modifying an illegitimate address
7672 to be legitimate. If we find one, return the new, valid address. */
7674 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7676 if (arm_tls_referenced_p (x
))
7680 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7682 addend
= XEXP (XEXP (x
, 0), 1);
7683 x
= XEXP (XEXP (x
, 0), 0);
7686 if (GET_CODE (x
) != SYMBOL_REF
)
7689 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7691 x
= legitimize_tls_address (x
, NULL_RTX
);
7695 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7704 /* TODO: legitimize_address for Thumb2. */
7707 return thumb_legitimize_address (x
, orig_x
, mode
);
7710 if (GET_CODE (x
) == PLUS
)
7712 rtx xop0
= XEXP (x
, 0);
7713 rtx xop1
= XEXP (x
, 1);
7715 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7716 xop0
= force_reg (SImode
, xop0
);
7718 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7719 && !symbol_mentioned_p (xop1
))
7720 xop1
= force_reg (SImode
, xop1
);
7722 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7723 && CONST_INT_P (xop1
))
7725 HOST_WIDE_INT n
, low_n
;
7729 /* VFP addressing modes actually allow greater offsets, but for
7730 now we just stick with the lowest common denominator. */
7732 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7744 low_n
= ((mode
) == TImode
? 0
7745 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7749 base_reg
= gen_reg_rtx (SImode
);
7750 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7751 emit_move_insn (base_reg
, val
);
7752 x
= plus_constant (Pmode
, base_reg
, low_n
);
7754 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7755 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7758 /* XXX We don't allow MINUS any more -- see comment in
7759 arm_legitimate_address_outer_p (). */
7760 else if (GET_CODE (x
) == MINUS
)
7762 rtx xop0
= XEXP (x
, 0);
7763 rtx xop1
= XEXP (x
, 1);
7765 if (CONSTANT_P (xop0
))
7766 xop0
= force_reg (SImode
, xop0
);
7768 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7769 xop1
= force_reg (SImode
, xop1
);
7771 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7772 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7775 /* Make sure to take full advantage of the pre-indexed addressing mode
7776 with absolute addresses which often allows for the base register to
7777 be factorized for multiple adjacent memory references, and it might
7778 even allows for the mini pool to be avoided entirely. */
7779 else if (CONST_INT_P (x
) && optimize
> 0)
7782 HOST_WIDE_INT mask
, base
, index
;
7785 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7786 use a 8-bit index. So let's use a 12-bit index for SImode only and
7787 hope that arm_gen_constant will enable ldrb to use more bits. */
7788 bits
= (mode
== SImode
) ? 12 : 8;
7789 mask
= (1 << bits
) - 1;
7790 base
= INTVAL (x
) & ~mask
;
7791 index
= INTVAL (x
) & mask
;
7792 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7794 /* It'll most probably be more efficient to generate the base
7795 with more bits set and use a negative index instead. */
7799 base_reg
= force_reg (SImode
, GEN_INT (base
));
7800 x
= plus_constant (Pmode
, base_reg
, index
);
7805 /* We need to find and carefully transform any SYMBOL and LABEL
7806 references; so go back to the original address expression. */
7807 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7809 if (new_x
!= orig_x
)
7817 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7818 to be legitimate. If we find one, return the new, valid address. */
7820 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7822 if (GET_CODE (x
) == PLUS
7823 && CONST_INT_P (XEXP (x
, 1))
7824 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7825 || INTVAL (XEXP (x
, 1)) < 0))
7827 rtx xop0
= XEXP (x
, 0);
7828 rtx xop1
= XEXP (x
, 1);
7829 HOST_WIDE_INT offset
= INTVAL (xop1
);
7831 /* Try and fold the offset into a biasing of the base register and
7832 then offsetting that. Don't do this when optimizing for space
7833 since it can cause too many CSEs. */
7834 if (optimize_size
&& offset
>= 0
7835 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7837 HOST_WIDE_INT delta
;
7840 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7841 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7842 delta
= 31 * GET_MODE_SIZE (mode
);
7844 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7846 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7848 x
= plus_constant (Pmode
, xop0
, delta
);
7850 else if (offset
< 0 && offset
> -256)
7851 /* Small negative offsets are best done with a subtract before the
7852 dereference, forcing these into a register normally takes two
7854 x
= force_operand (x
, NULL_RTX
);
7857 /* For the remaining cases, force the constant into a register. */
7858 xop1
= force_reg (SImode
, xop1
);
7859 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7862 else if (GET_CODE (x
) == PLUS
7863 && s_register_operand (XEXP (x
, 1), SImode
)
7864 && !s_register_operand (XEXP (x
, 0), SImode
))
7866 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7868 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7873 /* We need to find and carefully transform any SYMBOL and LABEL
7874 references; so go back to the original address expression. */
7875 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7877 if (new_x
!= orig_x
)
7885 arm_legitimize_reload_address (rtx
*p
,
7887 int opnum
, int type
,
7888 int ind_levels ATTRIBUTE_UNUSED
)
7890 /* We must recognize output that we have already generated ourselves. */
7891 if (GET_CODE (*p
) == PLUS
7892 && GET_CODE (XEXP (*p
, 0)) == PLUS
7893 && REG_P (XEXP (XEXP (*p
, 0), 0))
7894 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7895 && CONST_INT_P (XEXP (*p
, 1)))
7897 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7898 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7899 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7903 if (GET_CODE (*p
) == PLUS
7904 && REG_P (XEXP (*p
, 0))
7905 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7906 /* If the base register is equivalent to a constant, let the generic
7907 code handle it. Otherwise we will run into problems if a future
7908 reload pass decides to rematerialize the constant. */
7909 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7910 && CONST_INT_P (XEXP (*p
, 1)))
7912 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7913 HOST_WIDE_INT low
, high
;
7915 /* Detect coprocessor load/stores. */
7916 bool coproc_p
= ((TARGET_HARD_FLOAT
7918 && (mode
== SFmode
|| mode
== DFmode
))
7919 || (TARGET_REALLY_IWMMXT
7920 && VALID_IWMMXT_REG_MODE (mode
))
7922 && (VALID_NEON_DREG_MODE (mode
)
7923 || VALID_NEON_QREG_MODE (mode
))));
7925 /* For some conditions, bail out when lower two bits are unaligned. */
7926 if ((val
& 0x3) != 0
7927 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7929 /* For DI, and DF under soft-float: */
7930 || ((mode
== DImode
|| mode
== DFmode
)
7931 /* Without ldrd, we use stm/ldm, which does not
7932 fair well with unaligned bits. */
7934 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7935 || TARGET_THUMB2
))))
7938 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7939 of which the (reg+high) gets turned into a reload add insn,
7940 we try to decompose the index into high/low values that can often
7941 also lead to better reload CSE.
7943 ldr r0, [r2, #4100] // Offset too large
7944 ldr r1, [r2, #4104] // Offset too large
7946 is best reloaded as:
7952 which post-reload CSE can simplify in most cases to eliminate the
7953 second add instruction:
7958 The idea here is that we want to split out the bits of the constant
7959 as a mask, rather than as subtracting the maximum offset that the
7960 respective type of load/store used can handle.
7962 When encountering negative offsets, we can still utilize it even if
7963 the overall offset is positive; sometimes this may lead to an immediate
7964 that can be constructed with fewer instructions.
7966 ldr r0, [r2, #0x3FFFFC]
7968 This is best reloaded as:
7969 add t1, r2, #0x400000
7972 The trick for spotting this for a load insn with N bits of offset
7973 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7974 negative offset that is going to make bit N and all the bits below
7975 it become zero in the remainder part.
7977 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7978 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7979 used in most cases of ARM load/store instructions. */
7981 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7982 (((VAL) & ((1 << (N)) - 1)) \
7983 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7988 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7990 /* NEON quad-word load/stores are made of two double-word accesses,
7991 so the valid index range is reduced by 8. Treat as 9-bit range if
7993 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7994 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7996 else if (GET_MODE_SIZE (mode
) == 8)
7999 low
= (TARGET_THUMB2
8000 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
8001 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
8003 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8004 to access doublewords. The supported load/store offsets are
8005 -8, -4, and 4, which we try to produce here. */
8006 low
= ((val
& 0xf) ^ 0x8) - 0x8;
8008 else if (GET_MODE_SIZE (mode
) < 8)
8010 /* NEON element load/stores do not have an offset. */
8011 if (TARGET_NEON_FP16
&& mode
== HFmode
)
8016 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8017 Try the wider 12-bit range first, and re-try if the result
8019 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
8021 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
8025 if (mode
== HImode
|| mode
== HFmode
)
8028 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
8031 /* The storehi/movhi_bytes fallbacks can use only
8032 [-4094,+4094] of the full ldrb/strb index range. */
8033 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
8034 if (low
== 4095 || low
== -4095)
8039 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
8045 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
8046 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
8047 - (unsigned HOST_WIDE_INT
) 0x80000000);
8048 /* Check for overflow or zero */
8049 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
8052 /* Reload the high part into a base reg; leave the low part
8054 Note that replacing this gen_rtx_PLUS with plus_constant is
8055 wrong in this case because we rely on the
8056 (plus (plus reg c1) c2) structure being preserved so that
8057 XEXP (*p, 0) in push_reload below uses the correct term. */
8058 *p
= gen_rtx_PLUS (GET_MODE (*p
),
8059 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
8062 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
8063 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
8064 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8072 thumb_legitimize_reload_address (rtx
*x_p
,
8074 int opnum
, int type
,
8075 int ind_levels ATTRIBUTE_UNUSED
)
8079 if (GET_CODE (x
) == PLUS
8080 && GET_MODE_SIZE (mode
) < 4
8081 && REG_P (XEXP (x
, 0))
8082 && XEXP (x
, 0) == stack_pointer_rtx
8083 && CONST_INT_P (XEXP (x
, 1))
8084 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8089 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8090 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8094 /* If both registers are hi-regs, then it's better to reload the
8095 entire expression rather than each register individually. That
8096 only requires one reload register rather than two. */
8097 if (GET_CODE (x
) == PLUS
8098 && REG_P (XEXP (x
, 0))
8099 && REG_P (XEXP (x
, 1))
8100 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
8101 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
8106 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8107 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8114 /* Return TRUE if X contains any TLS symbol references. */
8117 arm_tls_referenced_p (rtx x
)
8119 if (! TARGET_HAVE_TLS
)
8122 subrtx_iterator::array_type array
;
8123 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8125 const_rtx x
= *iter
;
8126 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8129 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8130 TLS offsets, not real symbol references. */
8131 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8132 iter
.skip_subrtxes ();
8137 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8139 On the ARM, allow any integer (invalid ones are removed later by insn
8140 patterns), nice doubles and symbol_refs which refer to the function's
8143 When generating pic allow anything. */
8146 arm_legitimate_constant_p_1 (machine_mode mode
, rtx x
)
8148 /* At present, we have no support for Neon structure constants, so forbid
8149 them here. It might be possible to handle simple cases like 0 and -1
8151 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8154 return flag_pic
|| !label_mentioned_p (x
);
8158 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8160 return (CONST_INT_P (x
)
8161 || CONST_DOUBLE_P (x
)
8162 || CONSTANT_ADDRESS_P (x
)
8167 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8169 return (!arm_cannot_force_const_mem (mode
, x
)
8171 ? arm_legitimate_constant_p_1 (mode
, x
)
8172 : thumb_legitimate_constant_p (mode
, x
)));
8175 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8178 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8182 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8184 split_const (x
, &base
, &offset
);
8185 if (GET_CODE (base
) == SYMBOL_REF
8186 && !offset_within_block_p (base
, INTVAL (offset
)))
8189 return arm_tls_referenced_p (x
);
8192 #define REG_OR_SUBREG_REG(X) \
8194 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8196 #define REG_OR_SUBREG_RTX(X) \
8197 (REG_P (X) ? (X) : SUBREG_REG (X))
8200 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8202 machine_mode mode
= GET_MODE (x
);
8211 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8218 return COSTS_N_INSNS (1);
8221 if (CONST_INT_P (XEXP (x
, 1)))
8224 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8231 return COSTS_N_INSNS (2) + cycles
;
8233 return COSTS_N_INSNS (1) + 16;
8236 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8238 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8239 return (COSTS_N_INSNS (words
)
8240 + 4 * ((MEM_P (SET_SRC (x
)))
8241 + MEM_P (SET_DEST (x
))));
8246 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8248 if (thumb_shiftable_const (INTVAL (x
)))
8249 return COSTS_N_INSNS (2);
8250 return COSTS_N_INSNS (3);
8252 else if ((outer
== PLUS
|| outer
== COMPARE
)
8253 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8255 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8256 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8257 return COSTS_N_INSNS (1);
8258 else if (outer
== AND
)
8261 /* This duplicates the tests in the andsi3 expander. */
8262 for (i
= 9; i
<= 31; i
++)
8263 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8264 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8265 return COSTS_N_INSNS (2);
8267 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8268 || outer
== LSHIFTRT
)
8270 return COSTS_N_INSNS (2);
8276 return COSTS_N_INSNS (3);
8294 /* XXX another guess. */
8295 /* Memory costs quite a lot for the first word, but subsequent words
8296 load at the equivalent of a single insn each. */
8297 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8298 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8303 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8309 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8310 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8316 return total
+ COSTS_N_INSNS (1);
8318 /* Assume a two-shift sequence. Increase the cost slightly so
8319 we prefer actual shifts over an extend operation. */
8320 return total
+ 1 + COSTS_N_INSNS (2);
8328 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8330 machine_mode mode
= GET_MODE (x
);
8331 enum rtx_code subcode
;
8333 enum rtx_code code
= GET_CODE (x
);
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8348 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8349 *total
= COSTS_N_INSNS (2);
8350 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8351 *total
= COSTS_N_INSNS (4);
8353 *total
= COSTS_N_INSNS (20);
8357 if (REG_P (XEXP (x
, 1)))
8358 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8359 else if (!CONST_INT_P (XEXP (x
, 1)))
8360 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8366 *total
+= COSTS_N_INSNS (4);
8371 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8372 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8375 *total
+= COSTS_N_INSNS (3);
8379 *total
+= COSTS_N_INSNS (1);
8380 /* Increase the cost of complex shifts because they aren't any faster,
8381 and reduce dual issue opportunities. */
8382 if (arm_tune_cortex_a9
8383 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8391 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8392 if (CONST_INT_P (XEXP (x
, 0))
8393 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8395 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8399 if (CONST_INT_P (XEXP (x
, 1))
8400 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8402 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8409 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8411 if (TARGET_HARD_FLOAT
8413 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8415 *total
= COSTS_N_INSNS (1);
8416 if (CONST_DOUBLE_P (XEXP (x
, 0))
8417 && arm_const_double_rtx (XEXP (x
, 0)))
8419 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8423 if (CONST_DOUBLE_P (XEXP (x
, 1))
8424 && arm_const_double_rtx (XEXP (x
, 1)))
8426 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8432 *total
= COSTS_N_INSNS (20);
8436 *total
= COSTS_N_INSNS (1);
8437 if (CONST_INT_P (XEXP (x
, 0))
8438 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8440 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8444 subcode
= GET_CODE (XEXP (x
, 1));
8445 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8446 || subcode
== LSHIFTRT
8447 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8449 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8450 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8454 /* A shift as a part of RSB costs no more than RSB itself. */
8455 if (GET_CODE (XEXP (x
, 0)) == MULT
8456 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8458 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8459 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8464 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8466 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8467 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8471 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8472 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8474 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8475 if (REG_P (XEXP (XEXP (x
, 1), 0))
8476 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8477 *total
+= COSTS_N_INSNS (1);
8485 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8486 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8487 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8489 *total
= COSTS_N_INSNS (1);
8490 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8492 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8496 /* MLA: All arguments must be registers. We filter out
8497 multiplication by a power of two, so that we fall down into
8499 if (GET_CODE (XEXP (x
, 0)) == MULT
8500 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8502 /* The cost comes from the cost of the multiply. */
8506 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8508 if (TARGET_HARD_FLOAT
8510 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8512 *total
= COSTS_N_INSNS (1);
8513 if (CONST_DOUBLE_P (XEXP (x
, 1))
8514 && arm_const_double_rtx (XEXP (x
, 1)))
8516 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8523 *total
= COSTS_N_INSNS (20);
8527 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8528 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8530 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8531 if (REG_P (XEXP (XEXP (x
, 0), 0))
8532 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8533 *total
+= COSTS_N_INSNS (1);
8539 case AND
: case XOR
: case IOR
:
8541 /* Normally the frame registers will be spilt into reg+const during
8542 reload, so it is a bad idea to combine them with other instructions,
8543 since then they might not be moved outside of loops. As a compromise
8544 we allow integration with ops that have a constant as their second
8546 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8547 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8548 && !CONST_INT_P (XEXP (x
, 1)))
8549 *total
= COSTS_N_INSNS (1);
8553 *total
+= COSTS_N_INSNS (2);
8554 if (CONST_INT_P (XEXP (x
, 1))
8555 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8557 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8564 *total
+= COSTS_N_INSNS (1);
8565 if (CONST_INT_P (XEXP (x
, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8568 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8571 subcode
= GET_CODE (XEXP (x
, 0));
8572 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8573 || subcode
== LSHIFTRT
8574 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8576 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8577 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8582 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8584 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8585 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8589 if (subcode
== UMIN
|| subcode
== UMAX
8590 || subcode
== SMIN
|| subcode
== SMAX
)
8592 *total
= COSTS_N_INSNS (3);
8599 /* This should have been handled by the CPU specific routines. */
8603 if (arm_arch3m
&& mode
== SImode
8604 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8605 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8606 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8607 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8608 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8609 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8611 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8614 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8618 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8620 if (TARGET_HARD_FLOAT
8622 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8624 *total
= COSTS_N_INSNS (1);
8627 *total
= COSTS_N_INSNS (2);
8633 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8634 if (mode
== SImode
&& code
== NOT
)
8636 subcode
= GET_CODE (XEXP (x
, 0));
8637 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8638 || subcode
== LSHIFTRT
8639 || subcode
== ROTATE
|| subcode
== ROTATERT
8641 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8643 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8644 /* Register shifts cost an extra cycle. */
8645 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8646 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8655 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8657 *total
= COSTS_N_INSNS (4);
8661 operand
= XEXP (x
, 0);
8663 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8664 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8665 && REG_P (XEXP (operand
, 0))
8666 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8667 *total
+= COSTS_N_INSNS (1);
8668 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8669 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8673 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8675 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8681 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8682 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8684 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8690 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8691 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8693 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8713 /* SCC insns. In the case where the comparison has already been
8714 performed, then they cost 2 instructions. Otherwise they need
8715 an additional comparison before them. */
8716 *total
= COSTS_N_INSNS (2);
8717 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8724 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8730 *total
+= COSTS_N_INSNS (1);
8731 if (CONST_INT_P (XEXP (x
, 1))
8732 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8734 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8738 subcode
= GET_CODE (XEXP (x
, 0));
8739 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8740 || subcode
== LSHIFTRT
8741 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8743 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8744 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8749 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8751 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8752 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8762 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8763 if (!CONST_INT_P (XEXP (x
, 1))
8764 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8765 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8769 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8771 if (TARGET_HARD_FLOAT
8773 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8775 *total
= COSTS_N_INSNS (1);
8778 *total
= COSTS_N_INSNS (20);
8781 *total
= COSTS_N_INSNS (1);
8783 *total
+= COSTS_N_INSNS (3);
8789 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8791 rtx op
= XEXP (x
, 0);
8792 machine_mode opmode
= GET_MODE (op
);
8795 *total
+= COSTS_N_INSNS (1);
8797 if (opmode
!= SImode
)
8801 /* If !arm_arch4, we use one of the extendhisi2_mem
8802 or movhi_bytes patterns for HImode. For a QImode
8803 sign extension, we first zero-extend from memory
8804 and then perform a shift sequence. */
8805 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8806 *total
+= COSTS_N_INSNS (2);
8809 *total
+= COSTS_N_INSNS (1);
8811 /* We don't have the necessary insn, so we need to perform some
8813 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8814 /* An and with constant 255. */
8815 *total
+= COSTS_N_INSNS (1);
8817 /* A shift sequence. Increase costs slightly to avoid
8818 combining two shifts into an extend operation. */
8819 *total
+= COSTS_N_INSNS (2) + 1;
8825 switch (GET_MODE (XEXP (x
, 0)))
8832 *total
= COSTS_N_INSNS (1);
8842 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8846 if (const_ok_for_arm (INTVAL (x
))
8847 || const_ok_for_arm (~INTVAL (x
)))
8848 *total
= COSTS_N_INSNS (1);
8850 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8851 INTVAL (x
), NULL_RTX
,
8858 *total
= COSTS_N_INSNS (3);
8862 *total
= COSTS_N_INSNS (1);
8866 *total
= COSTS_N_INSNS (1);
8867 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8871 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8872 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8873 *total
= COSTS_N_INSNS (1);
8875 *total
= COSTS_N_INSNS (4);
8879 /* The vec_extract patterns accept memory operands that require an
8880 address reload. Account for the cost of that reload to give the
8881 auto-inc-dec pass an incentive to try to replace them. */
8882 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8883 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8885 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8886 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8887 *total
+= COSTS_N_INSNS (1);
8890 /* Likewise for the vec_set patterns. */
8891 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8892 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8893 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8895 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8896 *total
= rtx_cost (mem
, code
, 0, speed
);
8897 if (!neon_vector_mem_operand (mem
, 2, true))
8898 *total
+= COSTS_N_INSNS (1);
8904 /* We cost this as high as our memory costs to allow this to
8905 be hoisted from loops. */
8906 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8908 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8914 && TARGET_HARD_FLOAT
8916 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8917 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8918 *total
= COSTS_N_INSNS (1);
8920 *total
= COSTS_N_INSNS (4);
8924 *total
= COSTS_N_INSNS (4);
8929 /* Estimates the size cost of thumb1 instructions.
8930 For now most of the code is copied from thumb1_rtx_costs. We need more
8931 fine grain tuning when we have more related test cases. */
8933 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8935 machine_mode mode
= GET_MODE (x
);
8944 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8948 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8949 defined by RTL expansion, especially for the expansion of
8951 if ((GET_CODE (XEXP (x
, 0)) == MULT
8952 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8953 || (GET_CODE (XEXP (x
, 1)) == MULT
8954 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8955 return COSTS_N_INSNS (2);
8956 /* On purpose fall through for normal RTX. */
8960 return COSTS_N_INSNS (1);
8963 if (CONST_INT_P (XEXP (x
, 1)))
8965 /* Thumb1 mul instruction can't operate on const. We must Load it
8966 into a register first. */
8967 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8968 /* For the targets which have a very small and high-latency multiply
8969 unit, we prefer to synthesize the mult with up to 5 instructions,
8970 giving a good balance between size and performance. */
8971 if (arm_arch6m
&& arm_m_profile_small_mul
)
8972 return COSTS_N_INSNS (5);
8974 return COSTS_N_INSNS (1) + const_size
;
8976 return COSTS_N_INSNS (1);
8979 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8981 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8982 return COSTS_N_INSNS (words
)
8983 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
8984 || satisfies_constraint_K (SET_SRC (x
))
8985 /* thumb1_movdi_insn. */
8986 || ((words
> 1) && MEM_P (SET_SRC (x
))));
8991 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8992 return COSTS_N_INSNS (1);
8993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8994 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8995 return COSTS_N_INSNS (2);
8996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8997 if (thumb_shiftable_const (INTVAL (x
)))
8998 return COSTS_N_INSNS (2);
8999 return COSTS_N_INSNS (3);
9001 else if ((outer
== PLUS
|| outer
== COMPARE
)
9002 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9004 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9005 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9006 return COSTS_N_INSNS (1);
9007 else if (outer
== AND
)
9010 /* This duplicates the tests in the andsi3 expander. */
9011 for (i
= 9; i
<= 31; i
++)
9012 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
9013 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
9014 return COSTS_N_INSNS (2);
9016 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9017 || outer
== LSHIFTRT
)
9019 return COSTS_N_INSNS (2);
9025 return COSTS_N_INSNS (3);
9039 return COSTS_N_INSNS (1);
9042 return (COSTS_N_INSNS (1)
9044 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9045 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9046 ? COSTS_N_INSNS (1) : 0));
9050 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9055 /* XXX still guessing. */
9056 switch (GET_MODE (XEXP (x
, 0)))
9059 return (1 + (mode
== DImode
? 4 : 0)
9060 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9063 return (4 + (mode
== DImode
? 4 : 0)
9064 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9067 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9078 /* RTX costs when optimizing for size. */
9080 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9083 machine_mode mode
= GET_MODE (x
);
9086 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9090 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9094 /* A memory access costs 1 insn if the mode is small, or the address is
9095 a single register, otherwise it costs one insn per word. */
9096 if (REG_P (XEXP (x
, 0)))
9097 *total
= COSTS_N_INSNS (1);
9099 && GET_CODE (XEXP (x
, 0)) == PLUS
9100 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9101 /* This will be split into two instructions.
9102 See arm.md:calculate_pic_address. */
9103 *total
= COSTS_N_INSNS (2);
9105 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9112 /* Needs a libcall, so it costs about this. */
9113 *total
= COSTS_N_INSNS (2);
9117 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9119 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9127 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9129 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9132 else if (mode
== SImode
)
9134 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9135 /* Slightly disparage register shifts, but not by much. */
9136 if (!CONST_INT_P (XEXP (x
, 1)))
9137 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9141 /* Needs a libcall. */
9142 *total
= COSTS_N_INSNS (2);
9146 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9147 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9149 *total
= COSTS_N_INSNS (1);
9155 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9156 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9158 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9159 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9160 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9161 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9162 || subcode1
== ASHIFTRT
)
9164 /* It's just the cost of the two operands. */
9169 *total
= COSTS_N_INSNS (1);
9173 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9177 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9178 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9180 *total
= COSTS_N_INSNS (1);
9184 /* A shift as a part of ADD costs nothing. */
9185 if (GET_CODE (XEXP (x
, 0)) == MULT
9186 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9188 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9189 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9190 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9195 case AND
: case XOR
: case IOR
:
9198 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9200 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9201 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9202 || (code
== AND
&& subcode
== NOT
))
9204 /* It's just the cost of the two operands. */
9210 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9214 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9218 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9219 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9221 *total
= COSTS_N_INSNS (1);
9227 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9236 if (cc_register (XEXP (x
, 0), VOIDmode
))
9239 *total
= COSTS_N_INSNS (1);
9243 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9244 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9245 *total
= COSTS_N_INSNS (1);
9247 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9252 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9255 if (const_ok_for_arm (INTVAL (x
)))
9256 /* A multiplication by a constant requires another instruction
9257 to load the constant to a register. */
9258 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9260 else if (const_ok_for_arm (~INTVAL (x
)))
9261 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9262 else if (const_ok_for_arm (-INTVAL (x
)))
9264 if (outer_code
== COMPARE
|| outer_code
== PLUS
9265 || outer_code
== MINUS
)
9268 *total
= COSTS_N_INSNS (1);
9271 *total
= COSTS_N_INSNS (2);
9277 *total
= COSTS_N_INSNS (2);
9281 *total
= COSTS_N_INSNS (4);
9286 && TARGET_HARD_FLOAT
9287 && outer_code
== SET
9288 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9289 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9290 *total
= COSTS_N_INSNS (1);
9292 *total
= COSTS_N_INSNS (4);
9297 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9298 cost of these slightly. */
9299 *total
= COSTS_N_INSNS (1) + 1;
9306 if (mode
!= VOIDmode
)
9307 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9309 *total
= COSTS_N_INSNS (4); /* How knows? */
9314 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9315 operand, then return the operand that is being shifted. If the shift
9316 is not by a constant, then set SHIFT_REG to point to the operand.
9317 Return NULL if OP is not a shifter operand. */
9319 shifter_op_p (rtx op
, rtx
*shift_reg
)
9321 enum rtx_code code
= GET_CODE (op
);
9323 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9324 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9325 return XEXP (op
, 0);
9326 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9327 return XEXP (op
, 0);
9328 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9329 || code
== ASHIFTRT
)
9331 if (!CONST_INT_P (XEXP (op
, 1)))
9332 *shift_reg
= XEXP (op
, 1);
9333 return XEXP (op
, 0);
9340 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9342 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9343 gcc_assert (GET_CODE (x
) == UNSPEC
);
9345 switch (XINT (x
, 1))
9347 case UNSPEC_UNALIGNED_LOAD
:
9348 /* We can only do unaligned loads into the integer unit, and we can't
9350 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9352 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9353 + extra_cost
->ldst
.load_unaligned
);
9356 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9357 ADDR_SPACE_GENERIC
, speed_p
);
9361 case UNSPEC_UNALIGNED_STORE
:
9362 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9364 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9365 + extra_cost
->ldst
.store_unaligned
);
9367 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9369 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9370 ADDR_SPACE_GENERIC
, speed_p
);
9380 *cost
= COSTS_N_INSNS (1);
9382 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9386 *cost
= COSTS_N_INSNS (2);
9392 /* Cost of a libcall. We assume one insn per argument, an amount for the
9393 call (one insn for -Os) and then one for processing the result. */
9394 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9396 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9399 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9400 if (shift_op != NULL \
9401 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9406 *cost += extra_cost->alu.arith_shift_reg; \
9407 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9410 *cost += extra_cost->alu.arith_shift; \
9412 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9413 + rtx_cost (XEXP (x, 1 - IDX), \
9420 /* RTX costs. Make an estimate of the cost of executing the operation
9421 X, which is contained with an operation with code OUTER_CODE.
9422 SPEED_P indicates whether the cost desired is the performance cost,
9423 or the size cost. The estimate is stored in COST and the return
9424 value is TRUE if the cost calculation is final, or FALSE if the
9425 caller should recurse through the operands of X to add additional
9428 We currently make no attempt to model the size savings of Thumb-2
9429 16-bit instructions. At the normal points in compilation where
9430 this code is called we have no measure of whether the condition
9431 flags are live or not, and thus no realistic way to determine what
9432 the size will eventually be. */
9434 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9435 const struct cpu_cost_table
*extra_cost
,
9436 int *cost
, bool speed_p
)
9438 machine_mode mode
= GET_MODE (x
);
9443 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9445 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9453 /* SET RTXs don't have a mode so we get it from the destination. */
9454 mode
= GET_MODE (SET_DEST (x
));
9456 if (REG_P (SET_SRC (x
))
9457 && REG_P (SET_DEST (x
)))
9459 /* Assume that most copies can be done with a single insn,
9460 unless we don't have HW FP, in which case everything
9461 larger than word mode will require two insns. */
9462 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9463 && GET_MODE_SIZE (mode
) > 4)
9466 /* Conditional register moves can be encoded
9467 in 16 bits in Thumb mode. */
9468 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9474 if (CONST_INT_P (SET_SRC (x
)))
9476 /* Handle CONST_INT here, since the value doesn't have a mode
9477 and we would otherwise be unable to work out the true cost. */
9478 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9480 /* Slightly lower the cost of setting a core reg to a constant.
9481 This helps break up chains and allows for better scheduling. */
9482 if (REG_P (SET_DEST (x
))
9483 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9486 /* Immediate moves with an immediate in the range [0, 255] can be
9487 encoded in 16 bits in Thumb mode. */
9488 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9489 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9491 goto const_int_cost
;
9497 /* A memory access costs 1 insn if the mode is small, or the address is
9498 a single register, otherwise it costs one insn per word. */
9499 if (REG_P (XEXP (x
, 0)))
9500 *cost
= COSTS_N_INSNS (1);
9502 && GET_CODE (XEXP (x
, 0)) == PLUS
9503 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9504 /* This will be split into two instructions.
9505 See arm.md:calculate_pic_address. */
9506 *cost
= COSTS_N_INSNS (2);
9508 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9510 /* For speed optimizations, add the costs of the address and
9511 accessing memory. */
9514 *cost
+= (extra_cost
->ldst
.load
9515 + arm_address_cost (XEXP (x
, 0), mode
,
9516 ADDR_SPACE_GENERIC
, speed_p
));
9518 *cost
+= extra_cost
->ldst
.load
;
9524 /* Calculations of LDM costs are complex. We assume an initial cost
9525 (ldm_1st) which will load the number of registers mentioned in
9526 ldm_regs_per_insn_1st registers; then each additional
9527 ldm_regs_per_insn_subsequent registers cost one more insn. The
9528 formula for N regs is thus:
9530 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9531 + ldm_regs_per_insn_subsequent - 1)
9532 / ldm_regs_per_insn_subsequent).
9534 Additional costs may also be added for addressing. A similar
9535 formula is used for STM. */
9537 bool is_ldm
= load_multiple_operation (x
, SImode
);
9538 bool is_stm
= store_multiple_operation (x
, SImode
);
9540 *cost
= COSTS_N_INSNS (1);
9542 if (is_ldm
|| is_stm
)
9546 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9547 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9548 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9549 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9550 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9551 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9552 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9554 *cost
+= regs_per_insn_1st
9555 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9556 + regs_per_insn_sub
- 1)
9557 / regs_per_insn_sub
);
9566 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9567 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9568 *cost
= COSTS_N_INSNS (speed_p
9569 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9570 else if (mode
== SImode
&& TARGET_IDIV
)
9571 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9573 *cost
= LIBCALL_COST (2);
9574 return false; /* All arguments must be in registers. */
9578 *cost
= LIBCALL_COST (2);
9579 return false; /* All arguments must be in registers. */
9582 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9584 *cost
= (COSTS_N_INSNS (2)
9585 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9587 *cost
+= extra_cost
->alu
.shift_reg
;
9595 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9597 *cost
= (COSTS_N_INSNS (3)
9598 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9600 *cost
+= 2 * extra_cost
->alu
.shift
;
9603 else if (mode
== SImode
)
9605 *cost
= (COSTS_N_INSNS (1)
9606 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9607 /* Slightly disparage register shifts at -Os, but not by much. */
9608 if (!CONST_INT_P (XEXP (x
, 1)))
9609 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9610 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9613 else if (GET_MODE_CLASS (mode
) == MODE_INT
9614 && GET_MODE_SIZE (mode
) < 4)
9618 *cost
= (COSTS_N_INSNS (1)
9619 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9620 /* Slightly disparage register shifts at -Os, but not by
9622 if (!CONST_INT_P (XEXP (x
, 1)))
9623 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9624 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9626 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9628 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9630 /* Can use SBFX/UBFX. */
9631 *cost
= COSTS_N_INSNS (1);
9633 *cost
+= extra_cost
->alu
.bfx
;
9634 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9638 *cost
= COSTS_N_INSNS (2);
9639 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9642 if (CONST_INT_P (XEXP (x
, 1)))
9643 *cost
+= 2 * extra_cost
->alu
.shift
;
9645 *cost
+= (extra_cost
->alu
.shift
9646 + extra_cost
->alu
.shift_reg
);
9649 /* Slightly disparage register shifts. */
9650 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9655 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9656 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9659 if (CONST_INT_P (XEXP (x
, 1)))
9660 *cost
+= (2 * extra_cost
->alu
.shift
9661 + extra_cost
->alu
.log_shift
);
9663 *cost
+= (extra_cost
->alu
.shift
9664 + extra_cost
->alu
.shift_reg
9665 + extra_cost
->alu
.log_shift_reg
);
9671 *cost
= LIBCALL_COST (2);
9679 *cost
= COSTS_N_INSNS (1);
9681 *cost
+= extra_cost
->alu
.rev
;
9688 /* No rev instruction available. Look at arm_legacy_rev
9689 and thumb_legacy_rev for the form of RTL used then. */
9692 *cost
= COSTS_N_INSNS (10);
9696 *cost
+= 6 * extra_cost
->alu
.shift
;
9697 *cost
+= 3 * extra_cost
->alu
.logical
;
9702 *cost
= COSTS_N_INSNS (5);
9706 *cost
+= 2 * extra_cost
->alu
.shift
;
9707 *cost
+= extra_cost
->alu
.arith_shift
;
9708 *cost
+= 2 * extra_cost
->alu
.logical
;
9716 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9717 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9719 *cost
= COSTS_N_INSNS (1);
9720 if (GET_CODE (XEXP (x
, 0)) == MULT
9721 || GET_CODE (XEXP (x
, 1)) == MULT
)
9723 rtx mul_op0
, mul_op1
, sub_op
;
9726 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9728 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9730 mul_op0
= XEXP (XEXP (x
, 0), 0);
9731 mul_op1
= XEXP (XEXP (x
, 0), 1);
9732 sub_op
= XEXP (x
, 1);
9736 mul_op0
= XEXP (XEXP (x
, 1), 0);
9737 mul_op1
= XEXP (XEXP (x
, 1), 1);
9738 sub_op
= XEXP (x
, 0);
9741 /* The first operand of the multiply may be optionally
9743 if (GET_CODE (mul_op0
) == NEG
)
9744 mul_op0
= XEXP (mul_op0
, 0);
9746 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9747 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9748 + rtx_cost (sub_op
, code
, 0, speed_p
));
9754 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9760 rtx shift_by_reg
= NULL
;
9764 *cost
= COSTS_N_INSNS (1);
9766 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9767 if (shift_op
== NULL
)
9769 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9770 non_shift_op
= XEXP (x
, 0);
9773 non_shift_op
= XEXP (x
, 1);
9775 if (shift_op
!= NULL
)
9777 if (shift_by_reg
!= NULL
)
9780 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9781 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9784 *cost
+= extra_cost
->alu
.arith_shift
;
9786 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9787 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9792 && GET_CODE (XEXP (x
, 1)) == MULT
)
9796 *cost
+= extra_cost
->mult
[0].add
;
9797 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9798 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9799 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9803 if (CONST_INT_P (XEXP (x
, 0)))
9805 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9806 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9808 *cost
= COSTS_N_INSNS (insns
);
9810 *cost
+= insns
* extra_cost
->alu
.arith
;
9811 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9818 if (GET_MODE_CLASS (mode
) == MODE_INT
9819 && GET_MODE_SIZE (mode
) < 4)
9821 rtx shift_op
, shift_reg
;
9824 /* We check both sides of the MINUS for shifter operands since,
9825 unlike PLUS, it's not commutative. */
9827 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9828 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9830 /* Slightly disparage, as we might need to widen the result. */
9831 *cost
= 1 + COSTS_N_INSNS (1);
9833 *cost
+= extra_cost
->alu
.arith
;
9835 if (CONST_INT_P (XEXP (x
, 0)))
9837 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9846 *cost
= COSTS_N_INSNS (2);
9848 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9850 rtx op1
= XEXP (x
, 1);
9853 *cost
+= 2 * extra_cost
->alu
.arith
;
9855 if (GET_CODE (op1
) == ZERO_EXTEND
)
9856 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9858 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9859 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9863 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9866 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9867 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9869 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9872 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9873 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9876 *cost
+= (extra_cost
->alu
.arith
9877 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9878 ? extra_cost
->alu
.arith
9879 : extra_cost
->alu
.arith_shift
));
9880 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9881 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9882 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9887 *cost
+= 2 * extra_cost
->alu
.arith
;
9893 *cost
= LIBCALL_COST (2);
9897 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9898 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9900 *cost
= COSTS_N_INSNS (1);
9901 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9903 rtx mul_op0
, mul_op1
, add_op
;
9906 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9908 mul_op0
= XEXP (XEXP (x
, 0), 0);
9909 mul_op1
= XEXP (XEXP (x
, 0), 1);
9910 add_op
= XEXP (x
, 1);
9912 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9913 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9914 + rtx_cost (add_op
, code
, 0, speed_p
));
9920 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9923 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9925 *cost
= LIBCALL_COST (2);
9929 /* Narrow modes can be synthesized in SImode, but the range
9930 of useful sub-operations is limited. Check for shift operations
9931 on one of the operands. Only left shifts can be used in the
9933 if (GET_MODE_CLASS (mode
) == MODE_INT
9934 && GET_MODE_SIZE (mode
) < 4)
9936 rtx shift_op
, shift_reg
;
9939 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9941 if (CONST_INT_P (XEXP (x
, 1)))
9943 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9944 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9946 *cost
= COSTS_N_INSNS (insns
);
9948 *cost
+= insns
* extra_cost
->alu
.arith
;
9949 /* Slightly penalize a narrow operation as the result may
9951 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9955 /* Slightly penalize a narrow operation as the result may
9957 *cost
= 1 + COSTS_N_INSNS (1);
9959 *cost
+= extra_cost
->alu
.arith
;
9966 rtx shift_op
, shift_reg
;
9968 *cost
= COSTS_N_INSNS (1);
9970 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9971 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9973 /* UXTA[BH] or SXTA[BH]. */
9975 *cost
+= extra_cost
->alu
.extend_arith
;
9976 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9978 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9983 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9984 if (shift_op
!= NULL
)
9989 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9990 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9993 *cost
+= extra_cost
->alu
.arith_shift
;
9995 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9996 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9999 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10001 rtx mul_op
= XEXP (x
, 0);
10003 *cost
= COSTS_N_INSNS (1);
10005 if (TARGET_DSP_MULTIPLY
10006 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10007 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10008 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10009 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10010 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10011 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10012 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10013 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10014 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10015 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10016 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10017 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10020 /* SMLA[BT][BT]. */
10022 *cost
+= extra_cost
->mult
[0].extend_add
;
10023 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
10024 SIGN_EXTEND
, 0, speed_p
)
10025 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
10026 SIGN_EXTEND
, 0, speed_p
)
10027 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10032 *cost
+= extra_cost
->mult
[0].add
;
10033 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
10034 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
10035 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10038 if (CONST_INT_P (XEXP (x
, 1)))
10040 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10041 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10043 *cost
= COSTS_N_INSNS (insns
);
10045 *cost
+= insns
* extra_cost
->alu
.arith
;
10046 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
10052 if (mode
== DImode
)
10055 && GET_CODE (XEXP (x
, 0)) == MULT
10056 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10057 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10058 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10059 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10061 *cost
= COSTS_N_INSNS (1);
10063 *cost
+= extra_cost
->mult
[1].extend_add
;
10064 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
10065 ZERO_EXTEND
, 0, speed_p
)
10066 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
10067 ZERO_EXTEND
, 0, speed_p
)
10068 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10072 *cost
= COSTS_N_INSNS (2);
10074 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10075 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10078 *cost
+= (extra_cost
->alu
.arith
10079 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10080 ? extra_cost
->alu
.arith
10081 : extra_cost
->alu
.arith_shift
));
10083 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
10085 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10090 *cost
+= 2 * extra_cost
->alu
.arith
;
10095 *cost
= LIBCALL_COST (2);
10098 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10100 *cost
= COSTS_N_INSNS (1);
10102 *cost
+= extra_cost
->alu
.rev
;
10106 /* Fall through. */
10107 case AND
: case XOR
:
10108 if (mode
== SImode
)
10110 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10111 rtx op0
= XEXP (x
, 0);
10112 rtx shift_op
, shift_reg
;
10114 *cost
= COSTS_N_INSNS (1);
10118 || (code
== IOR
&& TARGET_THUMB2
)))
10119 op0
= XEXP (op0
, 0);
10122 shift_op
= shifter_op_p (op0
, &shift_reg
);
10123 if (shift_op
!= NULL
)
10128 *cost
+= extra_cost
->alu
.log_shift_reg
;
10129 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10132 *cost
+= extra_cost
->alu
.log_shift
;
10134 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10135 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10139 if (CONST_INT_P (XEXP (x
, 1)))
10141 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10142 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10145 *cost
= COSTS_N_INSNS (insns
);
10147 *cost
+= insns
* extra_cost
->alu
.logical
;
10148 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10153 *cost
+= extra_cost
->alu
.logical
;
10154 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10155 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10159 if (mode
== DImode
)
10161 rtx op0
= XEXP (x
, 0);
10162 enum rtx_code subcode
= GET_CODE (op0
);
10164 *cost
= COSTS_N_INSNS (2);
10168 || (code
== IOR
&& TARGET_THUMB2
)))
10169 op0
= XEXP (op0
, 0);
10171 if (GET_CODE (op0
) == ZERO_EXTEND
)
10174 *cost
+= 2 * extra_cost
->alu
.logical
;
10176 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10177 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10180 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10183 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10185 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10186 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10191 *cost
+= 2 * extra_cost
->alu
.logical
;
10197 *cost
= LIBCALL_COST (2);
10201 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10202 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10204 rtx op0
= XEXP (x
, 0);
10206 *cost
= COSTS_N_INSNS (1);
10208 if (GET_CODE (op0
) == NEG
)
10209 op0
= XEXP (op0
, 0);
10212 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10214 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10215 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10218 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10220 *cost
= LIBCALL_COST (2);
10224 if (mode
== SImode
)
10226 *cost
= COSTS_N_INSNS (1);
10227 if (TARGET_DSP_MULTIPLY
10228 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10229 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10230 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10231 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10232 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10233 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10234 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10235 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10236 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10237 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10238 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10239 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10242 /* SMUL[TB][TB]. */
10244 *cost
+= extra_cost
->mult
[0].extend
;
10245 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10246 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10250 *cost
+= extra_cost
->mult
[0].simple
;
10254 if (mode
== DImode
)
10257 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10258 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10259 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10260 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10262 *cost
= COSTS_N_INSNS (1);
10264 *cost
+= extra_cost
->mult
[1].extend
;
10265 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10266 ZERO_EXTEND
, 0, speed_p
)
10267 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10268 ZERO_EXTEND
, 0, speed_p
));
10272 *cost
= LIBCALL_COST (2);
10277 *cost
= LIBCALL_COST (2);
10281 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10282 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10284 *cost
= COSTS_N_INSNS (1);
10286 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10290 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10292 *cost
= LIBCALL_COST (1);
10296 if (mode
== SImode
)
10298 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10300 *cost
= COSTS_N_INSNS (2);
10301 /* Assume the non-flag-changing variant. */
10303 *cost
+= (extra_cost
->alu
.log_shift
10304 + extra_cost
->alu
.arith_shift
);
10305 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10309 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10310 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10312 *cost
= COSTS_N_INSNS (2);
10313 /* No extra cost for MOV imm and MVN imm. */
10314 /* If the comparison op is using the flags, there's no further
10315 cost, otherwise we need to add the cost of the comparison. */
10316 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10317 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10318 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10320 *cost
+= (COSTS_N_INSNS (1)
10321 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10323 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10326 *cost
+= extra_cost
->alu
.arith
;
10330 *cost
= COSTS_N_INSNS (1);
10332 *cost
+= extra_cost
->alu
.arith
;
10336 if (GET_MODE_CLASS (mode
) == MODE_INT
10337 && GET_MODE_SIZE (mode
) < 4)
10339 /* Slightly disparage, as we might need an extend operation. */
10340 *cost
= 1 + COSTS_N_INSNS (1);
10342 *cost
+= extra_cost
->alu
.arith
;
10346 if (mode
== DImode
)
10348 *cost
= COSTS_N_INSNS (2);
10350 *cost
+= 2 * extra_cost
->alu
.arith
;
10355 *cost
= LIBCALL_COST (1);
10359 if (mode
== SImode
)
10362 rtx shift_reg
= NULL
;
10364 *cost
= COSTS_N_INSNS (1);
10365 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10369 if (shift_reg
!= NULL
)
10372 *cost
+= extra_cost
->alu
.log_shift_reg
;
10373 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10376 *cost
+= extra_cost
->alu
.log_shift
;
10377 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10382 *cost
+= extra_cost
->alu
.logical
;
10385 if (mode
== DImode
)
10387 *cost
= COSTS_N_INSNS (2);
10393 *cost
+= LIBCALL_COST (1);
10398 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10400 *cost
= COSTS_N_INSNS (4);
10403 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10404 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10406 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10407 /* Assume that if one arm of the if_then_else is a register,
10408 that it will be tied with the result and eliminate the
10409 conditional insn. */
10410 if (REG_P (XEXP (x
, 1)))
10412 else if (REG_P (XEXP (x
, 2)))
10418 if (extra_cost
->alu
.non_exec_costs_exec
)
10419 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10421 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10424 *cost
+= op1cost
+ op2cost
;
10430 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10434 machine_mode op0mode
;
10435 /* We'll mostly assume that the cost of a compare is the cost of the
10436 LHS. However, there are some notable exceptions. */
10438 /* Floating point compares are never done as side-effects. */
10439 op0mode
= GET_MODE (XEXP (x
, 0));
10440 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10441 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10443 *cost
= COSTS_N_INSNS (1);
10445 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10447 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10449 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10455 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10457 *cost
= LIBCALL_COST (2);
10461 /* DImode compares normally take two insns. */
10462 if (op0mode
== DImode
)
10464 *cost
= COSTS_N_INSNS (2);
10466 *cost
+= 2 * extra_cost
->alu
.arith
;
10470 if (op0mode
== SImode
)
10475 if (XEXP (x
, 1) == const0_rtx
10476 && !(REG_P (XEXP (x
, 0))
10477 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10478 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10480 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10482 /* Multiply operations that set the flags are often
10483 significantly more expensive. */
10485 && GET_CODE (XEXP (x
, 0)) == MULT
10486 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10487 *cost
+= extra_cost
->mult
[0].flag_setting
;
10490 && GET_CODE (XEXP (x
, 0)) == PLUS
10491 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10492 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10494 *cost
+= extra_cost
->mult
[0].flag_setting
;
10499 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10500 if (shift_op
!= NULL
)
10502 *cost
= COSTS_N_INSNS (1);
10503 if (shift_reg
!= NULL
)
10505 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10507 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10510 *cost
+= extra_cost
->alu
.arith_shift
;
10511 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10512 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10516 *cost
= COSTS_N_INSNS (1);
10518 *cost
+= extra_cost
->alu
.arith
;
10519 if (CONST_INT_P (XEXP (x
, 1))
10520 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10522 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10530 *cost
= LIBCALL_COST (2);
10553 if (outer_code
== SET
)
10555 /* Is it a store-flag operation? */
10556 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10557 && XEXP (x
, 1) == const0_rtx
)
10559 /* Thumb also needs an IT insn. */
10560 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10563 if (XEXP (x
, 1) == const0_rtx
)
10568 /* LSR Rd, Rn, #31. */
10569 *cost
= COSTS_N_INSNS (1);
10571 *cost
+= extra_cost
->alu
.shift
;
10581 *cost
= COSTS_N_INSNS (2);
10585 /* RSBS T1, Rn, Rn, LSR #31
10587 *cost
= COSTS_N_INSNS (2);
10589 *cost
+= extra_cost
->alu
.arith_shift
;
10593 /* RSB Rd, Rn, Rn, ASR #1
10594 LSR Rd, Rd, #31. */
10595 *cost
= COSTS_N_INSNS (2);
10597 *cost
+= (extra_cost
->alu
.arith_shift
10598 + extra_cost
->alu
.shift
);
10604 *cost
= COSTS_N_INSNS (2);
10606 *cost
+= extra_cost
->alu
.shift
;
10610 /* Remaining cases are either meaningless or would take
10611 three insns anyway. */
10612 *cost
= COSTS_N_INSNS (3);
10615 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10620 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10621 if (CONST_INT_P (XEXP (x
, 1))
10622 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10624 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10631 /* Not directly inside a set. If it involves the condition code
10632 register it must be the condition for a branch, cond_exec or
10633 I_T_E operation. Since the comparison is performed elsewhere
10634 this is just the control part which has no additional
10636 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10637 && XEXP (x
, 1) == const0_rtx
)
10645 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10646 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10648 *cost
= COSTS_N_INSNS (1);
10650 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10654 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10656 *cost
= LIBCALL_COST (1);
10660 if (mode
== SImode
)
10662 *cost
= COSTS_N_INSNS (1);
10664 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10668 *cost
= LIBCALL_COST (1);
10672 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10673 && MEM_P (XEXP (x
, 0)))
10675 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10677 if (mode
== DImode
)
10678 *cost
+= COSTS_N_INSNS (1);
10683 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10684 *cost
+= extra_cost
->ldst
.load
;
10686 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10688 if (mode
== DImode
)
10689 *cost
+= extra_cost
->alu
.shift
;
10694 /* Widening from less than 32-bits requires an extend operation. */
10695 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10697 /* We have SXTB/SXTH. */
10698 *cost
= COSTS_N_INSNS (1);
10699 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10701 *cost
+= extra_cost
->alu
.extend
;
10703 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10705 /* Needs two shifts. */
10706 *cost
= COSTS_N_INSNS (2);
10707 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10709 *cost
+= 2 * extra_cost
->alu
.shift
;
10712 /* Widening beyond 32-bits requires one more insn. */
10713 if (mode
== DImode
)
10715 *cost
+= COSTS_N_INSNS (1);
10717 *cost
+= extra_cost
->alu
.shift
;
10724 || GET_MODE (XEXP (x
, 0)) == SImode
10725 || GET_MODE (XEXP (x
, 0)) == QImode
)
10726 && MEM_P (XEXP (x
, 0)))
10728 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10730 if (mode
== DImode
)
10731 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10736 /* Widening from less than 32-bits requires an extend operation. */
10737 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10739 /* UXTB can be a shorter instruction in Thumb2, but it might
10740 be slower than the AND Rd, Rn, #255 alternative. When
10741 optimizing for speed it should never be slower to use
10742 AND, and we don't really model 16-bit vs 32-bit insns
10744 *cost
= COSTS_N_INSNS (1);
10746 *cost
+= extra_cost
->alu
.logical
;
10748 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10750 /* We have UXTB/UXTH. */
10751 *cost
= COSTS_N_INSNS (1);
10752 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10754 *cost
+= extra_cost
->alu
.extend
;
10756 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10758 /* Needs two shifts. It's marginally preferable to use
10759 shifts rather than two BIC instructions as the second
10760 shift may merge with a subsequent insn as a shifter
10762 *cost
= COSTS_N_INSNS (2);
10763 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10765 *cost
+= 2 * extra_cost
->alu
.shift
;
10767 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10768 *cost
= COSTS_N_INSNS (1);
10770 /* Widening beyond 32-bits requires one more insn. */
10771 if (mode
== DImode
)
10773 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10780 /* CONST_INT has no mode, so we cannot tell for sure how many
10781 insns are really going to be needed. The best we can do is
10782 look at the value passed. If it fits in SImode, then assume
10783 that's the mode it will be used for. Otherwise assume it
10784 will be used in DImode. */
10785 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10790 /* Avoid blowing up in arm_gen_constant (). */
10791 if (!(outer_code
== PLUS
10792 || outer_code
== AND
10793 || outer_code
== IOR
10794 || outer_code
== XOR
10795 || outer_code
== MINUS
))
10799 if (mode
== SImode
)
10801 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10802 INTVAL (x
), NULL
, NULL
,
10808 *cost
+= COSTS_N_INSNS (arm_gen_constant
10809 (outer_code
, SImode
, NULL
,
10810 trunc_int_for_mode (INTVAL (x
), SImode
),
10812 + arm_gen_constant (outer_code
, SImode
, NULL
,
10813 INTVAL (x
) >> 32, NULL
,
10825 if (arm_arch_thumb2
&& !flag_pic
)
10826 *cost
= COSTS_N_INSNS (2);
10828 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10831 *cost
= COSTS_N_INSNS (2);
10835 *cost
+= COSTS_N_INSNS (1);
10837 *cost
+= extra_cost
->alu
.arith
;
10843 *cost
= COSTS_N_INSNS (4);
10848 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10849 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10851 if (vfp3_const_double_rtx (x
))
10853 *cost
= COSTS_N_INSNS (1);
10855 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10861 *cost
= COSTS_N_INSNS (1);
10862 if (mode
== DFmode
)
10863 *cost
+= extra_cost
->ldst
.loadd
;
10865 *cost
+= extra_cost
->ldst
.loadf
;
10868 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10872 *cost
= COSTS_N_INSNS (4);
10878 && TARGET_HARD_FLOAT
10879 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10880 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10881 *cost
= COSTS_N_INSNS (1);
10883 *cost
= COSTS_N_INSNS (4);
10888 *cost
= COSTS_N_INSNS (1);
10889 /* When optimizing for size, we prefer constant pool entries to
10890 MOVW/MOVT pairs, so bump the cost of these slightly. */
10896 *cost
= COSTS_N_INSNS (1);
10898 *cost
+= extra_cost
->alu
.clz
;
10902 if (XEXP (x
, 1) == const0_rtx
)
10904 *cost
= COSTS_N_INSNS (1);
10906 *cost
+= extra_cost
->alu
.log_shift
;
10907 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10910 /* Fall through. */
10914 *cost
= COSTS_N_INSNS (2);
10918 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10919 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10920 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10921 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10922 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10923 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10924 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10925 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10928 *cost
= COSTS_N_INSNS (1);
10930 *cost
+= extra_cost
->mult
[1].extend
;
10931 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10933 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10937 *cost
= LIBCALL_COST (1);
10941 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10944 /* Reading the PC is like reading any other register. Writing it
10945 is more expensive, but we take that into account elsewhere. */
10950 /* TODO: Simple zero_extract of bottom bits using AND. */
10951 /* Fall through. */
10955 && CONST_INT_P (XEXP (x
, 1))
10956 && CONST_INT_P (XEXP (x
, 2)))
10958 *cost
= COSTS_N_INSNS (1);
10960 *cost
+= extra_cost
->alu
.bfx
;
10961 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10964 /* Without UBFX/SBFX, need to resort to shift operations. */
10965 *cost
= COSTS_N_INSNS (2);
10967 *cost
+= 2 * extra_cost
->alu
.shift
;
10968 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10972 if (TARGET_HARD_FLOAT
)
10974 *cost
= COSTS_N_INSNS (1);
10976 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10977 if (!TARGET_FPU_ARMV8
10978 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10980 /* Pre v8, widening HF->DF is a two-step process, first
10981 widening to SFmode. */
10982 *cost
+= COSTS_N_INSNS (1);
10984 *cost
+= extra_cost
->fp
[0].widen
;
10986 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10990 *cost
= LIBCALL_COST (1);
10993 case FLOAT_TRUNCATE
:
10994 if (TARGET_HARD_FLOAT
)
10996 *cost
= COSTS_N_INSNS (1);
10998 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10999 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11001 /* Vector modes? */
11003 *cost
= LIBCALL_COST (1);
11007 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11009 rtx op0
= XEXP (x
, 0);
11010 rtx op1
= XEXP (x
, 1);
11011 rtx op2
= XEXP (x
, 2);
11013 *cost
= COSTS_N_INSNS (1);
11015 /* vfms or vfnma. */
11016 if (GET_CODE (op0
) == NEG
)
11017 op0
= XEXP (op0
, 0);
11019 /* vfnms or vfnma. */
11020 if (GET_CODE (op2
) == NEG
)
11021 op2
= XEXP (op2
, 0);
11023 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
11024 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
11025 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
11028 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11033 *cost
= LIBCALL_COST (3);
11038 if (TARGET_HARD_FLOAT
)
11040 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11042 *cost
= COSTS_N_INSNS (1);
11044 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
11045 /* Strip of the 'cost' of rounding towards zero. */
11046 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11047 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
11049 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11050 /* ??? Increase the cost to deal with transferring from
11051 FP -> CORE registers? */
11054 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11055 && TARGET_FPU_ARMV8
)
11057 *cost
= COSTS_N_INSNS (1);
11059 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11062 /* Vector costs? */
11064 *cost
= LIBCALL_COST (1);
11068 case UNSIGNED_FLOAT
:
11069 if (TARGET_HARD_FLOAT
)
11071 /* ??? Increase the cost to deal with transferring from CORE
11072 -> FP registers? */
11073 *cost
= COSTS_N_INSNS (1);
11075 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11078 *cost
= LIBCALL_COST (1);
11082 *cost
= COSTS_N_INSNS (1);
11087 /* Just a guess. Guess number of instructions in the asm
11088 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11089 though (see PR60663). */
11090 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11091 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11093 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11097 if (mode
!= VOIDmode
)
11098 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11100 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11105 #undef HANDLE_NARROW_SHIFT_ARITH
11107 /* RTX costs when optimizing for size. */
11109 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
11110 int *total
, bool speed
)
11114 if (TARGET_OLD_RTX_COSTS
11115 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11117 /* Old way. (Deprecated.) */
11119 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11120 (enum rtx_code
) outer_code
, total
);
11122 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11123 (enum rtx_code
) outer_code
, total
,
11129 if (current_tune
->insn_extra_cost
)
11130 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11131 (enum rtx_code
) outer_code
,
11132 current_tune
->insn_extra_cost
,
11134 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11135 && current_tune->insn_extra_cost != NULL */
11137 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11138 (enum rtx_code
) outer_code
,
11139 &generic_extra_costs
, total
, speed
);
11142 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11144 print_rtl_single (dump_file
, x
);
11145 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11146 *total
, result
? "final" : "partial");
11151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11152 supported on any "slowmul" cores, so it can be ignored. */
11155 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11156 int *total
, bool speed
)
11158 machine_mode mode
= GET_MODE (x
);
11162 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11169 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11172 *total
= COSTS_N_INSNS (20);
11176 if (CONST_INT_P (XEXP (x
, 1)))
11178 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11179 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11180 int cost
, const_ok
= const_ok_for_arm (i
);
11181 int j
, booth_unit_size
;
11183 /* Tune as appropriate. */
11184 cost
= const_ok
? 4 : 8;
11185 booth_unit_size
= 2;
11186 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11188 i
>>= booth_unit_size
;
11192 *total
= COSTS_N_INSNS (cost
);
11193 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11197 *total
= COSTS_N_INSNS (20);
11201 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11206 /* RTX cost for cores with a fast multiply unit (M variants). */
11209 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11210 int *total
, bool speed
)
11212 machine_mode mode
= GET_MODE (x
);
11216 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11220 /* ??? should thumb2 use different costs? */
11224 /* There is no point basing this on the tuning, since it is always the
11225 fast variant if it exists at all. */
11227 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11228 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11229 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11231 *total
= COSTS_N_INSNS(2);
11236 if (mode
== DImode
)
11238 *total
= COSTS_N_INSNS (5);
11242 if (CONST_INT_P (XEXP (x
, 1)))
11244 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11245 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11246 int cost
, const_ok
= const_ok_for_arm (i
);
11247 int j
, booth_unit_size
;
11249 /* Tune as appropriate. */
11250 cost
= const_ok
? 4 : 8;
11251 booth_unit_size
= 8;
11252 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11254 i
>>= booth_unit_size
;
11258 *total
= COSTS_N_INSNS(cost
);
11262 if (mode
== SImode
)
11264 *total
= COSTS_N_INSNS (4);
11268 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11270 if (TARGET_HARD_FLOAT
11272 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11274 *total
= COSTS_N_INSNS (1);
11279 /* Requires a lib call */
11280 *total
= COSTS_N_INSNS (20);
11284 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11290 so it can be ignored. */
11293 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11294 int *total
, bool speed
)
11296 machine_mode mode
= GET_MODE (x
);
11300 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11307 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11308 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11310 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11311 will stall until the multiplication is complete. */
11312 *total
= COSTS_N_INSNS (3);
11316 /* There is no point basing this on the tuning, since it is always the
11317 fast variant if it exists at all. */
11319 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11320 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11321 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11323 *total
= COSTS_N_INSNS (2);
11328 if (mode
== DImode
)
11330 *total
= COSTS_N_INSNS (5);
11334 if (CONST_INT_P (XEXP (x
, 1)))
11336 /* If operand 1 is a constant we can more accurately
11337 calculate the cost of the multiply. The multiplier can
11338 retire 15 bits on the first cycle and a further 12 on the
11339 second. We do, of course, have to load the constant into
11340 a register first. */
11341 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11342 /* There's a general overhead of one cycle. */
11344 unsigned HOST_WIDE_INT masked_const
;
11346 if (i
& 0x80000000)
11349 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11351 masked_const
= i
& 0xffff8000;
11352 if (masked_const
!= 0)
11355 masked_const
= i
& 0xf8000000;
11356 if (masked_const
!= 0)
11359 *total
= COSTS_N_INSNS (cost
);
11363 if (mode
== SImode
)
11365 *total
= COSTS_N_INSNS (3);
11369 /* Requires a lib call */
11370 *total
= COSTS_N_INSNS (20);
11374 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11379 /* RTX costs for 9e (and later) cores. */
11382 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11383 int *total
, bool speed
)
11385 machine_mode mode
= GET_MODE (x
);
11392 /* Small multiply: 32 cycles for an integer multiply inst. */
11393 if (arm_arch6m
&& arm_m_profile_small_mul
)
11394 *total
= COSTS_N_INSNS (32);
11396 *total
= COSTS_N_INSNS (3);
11400 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11408 /* There is no point basing this on the tuning, since it is always the
11409 fast variant if it exists at all. */
11411 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11412 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11413 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11415 *total
= COSTS_N_INSNS (2);
11420 if (mode
== DImode
)
11422 *total
= COSTS_N_INSNS (5);
11426 if (mode
== SImode
)
11428 *total
= COSTS_N_INSNS (2);
11432 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11434 if (TARGET_HARD_FLOAT
11436 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11438 *total
= COSTS_N_INSNS (1);
11443 *total
= COSTS_N_INSNS (20);
11447 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11450 /* All address computations that can be done are free, but rtx cost returns
11451 the same for practically all of them. So we weight the different types
11452 of address here in the order (most pref first):
11453 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11455 arm_arm_address_cost (rtx x
)
11457 enum rtx_code c
= GET_CODE (x
);
11459 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11461 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11466 if (CONST_INT_P (XEXP (x
, 1)))
11469 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11479 arm_thumb_address_cost (rtx x
)
11481 enum rtx_code c
= GET_CODE (x
);
11486 && REG_P (XEXP (x
, 0))
11487 && CONST_INT_P (XEXP (x
, 1)))
11494 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11495 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11497 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11500 /* Adjust cost hook for XScale. */
11502 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11504 /* Some true dependencies can have a higher cost depending
11505 on precisely how certain input operands are used. */
11506 if (REG_NOTE_KIND(link
) == 0
11507 && recog_memoized (insn
) >= 0
11508 && recog_memoized (dep
) >= 0)
11510 int shift_opnum
= get_attr_shift (insn
);
11511 enum attr_type attr_type
= get_attr_type (dep
);
11513 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11514 operand for INSN. If we have a shifted input operand and the
11515 instruction we depend on is another ALU instruction, then we may
11516 have to account for an additional stall. */
11517 if (shift_opnum
!= 0
11518 && (attr_type
== TYPE_ALU_SHIFT_IMM
11519 || attr_type
== TYPE_ALUS_SHIFT_IMM
11520 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11521 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11522 || attr_type
== TYPE_ALU_SHIFT_REG
11523 || attr_type
== TYPE_ALUS_SHIFT_REG
11524 || attr_type
== TYPE_LOGIC_SHIFT_REG
11525 || attr_type
== TYPE_LOGICS_SHIFT_REG
11526 || attr_type
== TYPE_MOV_SHIFT
11527 || attr_type
== TYPE_MVN_SHIFT
11528 || attr_type
== TYPE_MOV_SHIFT_REG
11529 || attr_type
== TYPE_MVN_SHIFT_REG
))
11531 rtx shifted_operand
;
11534 /* Get the shifted operand. */
11535 extract_insn (insn
);
11536 shifted_operand
= recog_data
.operand
[shift_opnum
];
11538 /* Iterate over all the operands in DEP. If we write an operand
11539 that overlaps with SHIFTED_OPERAND, then we have increase the
11540 cost of this dependency. */
11541 extract_insn (dep
);
11542 preprocess_constraints (dep
);
11543 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11545 /* We can ignore strict inputs. */
11546 if (recog_data
.operand_type
[opno
] == OP_IN
)
11549 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11561 /* Adjust cost hook for Cortex A9. */
11563 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11565 switch (REG_NOTE_KIND (link
))
11572 case REG_DEP_OUTPUT
:
11573 if (recog_memoized (insn
) >= 0
11574 && recog_memoized (dep
) >= 0)
11576 if (GET_CODE (PATTERN (insn
)) == SET
)
11579 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11581 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11583 enum attr_type attr_type_insn
= get_attr_type (insn
);
11584 enum attr_type attr_type_dep
= get_attr_type (dep
);
11586 /* By default all dependencies of the form
11589 have an extra latency of 1 cycle because
11590 of the input and output dependency in this
11591 case. However this gets modeled as an true
11592 dependency and hence all these checks. */
11593 if (REG_P (SET_DEST (PATTERN (insn
)))
11594 && REG_P (SET_DEST (PATTERN (dep
)))
11595 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11596 SET_DEST (PATTERN (dep
))))
11598 /* FMACS is a special case where the dependent
11599 instruction can be issued 3 cycles before
11600 the normal latency in case of an output
11602 if ((attr_type_insn
== TYPE_FMACS
11603 || attr_type_insn
== TYPE_FMACD
)
11604 && (attr_type_dep
== TYPE_FMACS
11605 || attr_type_dep
== TYPE_FMACD
))
11607 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11608 *cost
= insn_default_latency (dep
) - 3;
11610 *cost
= insn_default_latency (dep
);
11615 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11616 *cost
= insn_default_latency (dep
) + 1;
11618 *cost
= insn_default_latency (dep
);
11628 gcc_unreachable ();
11634 /* Adjust cost hook for FA726TE. */
11636 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11638 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11639 have penalty of 3. */
11640 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11641 && recog_memoized (insn
) >= 0
11642 && recog_memoized (dep
) >= 0
11643 && get_attr_conds (dep
) == CONDS_SET
)
11645 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11646 if (get_attr_conds (insn
) == CONDS_USE
11647 && get_attr_type (insn
) != TYPE_BRANCH
)
11653 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11654 || get_attr_conds (insn
) == CONDS_USE
)
11664 /* Implement TARGET_REGISTER_MOVE_COST.
11666 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11667 it is typically more expensive than a single memory access. We set
11668 the cost to less than two memory accesses so that floating
11669 point to integer conversion does not go through memory. */
11672 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11673 reg_class_t from
, reg_class_t to
)
11677 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11678 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11680 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11681 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11683 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11690 if (from
== HI_REGS
|| to
== HI_REGS
)
11697 /* Implement TARGET_MEMORY_MOVE_COST. */
11700 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11701 bool in ATTRIBUTE_UNUSED
)
11707 if (GET_MODE_SIZE (mode
) < 4)
11710 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11714 /* Vectorizer cost model implementation. */
11716 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11718 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11720 int misalign ATTRIBUTE_UNUSED
)
11724 switch (type_of_cost
)
11727 return current_tune
->vec_costs
->scalar_stmt_cost
;
11730 return current_tune
->vec_costs
->scalar_load_cost
;
11733 return current_tune
->vec_costs
->scalar_store_cost
;
11736 return current_tune
->vec_costs
->vec_stmt_cost
;
11739 return current_tune
->vec_costs
->vec_align_load_cost
;
11742 return current_tune
->vec_costs
->vec_store_cost
;
11744 case vec_to_scalar
:
11745 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11747 case scalar_to_vec
:
11748 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11750 case unaligned_load
:
11751 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11753 case unaligned_store
:
11754 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11756 case cond_branch_taken
:
11757 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11759 case cond_branch_not_taken
:
11760 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11763 case vec_promote_demote
:
11764 return current_tune
->vec_costs
->vec_stmt_cost
;
11766 case vec_construct
:
11767 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11768 return elements
/ 2 + 1;
11771 gcc_unreachable ();
11775 /* Implement targetm.vectorize.add_stmt_cost. */
11778 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11779 struct _stmt_vec_info
*stmt_info
, int misalign
,
11780 enum vect_cost_model_location where
)
11782 unsigned *cost
= (unsigned *) data
;
11783 unsigned retval
= 0;
11785 if (flag_vect_cost_model
)
11787 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11788 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11790 /* Statements in an inner loop relative to the loop being
11791 vectorized are weighted more heavily. The value here is
11792 arbitrary and could potentially be improved with analysis. */
11793 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11794 count
*= 50; /* FIXME. */
11796 retval
= (unsigned) (count
* stmt_cost
);
11797 cost
[where
] += retval
;
11803 /* Return true if and only if this insn can dual-issue only as older. */
11805 cortexa7_older_only (rtx_insn
*insn
)
11807 if (recog_memoized (insn
) < 0)
11810 switch (get_attr_type (insn
))
11812 case TYPE_ALU_DSP_REG
:
11813 case TYPE_ALU_SREG
:
11814 case TYPE_ALUS_SREG
:
11815 case TYPE_LOGIC_REG
:
11816 case TYPE_LOGICS_REG
:
11818 case TYPE_ADCS_REG
:
11823 case TYPE_SHIFT_IMM
:
11824 case TYPE_SHIFT_REG
:
11825 case TYPE_LOAD_BYTE
:
11828 case TYPE_FFARITHS
:
11830 case TYPE_FFARITHD
:
11848 case TYPE_F_STORES
:
11855 /* Return true if and only if this insn can dual-issue as younger. */
11857 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11859 if (recog_memoized (insn
) < 0)
11862 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11866 switch (get_attr_type (insn
))
11869 case TYPE_ALUS_IMM
:
11870 case TYPE_LOGIC_IMM
:
11871 case TYPE_LOGICS_IMM
:
11876 case TYPE_MOV_SHIFT
:
11877 case TYPE_MOV_SHIFT_REG
:
11887 /* Look for an instruction that can dual issue only as an older
11888 instruction, and move it in front of any instructions that can
11889 dual-issue as younger, while preserving the relative order of all
11890 other instructions in the ready list. This is a hueuristic to help
11891 dual-issue in later cycles, by postponing issue of more flexible
11892 instructions. This heuristic may affect dual issue opportunities
11893 in the current cycle. */
11895 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11896 int *n_readyp
, int clock
)
11899 int first_older_only
= -1, first_younger
= -1;
11903 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11907 /* Traverse the ready list from the head (the instruction to issue
11908 first), and looking for the first instruction that can issue as
11909 younger and the first instruction that can dual-issue only as
11911 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11913 rtx_insn
*insn
= ready
[i
];
11914 if (cortexa7_older_only (insn
))
11916 first_older_only
= i
;
11918 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11921 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11925 /* Nothing to reorder because either no younger insn found or insn
11926 that can dual-issue only as older appears before any insn that
11927 can dual-issue as younger. */
11928 if (first_younger
== -1)
11931 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11935 /* Nothing to reorder because no older-only insn in the ready list. */
11936 if (first_older_only
== -1)
11939 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11943 /* Move first_older_only insn before first_younger. */
11945 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11946 INSN_UID(ready
[first_older_only
]),
11947 INSN_UID(ready
[first_younger
]));
11948 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11949 for (i
= first_older_only
; i
< first_younger
; i
++)
11951 ready
[i
] = ready
[i
+1];
11954 ready
[i
] = first_older_only_insn
;
11958 /* Implement TARGET_SCHED_REORDER. */
11960 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11966 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11969 /* Do nothing for other cores. */
11973 return arm_issue_rate ();
11976 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11977 It corrects the value of COST based on the relationship between
11978 INSN and DEP through the dependence LINK. It returns the new
11979 value. There is a per-core adjust_cost hook to adjust scheduler costs
11980 and the per-core hook can choose to completely override the generic
11981 adjust_cost function. Only put bits of code into arm_adjust_cost that
11982 are common across all cores. */
11984 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
11988 /* When generating Thumb-1 code, we want to place flag-setting operations
11989 close to a conditional branch which depends on them, so that we can
11990 omit the comparison. */
11992 && REG_NOTE_KIND (link
) == 0
11993 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11994 && recog_memoized (dep
) >= 0
11995 && get_attr_conds (dep
) == CONDS_SET
)
11998 if (current_tune
->sched_adjust_cost
!= NULL
)
12000 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
12004 /* XXX Is this strictly true? */
12005 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
12006 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
12009 /* Call insns don't incur a stall, even if they follow a load. */
12010 if (REG_NOTE_KIND (link
) == 0
12014 if ((i_pat
= single_set (insn
)) != NULL
12015 && MEM_P (SET_SRC (i_pat
))
12016 && (d_pat
= single_set (dep
)) != NULL
12017 && MEM_P (SET_DEST (d_pat
)))
12019 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12020 /* This is a load after a store, there is no conflict if the load reads
12021 from a cached area. Assume that loads from the stack, and from the
12022 constant pool are cached, and that others will miss. This is a
12025 if ((GET_CODE (src_mem
) == SYMBOL_REF
12026 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12027 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12028 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12029 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12037 arm_max_conditional_execute (void)
12039 return max_insns_skipped
;
12043 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12046 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12048 return (optimize
> 0) ? 2 : 0;
12052 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12054 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12057 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12058 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12059 sequences of non-executed instructions in IT blocks probably take the same
12060 amount of time as executed instructions (and the IT instruction itself takes
12061 space in icache). This function was experimentally determined to give good
12062 results on a popular embedded benchmark. */
12065 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12067 return (TARGET_32BIT
&& speed_p
) ? 1
12068 : arm_default_branch_cost (speed_p
, predictable_p
);
12071 static bool fp_consts_inited
= false;
12073 static REAL_VALUE_TYPE value_fp0
;
12076 init_fp_table (void)
12080 r
= REAL_VALUE_ATOF ("0", DFmode
);
12082 fp_consts_inited
= true;
12085 /* Return TRUE if rtx X is a valid immediate FP constant. */
12087 arm_const_double_rtx (rtx x
)
12091 if (!fp_consts_inited
)
12094 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12095 if (REAL_VALUE_MINUS_ZERO (r
))
12098 if (REAL_VALUES_EQUAL (r
, value_fp0
))
12104 /* VFPv3 has a fairly wide range of representable immediates, formed from
12105 "quarter-precision" floating-point values. These can be evaluated using this
12106 formula (with ^ for exponentiation):
12110 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12111 16 <= n <= 31 and 0 <= r <= 7.
12113 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12115 - A (most-significant) is the sign bit.
12116 - BCD are the exponent (encoded as r XOR 3).
12117 - EFGH are the mantissa (encoded as n - 16).
12120 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12121 fconst[sd] instruction, or -1 if X isn't suitable. */
12123 vfp3_const_double_index (rtx x
)
12125 REAL_VALUE_TYPE r
, m
;
12126 int sign
, exponent
;
12127 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12128 unsigned HOST_WIDE_INT mask
;
12129 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12132 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12135 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12137 /* We can't represent these things, so detect them first. */
12138 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12141 /* Extract sign, exponent and mantissa. */
12142 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12143 r
= real_value_abs (&r
);
12144 exponent
= REAL_EXP (&r
);
12145 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12146 highest (sign) bit, with a fixed binary point at bit point_pos.
12147 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12148 bits for the mantissa, this may fail (low bits would be lost). */
12149 real_ldexp (&m
, &r
, point_pos
- exponent
);
12150 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12151 mantissa
= w
.elt (0);
12152 mant_hi
= w
.elt (1);
12154 /* If there are bits set in the low part of the mantissa, we can't
12155 represent this value. */
12159 /* Now make it so that mantissa contains the most-significant bits, and move
12160 the point_pos to indicate that the least-significant bits have been
12162 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12163 mantissa
= mant_hi
;
12165 /* We can permit four significant bits of mantissa only, plus a high bit
12166 which is always 1. */
12167 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12168 if ((mantissa
& mask
) != 0)
12171 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12172 mantissa
>>= point_pos
- 5;
12174 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12175 floating-point immediate zero with Neon using an integer-zero load, but
12176 that case is handled elsewhere.) */
12180 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12182 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12183 normalized significands are in the range [1, 2). (Our mantissa is shifted
12184 left 4 places at this point relative to normalized IEEE754 values). GCC
12185 internally uses [0.5, 1) (see real.c), so the exponent returned from
12186 REAL_EXP must be altered. */
12187 exponent
= 5 - exponent
;
12189 if (exponent
< 0 || exponent
> 7)
12192 /* Sign, mantissa and exponent are now in the correct form to plug into the
12193 formula described in the comment above. */
12194 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12197 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12199 vfp3_const_double_rtx (rtx x
)
12204 return vfp3_const_double_index (x
) != -1;
12207 /* Recognize immediates which can be used in various Neon instructions. Legal
12208 immediates are described by the following table (for VMVN variants, the
12209 bitwise inverse of the constant shown is recognized. In either case, VMOV
12210 is output and the correct instruction to use for a given constant is chosen
12211 by the assembler). The constant shown is replicated across all elements of
12212 the destination vector.
12214 insn elems variant constant (binary)
12215 ---- ----- ------- -----------------
12216 vmov i32 0 00000000 00000000 00000000 abcdefgh
12217 vmov i32 1 00000000 00000000 abcdefgh 00000000
12218 vmov i32 2 00000000 abcdefgh 00000000 00000000
12219 vmov i32 3 abcdefgh 00000000 00000000 00000000
12220 vmov i16 4 00000000 abcdefgh
12221 vmov i16 5 abcdefgh 00000000
12222 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12223 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12224 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12225 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12226 vmvn i16 10 00000000 abcdefgh
12227 vmvn i16 11 abcdefgh 00000000
12228 vmov i32 12 00000000 00000000 abcdefgh 11111111
12229 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12230 vmov i32 14 00000000 abcdefgh 11111111 11111111
12231 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12232 vmov i8 16 abcdefgh
12233 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12234 eeeeeeee ffffffff gggggggg hhhhhhhh
12235 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12236 vmov f32 19 00000000 00000000 00000000 00000000
12238 For case 18, B = !b. Representable values are exactly those accepted by
12239 vfp3_const_double_index, but are output as floating-point numbers rather
12242 For case 19, we will change it to vmov.i32 when assembling.
12244 Variants 0-5 (inclusive) may also be used as immediates for the second
12245 operand of VORR/VBIC instructions.
12247 The INVERSE argument causes the bitwise inverse of the given operand to be
12248 recognized instead (used for recognizing legal immediates for the VAND/VORN
12249 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12250 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12251 output, rather than the real insns vbic/vorr).
12253 INVERSE makes no difference to the recognition of float vectors.
12255 The return value is the variant of immediate as shown in the above table, or
12256 -1 if the given value doesn't match any of the listed patterns.
12259 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12260 rtx
*modconst
, int *elementwidth
)
12262 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12264 for (i = 0; i < idx; i += (STRIDE)) \
12269 immtype = (CLASS); \
12270 elsize = (ELSIZE); \
12274 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12275 unsigned int innersize
;
12276 unsigned char bytes
[16];
12277 int immtype
= -1, matches
;
12278 unsigned int invmask
= inverse
? 0xff : 0;
12279 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12283 n_elts
= CONST_VECTOR_NUNITS (op
);
12284 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12289 if (mode
== VOIDmode
)
12291 innersize
= GET_MODE_SIZE (mode
);
12294 /* Vectors of float constants. */
12295 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12297 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12298 REAL_VALUE_TYPE r0
;
12300 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12303 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12305 for (i
= 1; i
< n_elts
; i
++)
12307 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12308 REAL_VALUE_TYPE re
;
12310 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12312 if (!REAL_VALUES_EQUAL (r0
, re
))
12317 *modconst
= CONST_VECTOR_ELT (op
, 0);
12322 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12328 /* Splat vector constant out into a byte vector. */
12329 for (i
= 0; i
< n_elts
; i
++)
12331 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12332 unsigned HOST_WIDE_INT elpart
;
12333 unsigned int part
, parts
;
12335 if (CONST_INT_P (el
))
12337 elpart
= INTVAL (el
);
12340 else if (CONST_DOUBLE_P (el
))
12342 elpart
= CONST_DOUBLE_LOW (el
);
12346 gcc_unreachable ();
12348 for (part
= 0; part
< parts
; part
++)
12351 for (byte
= 0; byte
< innersize
; byte
++)
12353 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12354 elpart
>>= BITS_PER_UNIT
;
12356 if (CONST_DOUBLE_P (el
))
12357 elpart
= CONST_DOUBLE_HIGH (el
);
12361 /* Sanity check. */
12362 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12366 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12367 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12369 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12370 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12372 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12373 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12375 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12376 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12378 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12380 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12382 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12383 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12385 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12386 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12388 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12389 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12391 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12392 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12394 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12396 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12398 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12399 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12401 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12402 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12404 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12405 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12407 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12408 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12410 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12412 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12413 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12421 *elementwidth
= elsize
;
12425 unsigned HOST_WIDE_INT imm
= 0;
12427 /* Un-invert bytes of recognized vector, if necessary. */
12429 for (i
= 0; i
< idx
; i
++)
12430 bytes
[i
] ^= invmask
;
12434 /* FIXME: Broken on 32-bit H_W_I hosts. */
12435 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12437 for (i
= 0; i
< 8; i
++)
12438 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12439 << (i
* BITS_PER_UNIT
);
12441 *modconst
= GEN_INT (imm
);
12445 unsigned HOST_WIDE_INT imm
= 0;
12447 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12448 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12450 *modconst
= GEN_INT (imm
);
12458 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12459 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12460 float elements), and a modified constant (whatever should be output for a
12461 VMOV) in *MODCONST. */
12464 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12465 rtx
*modconst
, int *elementwidth
)
12469 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12475 *modconst
= tmpconst
;
12478 *elementwidth
= tmpwidth
;
12483 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12484 the immediate is valid, write a constant suitable for using as an operand
12485 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12486 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12489 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12490 rtx
*modconst
, int *elementwidth
)
12494 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12496 if (retval
< 0 || retval
> 5)
12500 *modconst
= tmpconst
;
12503 *elementwidth
= tmpwidth
;
12508 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12509 the immediate is valid, write a constant suitable for using as an operand
12510 to VSHR/VSHL to *MODCONST and the corresponding element width to
12511 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12512 because they have different limitations. */
12515 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12516 rtx
*modconst
, int *elementwidth
,
12519 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12520 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12521 unsigned HOST_WIDE_INT last_elt
= 0;
12522 unsigned HOST_WIDE_INT maxshift
;
12524 /* Split vector constant out into a byte vector. */
12525 for (i
= 0; i
< n_elts
; i
++)
12527 rtx el
= CONST_VECTOR_ELT (op
, i
);
12528 unsigned HOST_WIDE_INT elpart
;
12530 if (CONST_INT_P (el
))
12531 elpart
= INTVAL (el
);
12532 else if (CONST_DOUBLE_P (el
))
12535 gcc_unreachable ();
12537 if (i
!= 0 && elpart
!= last_elt
)
12543 /* Shift less than element size. */
12544 maxshift
= innersize
* 8;
12548 /* Left shift immediate value can be from 0 to <size>-1. */
12549 if (last_elt
>= maxshift
)
12554 /* Right shift immediate value can be from 1 to <size>. */
12555 if (last_elt
== 0 || last_elt
> maxshift
)
12560 *elementwidth
= innersize
* 8;
12563 *modconst
= CONST_VECTOR_ELT (op
, 0);
12568 /* Return a string suitable for output of Neon immediate logic operation
12572 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12573 int inverse
, int quad
)
12575 int width
, is_valid
;
12576 static char templ
[40];
12578 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12580 gcc_assert (is_valid
!= 0);
12583 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12585 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12590 /* Return a string suitable for output of Neon immediate shift operation
12591 (VSHR or VSHL) MNEM. */
12594 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12595 machine_mode mode
, int quad
,
12598 int width
, is_valid
;
12599 static char templ
[40];
12601 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12602 gcc_assert (is_valid
!= 0);
12605 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12607 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12612 /* Output a sequence of pairwise operations to implement a reduction.
12613 NOTE: We do "too much work" here, because pairwise operations work on two
12614 registers-worth of operands in one go. Unfortunately we can't exploit those
12615 extra calculations to do the full operation in fewer steps, I don't think.
12616 Although all vector elements of the result but the first are ignored, we
12617 actually calculate the same result in each of the elements. An alternative
12618 such as initially loading a vector with zero to use as each of the second
12619 operands would use up an additional register and take an extra instruction,
12620 for no particular gain. */
12623 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12624 rtx (*reduc
) (rtx
, rtx
, rtx
))
12626 machine_mode inner
= GET_MODE_INNER (mode
);
12627 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12630 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12632 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12633 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12638 /* If VALS is a vector constant that can be loaded into a register
12639 using VDUP, generate instructions to do so and return an RTX to
12640 assign to the register. Otherwise return NULL_RTX. */
12643 neon_vdup_constant (rtx vals
)
12645 machine_mode mode
= GET_MODE (vals
);
12646 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12647 int n_elts
= GET_MODE_NUNITS (mode
);
12648 bool all_same
= true;
12652 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12655 for (i
= 0; i
< n_elts
; ++i
)
12657 x
= XVECEXP (vals
, 0, i
);
12658 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12663 /* The elements are not all the same. We could handle repeating
12664 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12665 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12669 /* We can load this constant by using VDUP and a constant in a
12670 single ARM register. This will be cheaper than a vector
12673 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12674 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12677 /* Generate code to load VALS, which is a PARALLEL containing only
12678 constants (for vec_init) or CONST_VECTOR, efficiently into a
12679 register. Returns an RTX to copy into the register, or NULL_RTX
12680 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12683 neon_make_constant (rtx vals
)
12685 machine_mode mode
= GET_MODE (vals
);
12687 rtx const_vec
= NULL_RTX
;
12688 int n_elts
= GET_MODE_NUNITS (mode
);
12692 if (GET_CODE (vals
) == CONST_VECTOR
)
12694 else if (GET_CODE (vals
) == PARALLEL
)
12696 /* A CONST_VECTOR must contain only CONST_INTs and
12697 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12698 Only store valid constants in a CONST_VECTOR. */
12699 for (i
= 0; i
< n_elts
; ++i
)
12701 rtx x
= XVECEXP (vals
, 0, i
);
12702 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12705 if (n_const
== n_elts
)
12706 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12709 gcc_unreachable ();
12711 if (const_vec
!= NULL
12712 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12713 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12715 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12716 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12717 pipeline cycle; creating the constant takes one or two ARM
12718 pipeline cycles. */
12720 else if (const_vec
!= NULL_RTX
)
12721 /* Load from constant pool. On Cortex-A8 this takes two cycles
12722 (for either double or quad vectors). We can not take advantage
12723 of single-cycle VLD1 because we need a PC-relative addressing
12727 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12728 We can not construct an initializer. */
12732 /* Initialize vector TARGET to VALS. */
12735 neon_expand_vector_init (rtx target
, rtx vals
)
12737 machine_mode mode
= GET_MODE (target
);
12738 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12739 int n_elts
= GET_MODE_NUNITS (mode
);
12740 int n_var
= 0, one_var
= -1;
12741 bool all_same
= true;
12745 for (i
= 0; i
< n_elts
; ++i
)
12747 x
= XVECEXP (vals
, 0, i
);
12748 if (!CONSTANT_P (x
))
12749 ++n_var
, one_var
= i
;
12751 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12757 rtx constant
= neon_make_constant (vals
);
12758 if (constant
!= NULL_RTX
)
12760 emit_move_insn (target
, constant
);
12765 /* Splat a single non-constant element if we can. */
12766 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12768 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12769 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12770 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12774 /* One field is non-constant. Load constant then overwrite varying
12775 field. This is more efficient than using the stack. */
12778 rtx copy
= copy_rtx (vals
);
12779 rtx index
= GEN_INT (one_var
);
12781 /* Load constant part of vector, substitute neighboring value for
12782 varying element. */
12783 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12784 neon_expand_vector_init (target
, copy
);
12786 /* Insert variable. */
12787 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12791 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12794 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12797 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12800 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12803 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12806 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12809 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12812 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12815 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12818 gcc_unreachable ();
12823 /* Construct the vector in memory one field at a time
12824 and load the whole vector. */
12825 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12826 for (i
= 0; i
< n_elts
; i
++)
12827 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12828 i
* GET_MODE_SIZE (inner_mode
)),
12829 XVECEXP (vals
, 0, i
));
12830 emit_move_insn (target
, mem
);
12833 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12834 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12835 reported source locations are bogus. */
12838 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12841 HOST_WIDE_INT lane
;
12843 gcc_assert (CONST_INT_P (operand
));
12845 lane
= INTVAL (operand
);
12847 if (lane
< low
|| lane
>= high
)
12851 /* Bounds-check lanes. */
12854 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12856 bounds_check (operand
, low
, high
, "lane out of range");
12859 /* Bounds-check constants. */
12862 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12864 bounds_check (operand
, low
, high
, "constant out of range");
12868 neon_element_bits (machine_mode mode
)
12870 if (mode
== DImode
)
12871 return GET_MODE_BITSIZE (mode
);
12873 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12877 /* Predicates for `match_operand' and `match_operator'. */
12879 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12880 WB is true if full writeback address modes are allowed and is false
12881 if limited writeback address modes (POST_INC and PRE_DEC) are
12885 arm_coproc_mem_operand (rtx op
, bool wb
)
12889 /* Reject eliminable registers. */
12890 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12891 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12892 || reg_mentioned_p (arg_pointer_rtx
, op
)
12893 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12894 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12895 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12896 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12899 /* Constants are converted into offsets from labels. */
12903 ind
= XEXP (op
, 0);
12905 if (reload_completed
12906 && (GET_CODE (ind
) == LABEL_REF
12907 || (GET_CODE (ind
) == CONST
12908 && GET_CODE (XEXP (ind
, 0)) == PLUS
12909 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12910 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12913 /* Match: (mem (reg)). */
12915 return arm_address_register_rtx_p (ind
, 0);
12917 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12918 acceptable in any case (subject to verification by
12919 arm_address_register_rtx_p). We need WB to be true to accept
12920 PRE_INC and POST_DEC. */
12921 if (GET_CODE (ind
) == POST_INC
12922 || GET_CODE (ind
) == PRE_DEC
12924 && (GET_CODE (ind
) == PRE_INC
12925 || GET_CODE (ind
) == POST_DEC
)))
12926 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12929 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12930 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12931 && GET_CODE (XEXP (ind
, 1)) == PLUS
12932 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12933 ind
= XEXP (ind
, 1);
12938 if (GET_CODE (ind
) == PLUS
12939 && REG_P (XEXP (ind
, 0))
12940 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12941 && CONST_INT_P (XEXP (ind
, 1))
12942 && INTVAL (XEXP (ind
, 1)) > -1024
12943 && INTVAL (XEXP (ind
, 1)) < 1024
12944 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12950 /* Return TRUE if OP is a memory operand which we can load or store a vector
12951 to/from. TYPE is one of the following values:
12952 0 - Vector load/stor (vldr)
12953 1 - Core registers (ldm)
12954 2 - Element/structure loads (vld1)
12957 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12961 /* Reject eliminable registers. */
12962 if (! (reload_in_progress
|| reload_completed
)
12963 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12964 || reg_mentioned_p (arg_pointer_rtx
, op
)
12965 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12966 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12967 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12968 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12971 /* Constants are converted into offsets from labels. */
12975 ind
= XEXP (op
, 0);
12977 if (reload_completed
12978 && (GET_CODE (ind
) == LABEL_REF
12979 || (GET_CODE (ind
) == CONST
12980 && GET_CODE (XEXP (ind
, 0)) == PLUS
12981 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12982 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12985 /* Match: (mem (reg)). */
12987 return arm_address_register_rtx_p (ind
, 0);
12989 /* Allow post-increment with Neon registers. */
12990 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12991 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12992 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12994 /* Allow post-increment by register for VLDn */
12995 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12996 && GET_CODE (XEXP (ind
, 1)) == PLUS
12997 && REG_P (XEXP (XEXP (ind
, 1), 1)))
13004 && GET_CODE (ind
) == PLUS
13005 && REG_P (XEXP (ind
, 0))
13006 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13007 && CONST_INT_P (XEXP (ind
, 1))
13008 && INTVAL (XEXP (ind
, 1)) > -1024
13009 /* For quad modes, we restrict the constant offset to be slightly less
13010 than what the instruction format permits. We have no such constraint
13011 on double mode offsets. (This must match arm_legitimate_index_p.) */
13012 && (INTVAL (XEXP (ind
, 1))
13013 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13014 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13020 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13023 neon_struct_mem_operand (rtx op
)
13027 /* Reject eliminable registers. */
13028 if (! (reload_in_progress
|| reload_completed
)
13029 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13030 || reg_mentioned_p (arg_pointer_rtx
, op
)
13031 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13032 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13033 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13034 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13037 /* Constants are converted into offsets from labels. */
13041 ind
= XEXP (op
, 0);
13043 if (reload_completed
13044 && (GET_CODE (ind
) == LABEL_REF
13045 || (GET_CODE (ind
) == CONST
13046 && GET_CODE (XEXP (ind
, 0)) == PLUS
13047 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13048 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13051 /* Match: (mem (reg)). */
13053 return arm_address_register_rtx_p (ind
, 0);
13055 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13056 if (GET_CODE (ind
) == POST_INC
13057 || GET_CODE (ind
) == PRE_DEC
)
13058 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13063 /* Return true if X is a register that will be eliminated later on. */
13065 arm_eliminable_register (rtx x
)
13067 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13068 || REGNO (x
) == ARG_POINTER_REGNUM
13069 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13070 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13073 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13074 coprocessor registers. Otherwise return NO_REGS. */
13077 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13079 if (mode
== HFmode
)
13081 if (!TARGET_NEON_FP16
)
13082 return GENERAL_REGS
;
13083 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13085 return GENERAL_REGS
;
13088 /* The neon move patterns handle all legitimate vector and struct
13091 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13092 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13093 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13094 || VALID_NEON_STRUCT_MODE (mode
)))
13097 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13100 return GENERAL_REGS
;
13103 /* Values which must be returned in the most-significant end of the return
13107 arm_return_in_msb (const_tree valtype
)
13109 return (TARGET_AAPCS_BASED
13110 && BYTES_BIG_ENDIAN
13111 && (AGGREGATE_TYPE_P (valtype
)
13112 || TREE_CODE (valtype
) == COMPLEX_TYPE
13113 || FIXED_POINT_TYPE_P (valtype
)));
13116 /* Return TRUE if X references a SYMBOL_REF. */
13118 symbol_mentioned_p (rtx x
)
13123 if (GET_CODE (x
) == SYMBOL_REF
)
13126 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13127 are constant offsets, not symbols. */
13128 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13131 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13133 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13139 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13140 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13143 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13150 /* Return TRUE if X references a LABEL_REF. */
13152 label_mentioned_p (rtx x
)
13157 if (GET_CODE (x
) == LABEL_REF
)
13160 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13161 instruction, but they are constant offsets, not symbols. */
13162 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13165 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13166 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13172 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13173 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13176 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13184 tls_mentioned_p (rtx x
)
13186 switch (GET_CODE (x
))
13189 return tls_mentioned_p (XEXP (x
, 0));
13192 if (XINT (x
, 1) == UNSPEC_TLS
)
13200 /* Must not copy any rtx that uses a pc-relative address. */
13203 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
13205 if (GET_CODE (*x
) == UNSPEC
13206 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
13207 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
13213 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13215 /* The tls call insn cannot be copied, as it is paired with a data
13217 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13220 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
13224 minmax_code (rtx x
)
13226 enum rtx_code code
= GET_CODE (x
);
13239 gcc_unreachable ();
13243 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13246 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13247 int *mask
, bool *signed_sat
)
13249 /* The high bound must be a power of two minus one. */
13250 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13254 /* The low bound is either zero (for usat) or one less than the
13255 negation of the high bound (for ssat). */
13256 if (INTVAL (lo_bound
) == 0)
13261 *signed_sat
= false;
13266 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13271 *signed_sat
= true;
13279 /* Return 1 if memory locations are adjacent. */
13281 adjacent_mem_locations (rtx a
, rtx b
)
13283 /* We don't guarantee to preserve the order of these memory refs. */
13284 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13287 if ((REG_P (XEXP (a
, 0))
13288 || (GET_CODE (XEXP (a
, 0)) == PLUS
13289 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13290 && (REG_P (XEXP (b
, 0))
13291 || (GET_CODE (XEXP (b
, 0)) == PLUS
13292 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13294 HOST_WIDE_INT val0
= 0, val1
= 0;
13298 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13300 reg0
= XEXP (XEXP (a
, 0), 0);
13301 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13304 reg0
= XEXP (a
, 0);
13306 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13308 reg1
= XEXP (XEXP (b
, 0), 0);
13309 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13312 reg1
= XEXP (b
, 0);
13314 /* Don't accept any offset that will require multiple
13315 instructions to handle, since this would cause the
13316 arith_adjacentmem pattern to output an overlong sequence. */
13317 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13320 /* Don't allow an eliminable register: register elimination can make
13321 the offset too large. */
13322 if (arm_eliminable_register (reg0
))
13325 val_diff
= val1
- val0
;
13329 /* If the target has load delay slots, then there's no benefit
13330 to using an ldm instruction unless the offset is zero and
13331 we are optimizing for size. */
13332 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13333 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13334 && (val_diff
== 4 || val_diff
== -4));
13337 return ((REGNO (reg0
) == REGNO (reg1
))
13338 && (val_diff
== 4 || val_diff
== -4));
13344 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13345 for load operations, false for store operations. CONSECUTIVE is true
13346 if the register numbers in the operation must be consecutive in the register
13347 bank. RETURN_PC is true if value is to be loaded in PC.
13348 The pattern we are trying to match for load is:
13349 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13350 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13353 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13356 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13357 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13358 3. If consecutive is TRUE, then for kth register being loaded,
13359 REGNO (R_dk) = REGNO (R_d0) + k.
13360 The pattern for store is similar. */
13362 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13363 bool consecutive
, bool return_pc
)
13365 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13366 rtx reg
, mem
, addr
;
13368 unsigned first_regno
;
13369 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13371 bool addr_reg_in_reglist
= false;
13372 bool update
= false;
13377 /* If not in SImode, then registers must be consecutive
13378 (e.g., VLDM instructions for DFmode). */
13379 gcc_assert ((mode
== SImode
) || consecutive
);
13380 /* Setting return_pc for stores is illegal. */
13381 gcc_assert (!return_pc
|| load
);
13383 /* Set up the increments and the regs per val based on the mode. */
13384 reg_increment
= GET_MODE_SIZE (mode
);
13385 regs_per_val
= reg_increment
/ 4;
13386 offset_adj
= return_pc
? 1 : 0;
13389 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13390 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13393 /* Check if this is a write-back. */
13394 elt
= XVECEXP (op
, 0, offset_adj
);
13395 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13401 /* The offset adjustment must be the number of registers being
13402 popped times the size of a single register. */
13403 if (!REG_P (SET_DEST (elt
))
13404 || !REG_P (XEXP (SET_SRC (elt
), 0))
13405 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13406 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13407 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13408 ((count
- 1 - offset_adj
) * reg_increment
))
13412 i
= i
+ offset_adj
;
13413 base
= base
+ offset_adj
;
13414 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13415 success depends on the type: VLDM can do just one reg,
13416 LDM must do at least two. */
13417 if ((count
<= i
) && (mode
== SImode
))
13420 elt
= XVECEXP (op
, 0, i
- 1);
13421 if (GET_CODE (elt
) != SET
)
13426 reg
= SET_DEST (elt
);
13427 mem
= SET_SRC (elt
);
13431 reg
= SET_SRC (elt
);
13432 mem
= SET_DEST (elt
);
13435 if (!REG_P (reg
) || !MEM_P (mem
))
13438 regno
= REGNO (reg
);
13439 first_regno
= regno
;
13440 addr
= XEXP (mem
, 0);
13441 if (GET_CODE (addr
) == PLUS
)
13443 if (!CONST_INT_P (XEXP (addr
, 1)))
13446 offset
= INTVAL (XEXP (addr
, 1));
13447 addr
= XEXP (addr
, 0);
13453 /* Don't allow SP to be loaded unless it is also the base register. It
13454 guarantees that SP is reset correctly when an LDM instruction
13455 is interrupted. Otherwise, we might end up with a corrupt stack. */
13456 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13459 for (; i
< count
; i
++)
13461 elt
= XVECEXP (op
, 0, i
);
13462 if (GET_CODE (elt
) != SET
)
13467 reg
= SET_DEST (elt
);
13468 mem
= SET_SRC (elt
);
13472 reg
= SET_SRC (elt
);
13473 mem
= SET_DEST (elt
);
13477 || GET_MODE (reg
) != mode
13478 || REGNO (reg
) <= regno
13481 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13482 /* Don't allow SP to be loaded unless it is also the base register. It
13483 guarantees that SP is reset correctly when an LDM instruction
13484 is interrupted. Otherwise, we might end up with a corrupt stack. */
13485 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13487 || GET_MODE (mem
) != mode
13488 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13489 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13490 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13491 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13492 offset
+ (i
- base
) * reg_increment
))
13493 && (!REG_P (XEXP (mem
, 0))
13494 || offset
+ (i
- base
) * reg_increment
!= 0)))
13497 regno
= REGNO (reg
);
13498 if (regno
== REGNO (addr
))
13499 addr_reg_in_reglist
= true;
13504 if (update
&& addr_reg_in_reglist
)
13507 /* For Thumb-1, address register is always modified - either by write-back
13508 or by explicit load. If the pattern does not describe an update,
13509 then the address register must be in the list of loaded registers. */
13511 return update
|| addr_reg_in_reglist
;
13517 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13518 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13519 instruction. ADD_OFFSET is nonzero if the base address register needs
13520 to be modified with an add instruction before we can use it. */
13523 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13524 int nops
, HOST_WIDE_INT add_offset
)
13526 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13527 if the offset isn't small enough. The reason 2 ldrs are faster
13528 is because these ARMs are able to do more than one cache access
13529 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13530 whilst the ARM8 has a double bandwidth cache. This means that
13531 these cores can do both an instruction fetch and a data fetch in
13532 a single cycle, so the trick of calculating the address into a
13533 scratch register (one of the result regs) and then doing a load
13534 multiple actually becomes slower (and no smaller in code size).
13535 That is the transformation
13537 ldr rd1, [rbase + offset]
13538 ldr rd2, [rbase + offset + 4]
13542 add rd1, rbase, offset
13543 ldmia rd1, {rd1, rd2}
13545 produces worse code -- '3 cycles + any stalls on rd2' instead of
13546 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13547 access per cycle, the first sequence could never complete in less
13548 than 6 cycles, whereas the ldm sequence would only take 5 and
13549 would make better use of sequential accesses if not hitting the
13552 We cheat here and test 'arm_ld_sched' which we currently know to
13553 only be true for the ARM8, ARM9 and StrongARM. If this ever
13554 changes, then the test below needs to be reworked. */
13555 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13558 /* XScale has load-store double instructions, but they have stricter
13559 alignment requirements than load-store multiple, so we cannot
13562 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13563 the pipeline until completion.
13571 An ldr instruction takes 1-3 cycles, but does not block the
13580 Best case ldr will always win. However, the more ldr instructions
13581 we issue, the less likely we are to be able to schedule them well.
13582 Using ldr instructions also increases code size.
13584 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13585 for counts of 3 or 4 regs. */
13586 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13591 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13592 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13593 an array ORDER which describes the sequence to use when accessing the
13594 offsets that produces an ascending order. In this sequence, each
13595 offset must be larger by exactly 4 than the previous one. ORDER[0]
13596 must have been filled in with the lowest offset by the caller.
13597 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13598 we use to verify that ORDER produces an ascending order of registers.
13599 Return true if it was possible to construct such an order, false if
13603 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13604 int *unsorted_regs
)
13607 for (i
= 1; i
< nops
; i
++)
13611 order
[i
] = order
[i
- 1];
13612 for (j
= 0; j
< nops
; j
++)
13613 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13615 /* We must find exactly one offset that is higher than the
13616 previous one by 4. */
13617 if (order
[i
] != order
[i
- 1])
13621 if (order
[i
] == order
[i
- 1])
13623 /* The register numbers must be ascending. */
13624 if (unsorted_regs
!= NULL
13625 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13631 /* Used to determine in a peephole whether a sequence of load
13632 instructions can be changed into a load-multiple instruction.
13633 NOPS is the number of separate load instructions we are examining. The
13634 first NOPS entries in OPERANDS are the destination registers, the
13635 next NOPS entries are memory operands. If this function is
13636 successful, *BASE is set to the common base register of the memory
13637 accesses; *LOAD_OFFSET is set to the first memory location's offset
13638 from that base register.
13639 REGS is an array filled in with the destination register numbers.
13640 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13641 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13642 the sequence of registers in REGS matches the loads from ascending memory
13643 locations, and the function verifies that the register numbers are
13644 themselves ascending. If CHECK_REGS is false, the register numbers
13645 are stored in the order they are found in the operands. */
13647 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13648 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13650 int unsorted_regs
[MAX_LDM_STM_OPS
];
13651 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13652 int order
[MAX_LDM_STM_OPS
];
13653 rtx base_reg_rtx
= NULL
;
13657 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13658 easily extended if required. */
13659 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13661 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13663 /* Loop over the operands and check that the memory references are
13664 suitable (i.e. immediate offsets from the same base register). At
13665 the same time, extract the target register, and the memory
13667 for (i
= 0; i
< nops
; i
++)
13672 /* Convert a subreg of a mem into the mem itself. */
13673 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13674 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13676 gcc_assert (MEM_P (operands
[nops
+ i
]));
13678 /* Don't reorder volatile memory references; it doesn't seem worth
13679 looking for the case where the order is ok anyway. */
13680 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13683 offset
= const0_rtx
;
13685 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13686 || (GET_CODE (reg
) == SUBREG
13687 && REG_P (reg
= SUBREG_REG (reg
))))
13688 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13689 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13690 || (GET_CODE (reg
) == SUBREG
13691 && REG_P (reg
= SUBREG_REG (reg
))))
13692 && (CONST_INT_P (offset
13693 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13697 base_reg
= REGNO (reg
);
13698 base_reg_rtx
= reg
;
13699 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13702 else if (base_reg
!= (int) REGNO (reg
))
13703 /* Not addressed from the same base register. */
13706 unsorted_regs
[i
] = (REG_P (operands
[i
])
13707 ? REGNO (operands
[i
])
13708 : REGNO (SUBREG_REG (operands
[i
])));
13710 /* If it isn't an integer register, or if it overwrites the
13711 base register but isn't the last insn in the list, then
13712 we can't do this. */
13713 if (unsorted_regs
[i
] < 0
13714 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13715 || unsorted_regs
[i
] > 14
13716 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13719 /* Don't allow SP to be loaded unless it is also the base
13720 register. It guarantees that SP is reset correctly when
13721 an LDM instruction is interrupted. Otherwise, we might
13722 end up with a corrupt stack. */
13723 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13726 unsorted_offsets
[i
] = INTVAL (offset
);
13727 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13731 /* Not a suitable memory address. */
13735 /* All the useful information has now been extracted from the
13736 operands into unsorted_regs and unsorted_offsets; additionally,
13737 order[0] has been set to the lowest offset in the list. Sort
13738 the offsets into order, verifying that they are adjacent, and
13739 check that the register numbers are ascending. */
13740 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13741 check_regs
? unsorted_regs
: NULL
))
13745 memcpy (saved_order
, order
, sizeof order
);
13751 for (i
= 0; i
< nops
; i
++)
13752 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13754 *load_offset
= unsorted_offsets
[order
[0]];
13758 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13761 if (unsorted_offsets
[order
[0]] == 0)
13762 ldm_case
= 1; /* ldmia */
13763 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13764 ldm_case
= 2; /* ldmib */
13765 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13766 ldm_case
= 3; /* ldmda */
13767 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13768 ldm_case
= 4; /* ldmdb */
13769 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13770 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13775 if (!multiple_operation_profitable_p (false, nops
,
13777 ? unsorted_offsets
[order
[0]] : 0))
13783 /* Used to determine in a peephole whether a sequence of store instructions can
13784 be changed into a store-multiple instruction.
13785 NOPS is the number of separate store instructions we are examining.
13786 NOPS_TOTAL is the total number of instructions recognized by the peephole
13788 The first NOPS entries in OPERANDS are the source registers, the next
13789 NOPS entries are memory operands. If this function is successful, *BASE is
13790 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13791 to the first memory location's offset from that base register. REGS is an
13792 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13793 likewise filled with the corresponding rtx's.
13794 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13795 numbers to an ascending order of stores.
13796 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13797 from ascending memory locations, and the function verifies that the register
13798 numbers are themselves ascending. If CHECK_REGS is false, the register
13799 numbers are stored in the order they are found in the operands. */
13801 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13802 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13803 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13805 int unsorted_regs
[MAX_LDM_STM_OPS
];
13806 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13807 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13808 int order
[MAX_LDM_STM_OPS
];
13810 rtx base_reg_rtx
= NULL
;
13813 /* Write back of base register is currently only supported for Thumb 1. */
13814 int base_writeback
= TARGET_THUMB1
;
13816 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13817 easily extended if required. */
13818 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13820 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13822 /* Loop over the operands and check that the memory references are
13823 suitable (i.e. immediate offsets from the same base register). At
13824 the same time, extract the target register, and the memory
13826 for (i
= 0; i
< nops
; i
++)
13831 /* Convert a subreg of a mem into the mem itself. */
13832 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13833 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13835 gcc_assert (MEM_P (operands
[nops
+ i
]));
13837 /* Don't reorder volatile memory references; it doesn't seem worth
13838 looking for the case where the order is ok anyway. */
13839 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13842 offset
= const0_rtx
;
13844 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13845 || (GET_CODE (reg
) == SUBREG
13846 && REG_P (reg
= SUBREG_REG (reg
))))
13847 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13848 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13849 || (GET_CODE (reg
) == SUBREG
13850 && REG_P (reg
= SUBREG_REG (reg
))))
13851 && (CONST_INT_P (offset
13852 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13854 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13855 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13856 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13860 base_reg
= REGNO (reg
);
13861 base_reg_rtx
= reg
;
13862 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13865 else if (base_reg
!= (int) REGNO (reg
))
13866 /* Not addressed from the same base register. */
13869 /* If it isn't an integer register, then we can't do this. */
13870 if (unsorted_regs
[i
] < 0
13871 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13872 /* The effects are unpredictable if the base register is
13873 both updated and stored. */
13874 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13875 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13876 || unsorted_regs
[i
] > 14)
13879 unsorted_offsets
[i
] = INTVAL (offset
);
13880 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13884 /* Not a suitable memory address. */
13888 /* All the useful information has now been extracted from the
13889 operands into unsorted_regs and unsorted_offsets; additionally,
13890 order[0] has been set to the lowest offset in the list. Sort
13891 the offsets into order, verifying that they are adjacent, and
13892 check that the register numbers are ascending. */
13893 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13894 check_regs
? unsorted_regs
: NULL
))
13898 memcpy (saved_order
, order
, sizeof order
);
13904 for (i
= 0; i
< nops
; i
++)
13906 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13908 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13911 *load_offset
= unsorted_offsets
[order
[0]];
13915 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13918 if (unsorted_offsets
[order
[0]] == 0)
13919 stm_case
= 1; /* stmia */
13920 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13921 stm_case
= 2; /* stmib */
13922 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13923 stm_case
= 3; /* stmda */
13924 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13925 stm_case
= 4; /* stmdb */
13929 if (!multiple_operation_profitable_p (false, nops
, 0))
13935 /* Routines for use in generating RTL. */
13937 /* Generate a load-multiple instruction. COUNT is the number of loads in
13938 the instruction; REGS and MEMS are arrays containing the operands.
13939 BASEREG is the base register to be used in addressing the memory operands.
13940 WBACK_OFFSET is nonzero if the instruction should update the base
13944 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13945 HOST_WIDE_INT wback_offset
)
13950 if (!multiple_operation_profitable_p (false, count
, 0))
13956 for (i
= 0; i
< count
; i
++)
13957 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13959 if (wback_offset
!= 0)
13960 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13962 seq
= get_insns ();
13968 result
= gen_rtx_PARALLEL (VOIDmode
,
13969 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13970 if (wback_offset
!= 0)
13972 XVECEXP (result
, 0, 0)
13973 = gen_rtx_SET (VOIDmode
, basereg
,
13974 plus_constant (Pmode
, basereg
, wback_offset
));
13979 for (j
= 0; i
< count
; i
++, j
++)
13980 XVECEXP (result
, 0, i
)
13981 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13986 /* Generate a store-multiple instruction. COUNT is the number of stores in
13987 the instruction; REGS and MEMS are arrays containing the operands.
13988 BASEREG is the base register to be used in addressing the memory operands.
13989 WBACK_OFFSET is nonzero if the instruction should update the base
13993 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13994 HOST_WIDE_INT wback_offset
)
13999 if (GET_CODE (basereg
) == PLUS
)
14000 basereg
= XEXP (basereg
, 0);
14002 if (!multiple_operation_profitable_p (false, count
, 0))
14008 for (i
= 0; i
< count
; i
++)
14009 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14011 if (wback_offset
!= 0)
14012 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14014 seq
= get_insns ();
14020 result
= gen_rtx_PARALLEL (VOIDmode
,
14021 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14022 if (wback_offset
!= 0)
14024 XVECEXP (result
, 0, 0)
14025 = gen_rtx_SET (VOIDmode
, basereg
,
14026 plus_constant (Pmode
, basereg
, wback_offset
));
14031 for (j
= 0; i
< count
; i
++, j
++)
14032 XVECEXP (result
, 0, i
)
14033 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14038 /* Generate either a load-multiple or a store-multiple instruction. This
14039 function can be used in situations where we can start with a single MEM
14040 rtx and adjust its address upwards.
14041 COUNT is the number of operations in the instruction, not counting a
14042 possible update of the base register. REGS is an array containing the
14044 BASEREG is the base register to be used in addressing the memory operands,
14045 which are constructed from BASEMEM.
14046 WRITE_BACK specifies whether the generated instruction should include an
14047 update of the base register.
14048 OFFSETP is used to pass an offset to and from this function; this offset
14049 is not used when constructing the address (instead BASEMEM should have an
14050 appropriate offset in its address), it is used only for setting
14051 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14054 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14055 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14057 rtx mems
[MAX_LDM_STM_OPS
];
14058 HOST_WIDE_INT offset
= *offsetp
;
14061 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14063 if (GET_CODE (basereg
) == PLUS
)
14064 basereg
= XEXP (basereg
, 0);
14066 for (i
= 0; i
< count
; i
++)
14068 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14069 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14077 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14078 write_back
? 4 * count
: 0);
14080 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14081 write_back
? 4 * count
: 0);
14085 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14086 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14088 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14093 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14094 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14096 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14100 /* Called from a peephole2 expander to turn a sequence of loads into an
14101 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14102 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14103 is true if we can reorder the registers because they are used commutatively
14105 Returns true iff we could generate a new instruction. */
14108 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14110 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14111 rtx mems
[MAX_LDM_STM_OPS
];
14112 int i
, j
, base_reg
;
14114 HOST_WIDE_INT offset
;
14115 int write_back
= FALSE
;
14119 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14120 &base_reg
, &offset
, !sort_regs
);
14126 for (i
= 0; i
< nops
- 1; i
++)
14127 for (j
= i
+ 1; j
< nops
; j
++)
14128 if (regs
[i
] > regs
[j
])
14134 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14138 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14139 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14145 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14146 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14148 if (!TARGET_THUMB1
)
14150 base_reg
= regs
[0];
14151 base_reg_rtx
= newbase
;
14155 for (i
= 0; i
< nops
; i
++)
14157 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14158 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14161 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14162 write_back
? offset
+ i
* 4 : 0));
14166 /* Called from a peephole2 expander to turn a sequence of stores into an
14167 STM instruction. OPERANDS are the operands found by the peephole matcher;
14168 NOPS indicates how many separate stores we are trying to combine.
14169 Returns true iff we could generate a new instruction. */
14172 gen_stm_seq (rtx
*operands
, int nops
)
14175 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14176 rtx mems
[MAX_LDM_STM_OPS
];
14179 HOST_WIDE_INT offset
;
14180 int write_back
= FALSE
;
14183 bool base_reg_dies
;
14185 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14186 mem_order
, &base_reg
, &offset
, true);
14191 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14193 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14196 gcc_assert (base_reg_dies
);
14202 gcc_assert (base_reg_dies
);
14203 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14207 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14209 for (i
= 0; i
< nops
; i
++)
14211 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14212 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14215 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14216 write_back
? offset
+ i
* 4 : 0));
14220 /* Called from a peephole2 expander to turn a sequence of stores that are
14221 preceded by constant loads into an STM instruction. OPERANDS are the
14222 operands found by the peephole matcher; NOPS indicates how many
14223 separate stores we are trying to combine; there are 2 * NOPS
14224 instructions in the peephole.
14225 Returns true iff we could generate a new instruction. */
14228 gen_const_stm_seq (rtx
*operands
, int nops
)
14230 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14231 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14232 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14233 rtx mems
[MAX_LDM_STM_OPS
];
14236 HOST_WIDE_INT offset
;
14237 int write_back
= FALSE
;
14240 bool base_reg_dies
;
14242 HARD_REG_SET allocated
;
14244 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14245 mem_order
, &base_reg
, &offset
, false);
14250 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14252 /* If the same register is used more than once, try to find a free
14254 CLEAR_HARD_REG_SET (allocated
);
14255 for (i
= 0; i
< nops
; i
++)
14257 for (j
= i
+ 1; j
< nops
; j
++)
14258 if (regs
[i
] == regs
[j
])
14260 rtx t
= peep2_find_free_register (0, nops
* 2,
14261 TARGET_THUMB1
? "l" : "r",
14262 SImode
, &allocated
);
14266 regs
[i
] = REGNO (t
);
14270 /* Compute an ordering that maps the register numbers to an ascending
14273 for (i
= 0; i
< nops
; i
++)
14274 if (regs
[i
] < regs
[reg_order
[0]])
14277 for (i
= 1; i
< nops
; i
++)
14279 int this_order
= reg_order
[i
- 1];
14280 for (j
= 0; j
< nops
; j
++)
14281 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14282 && (this_order
== reg_order
[i
- 1]
14283 || regs
[j
] < regs
[this_order
]))
14285 reg_order
[i
] = this_order
;
14288 /* Ensure that registers that must be live after the instruction end
14289 up with the correct value. */
14290 for (i
= 0; i
< nops
; i
++)
14292 int this_order
= reg_order
[i
];
14293 if ((this_order
!= mem_order
[i
]
14294 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14295 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14299 /* Load the constants. */
14300 for (i
= 0; i
< nops
; i
++)
14302 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14303 sorted_regs
[i
] = regs
[reg_order
[i
]];
14304 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14307 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14309 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14312 gcc_assert (base_reg_dies
);
14318 gcc_assert (base_reg_dies
);
14319 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14323 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14325 for (i
= 0; i
< nops
; i
++)
14327 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14328 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14331 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14332 write_back
? offset
+ i
* 4 : 0));
14336 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14337 unaligned copies on processors which support unaligned semantics for those
14338 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14339 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14340 An interleave factor of 1 (the minimum) will perform no interleaving.
14341 Load/store multiple are used for aligned addresses where possible. */
14344 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14345 HOST_WIDE_INT length
,
14346 unsigned int interleave_factor
)
14348 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14349 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14350 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14351 HOST_WIDE_INT i
, j
;
14352 HOST_WIDE_INT remaining
= length
, words
;
14353 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14355 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14356 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14357 HOST_WIDE_INT srcoffset
, dstoffset
;
14358 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14361 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14363 /* Use hard registers if we have aligned source or destination so we can use
14364 load/store multiple with contiguous registers. */
14365 if (dst_aligned
|| src_aligned
)
14366 for (i
= 0; i
< interleave_factor
; i
++)
14367 regs
[i
] = gen_rtx_REG (SImode
, i
);
14369 for (i
= 0; i
< interleave_factor
; i
++)
14370 regs
[i
] = gen_reg_rtx (SImode
);
14372 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14373 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14375 srcoffset
= dstoffset
= 0;
14377 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14378 For copying the last bytes we want to subtract this offset again. */
14379 src_autoinc
= dst_autoinc
= 0;
14381 for (i
= 0; i
< interleave_factor
; i
++)
14384 /* Copy BLOCK_SIZE_BYTES chunks. */
14386 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14389 if (src_aligned
&& interleave_factor
> 1)
14391 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14392 TRUE
, srcbase
, &srcoffset
));
14393 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14397 for (j
= 0; j
< interleave_factor
; j
++)
14399 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14401 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14402 srcoffset
+ j
* UNITS_PER_WORD
);
14403 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14405 srcoffset
+= block_size_bytes
;
14409 if (dst_aligned
&& interleave_factor
> 1)
14411 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14412 TRUE
, dstbase
, &dstoffset
));
14413 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14417 for (j
= 0; j
< interleave_factor
; j
++)
14419 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14421 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14422 dstoffset
+ j
* UNITS_PER_WORD
);
14423 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14425 dstoffset
+= block_size_bytes
;
14428 remaining
-= block_size_bytes
;
14431 /* Copy any whole words left (note these aren't interleaved with any
14432 subsequent halfword/byte load/stores in the interests of simplicity). */
14434 words
= remaining
/ UNITS_PER_WORD
;
14436 gcc_assert (words
< interleave_factor
);
14438 if (src_aligned
&& words
> 1)
14440 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14442 src_autoinc
+= UNITS_PER_WORD
* words
;
14446 for (j
= 0; j
< words
; j
++)
14448 addr
= plus_constant (Pmode
, src
,
14449 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14450 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14451 srcoffset
+ j
* UNITS_PER_WORD
);
14452 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14454 srcoffset
+= words
* UNITS_PER_WORD
;
14457 if (dst_aligned
&& words
> 1)
14459 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14461 dst_autoinc
+= words
* UNITS_PER_WORD
;
14465 for (j
= 0; j
< words
; j
++)
14467 addr
= plus_constant (Pmode
, dst
,
14468 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14469 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14470 dstoffset
+ j
* UNITS_PER_WORD
);
14471 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14473 dstoffset
+= words
* UNITS_PER_WORD
;
14476 remaining
-= words
* UNITS_PER_WORD
;
14478 gcc_assert (remaining
< 4);
14480 /* Copy a halfword if necessary. */
14482 if (remaining
>= 2)
14484 halfword_tmp
= gen_reg_rtx (SImode
);
14486 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14487 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14488 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14490 /* Either write out immediately, or delay until we've loaded the last
14491 byte, depending on interleave factor. */
14492 if (interleave_factor
== 1)
14494 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14495 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14496 emit_insn (gen_unaligned_storehi (mem
,
14497 gen_lowpart (HImode
, halfword_tmp
)));
14498 halfword_tmp
= NULL
;
14506 gcc_assert (remaining
< 2);
14508 /* Copy last byte. */
14510 if ((remaining
& 1) != 0)
14512 byte_tmp
= gen_reg_rtx (SImode
);
14514 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14515 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14516 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14518 if (interleave_factor
== 1)
14520 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14521 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14522 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14531 /* Store last halfword if we haven't done so already. */
14535 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14536 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14537 emit_insn (gen_unaligned_storehi (mem
,
14538 gen_lowpart (HImode
, halfword_tmp
)));
14542 /* Likewise for last byte. */
14546 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14547 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14548 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14552 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14555 /* From mips_adjust_block_mem:
14557 Helper function for doing a loop-based block operation on memory
14558 reference MEM. Each iteration of the loop will operate on LENGTH
14561 Create a new base register for use within the loop and point it to
14562 the start of MEM. Create a new memory reference that uses this
14563 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14566 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14569 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14571 /* Although the new mem does not refer to a known location,
14572 it does keep up to LENGTH bytes of alignment. */
14573 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14574 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14577 /* From mips_block_move_loop:
14579 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14580 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14581 the memory regions do not overlap. */
14584 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14585 unsigned int interleave_factor
,
14586 HOST_WIDE_INT bytes_per_iter
)
14588 rtx src_reg
, dest_reg
, final_src
, test
;
14589 HOST_WIDE_INT leftover
;
14591 leftover
= length
% bytes_per_iter
;
14592 length
-= leftover
;
14594 /* Create registers and memory references for use within the loop. */
14595 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14596 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14598 /* Calculate the value that SRC_REG should have after the last iteration of
14600 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14601 0, 0, OPTAB_WIDEN
);
14603 /* Emit the start of the loop. */
14604 rtx_code_label
*label
= gen_label_rtx ();
14605 emit_label (label
);
14607 /* Emit the loop body. */
14608 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14609 interleave_factor
);
14611 /* Move on to the next block. */
14612 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14613 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14615 /* Emit the loop condition. */
14616 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14617 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14619 /* Mop up any left-over bytes. */
14621 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14624 /* Emit a block move when either the source or destination is unaligned (not
14625 aligned to a four-byte boundary). This may need further tuning depending on
14626 core type, optimize_size setting, etc. */
14629 arm_movmemqi_unaligned (rtx
*operands
)
14631 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14635 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14636 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14637 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14638 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14639 or dst_aligned though: allow more interleaving in those cases since the
14640 resulting code can be smaller. */
14641 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14642 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14645 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14646 interleave_factor
, bytes_per_iter
);
14648 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14649 interleave_factor
);
14653 /* Note that the loop created by arm_block_move_unaligned_loop may be
14654 subject to loop unrolling, which makes tuning this condition a little
14657 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14659 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14666 arm_gen_movmemqi (rtx
*operands
)
14668 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14669 HOST_WIDE_INT srcoffset
, dstoffset
;
14671 rtx src
, dst
, srcbase
, dstbase
;
14672 rtx part_bytes_reg
= NULL
;
14675 if (!CONST_INT_P (operands
[2])
14676 || !CONST_INT_P (operands
[3])
14677 || INTVAL (operands
[2]) > 64)
14680 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14681 return arm_movmemqi_unaligned (operands
);
14683 if (INTVAL (operands
[3]) & 3)
14686 dstbase
= operands
[0];
14687 srcbase
= operands
[1];
14689 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14690 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14692 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14693 out_words_to_go
= INTVAL (operands
[2]) / 4;
14694 last_bytes
= INTVAL (operands
[2]) & 3;
14695 dstoffset
= srcoffset
= 0;
14697 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14698 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14700 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14702 if (in_words_to_go
> 4)
14703 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14704 TRUE
, srcbase
, &srcoffset
));
14706 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14707 src
, FALSE
, srcbase
,
14710 if (out_words_to_go
)
14712 if (out_words_to_go
> 4)
14713 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14714 TRUE
, dstbase
, &dstoffset
));
14715 else if (out_words_to_go
!= 1)
14716 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14717 out_words_to_go
, dst
,
14720 dstbase
, &dstoffset
));
14723 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14724 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14725 if (last_bytes
!= 0)
14727 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14733 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14734 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14737 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14738 if (out_words_to_go
)
14742 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14743 sreg
= copy_to_reg (mem
);
14745 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14746 emit_move_insn (mem
, sreg
);
14749 gcc_assert (!in_words_to_go
); /* Sanity check */
14752 if (in_words_to_go
)
14754 gcc_assert (in_words_to_go
> 0);
14756 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14757 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14760 gcc_assert (!last_bytes
|| part_bytes_reg
);
14762 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14764 rtx tmp
= gen_reg_rtx (SImode
);
14766 /* The bytes we want are in the top end of the word. */
14767 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14768 GEN_INT (8 * (4 - last_bytes
))));
14769 part_bytes_reg
= tmp
;
14773 mem
= adjust_automodify_address (dstbase
, QImode
,
14774 plus_constant (Pmode
, dst
,
14776 dstoffset
+ last_bytes
- 1);
14777 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14781 tmp
= gen_reg_rtx (SImode
);
14782 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14783 part_bytes_reg
= tmp
;
14790 if (last_bytes
> 1)
14792 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14793 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14797 rtx tmp
= gen_reg_rtx (SImode
);
14798 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14799 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14800 part_bytes_reg
= tmp
;
14807 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14808 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14815 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14818 next_consecutive_mem (rtx mem
)
14820 machine_mode mode
= GET_MODE (mem
);
14821 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14822 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14824 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14827 /* Copy using LDRD/STRD instructions whenever possible.
14828 Returns true upon success. */
14830 gen_movmem_ldrd_strd (rtx
*operands
)
14832 unsigned HOST_WIDE_INT len
;
14833 HOST_WIDE_INT align
;
14834 rtx src
, dst
, base
;
14836 bool src_aligned
, dst_aligned
;
14837 bool src_volatile
, dst_volatile
;
14839 gcc_assert (CONST_INT_P (operands
[2]));
14840 gcc_assert (CONST_INT_P (operands
[3]));
14842 len
= UINTVAL (operands
[2]);
14846 /* Maximum alignment we can assume for both src and dst buffers. */
14847 align
= INTVAL (operands
[3]);
14849 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14852 /* Place src and dst addresses in registers
14853 and update the corresponding mem rtx. */
14855 dst_volatile
= MEM_VOLATILE_P (dst
);
14856 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14857 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14858 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14861 src_volatile
= MEM_VOLATILE_P (src
);
14862 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14863 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14864 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14866 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14869 if (src_volatile
|| dst_volatile
)
14872 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14873 if (!(dst_aligned
|| src_aligned
))
14874 return arm_gen_movmemqi (operands
);
14876 src
= adjust_address (src
, DImode
, 0);
14877 dst
= adjust_address (dst
, DImode
, 0);
14881 reg0
= gen_reg_rtx (DImode
);
14883 emit_move_insn (reg0
, src
);
14885 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14888 emit_move_insn (dst
, reg0
);
14890 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14892 src
= next_consecutive_mem (src
);
14893 dst
= next_consecutive_mem (dst
);
14896 gcc_assert (len
< 8);
14899 /* More than a word but less than a double-word to copy. Copy a word. */
14900 reg0
= gen_reg_rtx (SImode
);
14901 src
= adjust_address (src
, SImode
, 0);
14902 dst
= adjust_address (dst
, SImode
, 0);
14904 emit_move_insn (reg0
, src
);
14906 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14909 emit_move_insn (dst
, reg0
);
14911 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14913 src
= next_consecutive_mem (src
);
14914 dst
= next_consecutive_mem (dst
);
14921 /* Copy the remaining bytes. */
14924 dst
= adjust_address (dst
, HImode
, 0);
14925 src
= adjust_address (src
, HImode
, 0);
14926 reg0
= gen_reg_rtx (SImode
);
14928 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14930 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14933 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14935 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14937 src
= next_consecutive_mem (src
);
14938 dst
= next_consecutive_mem (dst
);
14943 dst
= adjust_address (dst
, QImode
, 0);
14944 src
= adjust_address (src
, QImode
, 0);
14945 reg0
= gen_reg_rtx (QImode
);
14946 emit_move_insn (reg0
, src
);
14947 emit_move_insn (dst
, reg0
);
14951 /* Select a dominance comparison mode if possible for a test of the general
14952 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14953 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14954 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14955 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14956 In all cases OP will be either EQ or NE, but we don't need to know which
14957 here. If we are unable to support a dominance comparison we return
14958 CC mode. This will then fail to match for the RTL expressions that
14959 generate this call. */
14961 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14963 enum rtx_code cond1
, cond2
;
14966 /* Currently we will probably get the wrong result if the individual
14967 comparisons are not simple. This also ensures that it is safe to
14968 reverse a comparison if necessary. */
14969 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14971 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14975 /* The if_then_else variant of this tests the second condition if the
14976 first passes, but is true if the first fails. Reverse the first
14977 condition to get a true "inclusive-or" expression. */
14978 if (cond_or
== DOM_CC_NX_OR_Y
)
14979 cond1
= reverse_condition (cond1
);
14981 /* If the comparisons are not equal, and one doesn't dominate the other,
14982 then we can't do this. */
14984 && !comparison_dominates_p (cond1
, cond2
)
14985 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14990 enum rtx_code temp
= cond1
;
14998 if (cond_or
== DOM_CC_X_AND_Y
)
15003 case EQ
: return CC_DEQmode
;
15004 case LE
: return CC_DLEmode
;
15005 case LEU
: return CC_DLEUmode
;
15006 case GE
: return CC_DGEmode
;
15007 case GEU
: return CC_DGEUmode
;
15008 default: gcc_unreachable ();
15012 if (cond_or
== DOM_CC_X_AND_Y
)
15024 gcc_unreachable ();
15028 if (cond_or
== DOM_CC_X_AND_Y
)
15040 gcc_unreachable ();
15044 if (cond_or
== DOM_CC_X_AND_Y
)
15045 return CC_DLTUmode
;
15050 return CC_DLTUmode
;
15052 return CC_DLEUmode
;
15056 gcc_unreachable ();
15060 if (cond_or
== DOM_CC_X_AND_Y
)
15061 return CC_DGTUmode
;
15066 return CC_DGTUmode
;
15068 return CC_DGEUmode
;
15072 gcc_unreachable ();
15075 /* The remaining cases only occur when both comparisons are the
15078 gcc_assert (cond1
== cond2
);
15082 gcc_assert (cond1
== cond2
);
15086 gcc_assert (cond1
== cond2
);
15090 gcc_assert (cond1
== cond2
);
15091 return CC_DLEUmode
;
15094 gcc_assert (cond1
== cond2
);
15095 return CC_DGEUmode
;
15098 gcc_unreachable ();
15103 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15105 /* All floating point compares return CCFP if it is an equality
15106 comparison, and CCFPE otherwise. */
15107 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15130 gcc_unreachable ();
15134 /* A compare with a shifted operand. Because of canonicalization, the
15135 comparison will have to be swapped when we emit the assembler. */
15136 if (GET_MODE (y
) == SImode
15137 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15138 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15139 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15140 || GET_CODE (x
) == ROTATERT
))
15143 /* This operation is performed swapped, but since we only rely on the Z
15144 flag we don't need an additional mode. */
15145 if (GET_MODE (y
) == SImode
15146 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15147 && GET_CODE (x
) == NEG
15148 && (op
== EQ
|| op
== NE
))
15151 /* This is a special case that is used by combine to allow a
15152 comparison of a shifted byte load to be split into a zero-extend
15153 followed by a comparison of the shifted integer (only valid for
15154 equalities and unsigned inequalities). */
15155 if (GET_MODE (x
) == SImode
15156 && GET_CODE (x
) == ASHIFT
15157 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15158 && GET_CODE (XEXP (x
, 0)) == SUBREG
15159 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15160 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15161 && (op
== EQ
|| op
== NE
15162 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15163 && CONST_INT_P (y
))
15166 /* A construct for a conditional compare, if the false arm contains
15167 0, then both conditions must be true, otherwise either condition
15168 must be true. Not all conditions are possible, so CCmode is
15169 returned if it can't be done. */
15170 if (GET_CODE (x
) == IF_THEN_ELSE
15171 && (XEXP (x
, 2) == const0_rtx
15172 || XEXP (x
, 2) == const1_rtx
)
15173 && COMPARISON_P (XEXP (x
, 0))
15174 && COMPARISON_P (XEXP (x
, 1)))
15175 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15176 INTVAL (XEXP (x
, 2)));
15178 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15179 if (GET_CODE (x
) == AND
15180 && (op
== EQ
|| op
== NE
)
15181 && COMPARISON_P (XEXP (x
, 0))
15182 && COMPARISON_P (XEXP (x
, 1)))
15183 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15186 if (GET_CODE (x
) == IOR
15187 && (op
== EQ
|| op
== NE
)
15188 && COMPARISON_P (XEXP (x
, 0))
15189 && COMPARISON_P (XEXP (x
, 1)))
15190 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15193 /* An operation (on Thumb) where we want to test for a single bit.
15194 This is done by shifting that bit up into the top bit of a
15195 scratch register; we can then branch on the sign bit. */
15197 && GET_MODE (x
) == SImode
15198 && (op
== EQ
|| op
== NE
)
15199 && GET_CODE (x
) == ZERO_EXTRACT
15200 && XEXP (x
, 1) == const1_rtx
)
15203 /* An operation that sets the condition codes as a side-effect, the
15204 V flag is not set correctly, so we can only use comparisons where
15205 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15207 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15208 if (GET_MODE (x
) == SImode
15210 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15211 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15212 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15213 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15214 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15215 || GET_CODE (x
) == LSHIFTRT
15216 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15217 || GET_CODE (x
) == ROTATERT
15218 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15219 return CC_NOOVmode
;
15221 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15224 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15225 && GET_CODE (x
) == PLUS
15226 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15229 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15235 /* A DImode comparison against zero can be implemented by
15236 or'ing the two halves together. */
15237 if (y
== const0_rtx
)
15240 /* We can do an equality test in three Thumb instructions. */
15250 /* DImode unsigned comparisons can be implemented by cmp +
15251 cmpeq without a scratch register. Not worth doing in
15262 /* DImode signed and unsigned comparisons can be implemented
15263 by cmp + sbcs with a scratch register, but that does not
15264 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15265 gcc_assert (op
!= EQ
&& op
!= NE
);
15269 gcc_unreachable ();
15273 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15274 return GET_MODE (x
);
15279 /* X and Y are two things to compare using CODE. Emit the compare insn and
15280 return the rtx for register 0 in the proper mode. FP means this is a
15281 floating point compare: I don't think that it is needed on the arm. */
15283 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15287 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15289 /* We might have X as a constant, Y as a register because of the predicates
15290 used for cmpdi. If so, force X to a register here. */
15291 if (dimode_comparison
&& !REG_P (x
))
15292 x
= force_reg (DImode
, x
);
15294 mode
= SELECT_CC_MODE (code
, x
, y
);
15295 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15297 if (dimode_comparison
15298 && mode
!= CC_CZmode
)
15302 /* To compare two non-zero values for equality, XOR them and
15303 then compare against zero. Not used for ARM mode; there
15304 CC_CZmode is cheaper. */
15305 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15307 gcc_assert (!reload_completed
);
15308 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15312 /* A scratch register is required. */
15313 if (reload_completed
)
15314 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15316 scratch
= gen_rtx_SCRATCH (SImode
);
15318 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15319 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15320 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15323 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15328 /* Generate a sequence of insns that will generate the correct return
15329 address mask depending on the physical architecture that the program
15332 arm_gen_return_addr_mask (void)
15334 rtx reg
= gen_reg_rtx (Pmode
);
15336 emit_insn (gen_return_addr_mask (reg
));
15341 arm_reload_in_hi (rtx
*operands
)
15343 rtx ref
= operands
[1];
15345 HOST_WIDE_INT offset
= 0;
15347 if (GET_CODE (ref
) == SUBREG
)
15349 offset
= SUBREG_BYTE (ref
);
15350 ref
= SUBREG_REG (ref
);
15355 /* We have a pseudo which has been spilt onto the stack; there
15356 are two cases here: the first where there is a simple
15357 stack-slot replacement and a second where the stack-slot is
15358 out of range, or is used as a subreg. */
15359 if (reg_equiv_mem (REGNO (ref
)))
15361 ref
= reg_equiv_mem (REGNO (ref
));
15362 base
= find_replacement (&XEXP (ref
, 0));
15365 /* The slot is out of range, or was dressed up in a SUBREG. */
15366 base
= reg_equiv_address (REGNO (ref
));
15369 base
= find_replacement (&XEXP (ref
, 0));
15371 /* Handle the case where the address is too complex to be offset by 1. */
15372 if (GET_CODE (base
) == MINUS
15373 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15375 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15377 emit_set_insn (base_plus
, base
);
15380 else if (GET_CODE (base
) == PLUS
)
15382 /* The addend must be CONST_INT, or we would have dealt with it above. */
15383 HOST_WIDE_INT hi
, lo
;
15385 offset
+= INTVAL (XEXP (base
, 1));
15386 base
= XEXP (base
, 0);
15388 /* Rework the address into a legal sequence of insns. */
15389 /* Valid range for lo is -4095 -> 4095 */
15392 : -((-offset
) & 0xfff));
15394 /* Corner case, if lo is the max offset then we would be out of range
15395 once we have added the additional 1 below, so bump the msb into the
15396 pre-loading insn(s). */
15400 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15401 ^ (HOST_WIDE_INT
) 0x80000000)
15402 - (HOST_WIDE_INT
) 0x80000000);
15404 gcc_assert (hi
+ lo
== offset
);
15408 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15410 /* Get the base address; addsi3 knows how to handle constants
15411 that require more than one insn. */
15412 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15418 /* Operands[2] may overlap operands[0] (though it won't overlap
15419 operands[1]), that's why we asked for a DImode reg -- so we can
15420 use the bit that does not overlap. */
15421 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15422 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15424 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15426 emit_insn (gen_zero_extendqisi2 (scratch
,
15427 gen_rtx_MEM (QImode
,
15428 plus_constant (Pmode
, base
,
15430 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15431 gen_rtx_MEM (QImode
,
15432 plus_constant (Pmode
, base
,
15434 if (!BYTES_BIG_ENDIAN
)
15435 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15436 gen_rtx_IOR (SImode
,
15439 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15443 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15444 gen_rtx_IOR (SImode
,
15445 gen_rtx_ASHIFT (SImode
, scratch
,
15447 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15450 /* Handle storing a half-word to memory during reload by synthesizing as two
15451 byte stores. Take care not to clobber the input values until after we
15452 have moved them somewhere safe. This code assumes that if the DImode
15453 scratch in operands[2] overlaps either the input value or output address
15454 in some way, then that value must die in this insn (we absolutely need
15455 two scratch registers for some corner cases). */
15457 arm_reload_out_hi (rtx
*operands
)
15459 rtx ref
= operands
[0];
15460 rtx outval
= operands
[1];
15462 HOST_WIDE_INT offset
= 0;
15464 if (GET_CODE (ref
) == SUBREG
)
15466 offset
= SUBREG_BYTE (ref
);
15467 ref
= SUBREG_REG (ref
);
15472 /* We have a pseudo which has been spilt onto the stack; there
15473 are two cases here: the first where there is a simple
15474 stack-slot replacement and a second where the stack-slot is
15475 out of range, or is used as a subreg. */
15476 if (reg_equiv_mem (REGNO (ref
)))
15478 ref
= reg_equiv_mem (REGNO (ref
));
15479 base
= find_replacement (&XEXP (ref
, 0));
15482 /* The slot is out of range, or was dressed up in a SUBREG. */
15483 base
= reg_equiv_address (REGNO (ref
));
15486 base
= find_replacement (&XEXP (ref
, 0));
15488 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15490 /* Handle the case where the address is too complex to be offset by 1. */
15491 if (GET_CODE (base
) == MINUS
15492 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15494 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus
, outval
))
15499 /* Updating base_plus might destroy outval, see if we can
15500 swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch
, outval
))
15504 scratch
= base_plus
;
15509 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15511 /* Be conservative and copy OUTVAL into the scratch now,
15512 this should only be necessary if outval is a subreg
15513 of something larger than a word. */
15514 /* XXX Might this clobber base? I can't see how it can,
15515 since scratch is known to overlap with OUTVAL, and
15516 must be wider than a word. */
15517 emit_insn (gen_movhi (scratch_hi
, outval
));
15518 outval
= scratch_hi
;
15522 emit_set_insn (base_plus
, base
);
15525 else if (GET_CODE (base
) == PLUS
)
15527 /* The addend must be CONST_INT, or we would have dealt with it above. */
15528 HOST_WIDE_INT hi
, lo
;
15530 offset
+= INTVAL (XEXP (base
, 1));
15531 base
= XEXP (base
, 0);
15533 /* Rework the address into a legal sequence of insns. */
15534 /* Valid range for lo is -4095 -> 4095 */
15537 : -((-offset
) & 0xfff));
15539 /* Corner case, if lo is the max offset then we would be out of range
15540 once we have added the additional 1 below, so bump the msb into the
15541 pre-loading insn(s). */
15545 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15546 ^ (HOST_WIDE_INT
) 0x80000000)
15547 - (HOST_WIDE_INT
) 0x80000000);
15549 gcc_assert (hi
+ lo
== offset
);
15553 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15555 /* Be careful not to destroy OUTVAL. */
15556 if (reg_overlap_mentioned_p (base_plus
, outval
))
15558 /* Updating base_plus might destroy outval, see if we
15559 can swap the scratch and base_plus. */
15560 if (!reg_overlap_mentioned_p (scratch
, outval
))
15563 scratch
= base_plus
;
15568 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15570 /* Be conservative and copy outval into scratch now,
15571 this should only be necessary if outval is a
15572 subreg of something larger than a word. */
15573 /* XXX Might this clobber base? I can't see how it
15574 can, since scratch is known to overlap with
15576 emit_insn (gen_movhi (scratch_hi
, outval
));
15577 outval
= scratch_hi
;
15581 /* Get the base address; addsi3 knows how to handle constants
15582 that require more than one insn. */
15583 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15589 if (BYTES_BIG_ENDIAN
)
15591 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15592 plus_constant (Pmode
, base
,
15594 gen_lowpart (QImode
, outval
)));
15595 emit_insn (gen_lshrsi3 (scratch
,
15596 gen_rtx_SUBREG (SImode
, outval
, 0),
15598 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15600 gen_lowpart (QImode
, scratch
)));
15604 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15606 gen_lowpart (QImode
, outval
)));
15607 emit_insn (gen_lshrsi3 (scratch
,
15608 gen_rtx_SUBREG (SImode
, outval
, 0),
15610 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15611 plus_constant (Pmode
, base
,
15613 gen_lowpart (QImode
, scratch
)));
15617 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15618 (padded to the size of a word) should be passed in a register. */
15621 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15623 if (TARGET_AAPCS_BASED
)
15624 return must_pass_in_stack_var_size (mode
, type
);
15626 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15630 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15631 Return true if an argument passed on the stack should be padded upwards,
15632 i.e. if the least-significant byte has useful data.
15633 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15634 aggregate types are placed in the lowest memory address. */
15637 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15639 if (!TARGET_AAPCS_BASED
)
15640 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15642 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15649 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15650 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15651 register has useful data, and return the opposite if the most
15652 significant byte does. */
15655 arm_pad_reg_upward (machine_mode mode
,
15656 tree type
, int first ATTRIBUTE_UNUSED
)
15658 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15660 /* For AAPCS, small aggregates, small fixed-point types,
15661 and small complex types are always padded upwards. */
15664 if ((AGGREGATE_TYPE_P (type
)
15665 || TREE_CODE (type
) == COMPLEX_TYPE
15666 || FIXED_POINT_TYPE_P (type
))
15667 && int_size_in_bytes (type
) <= 4)
15672 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15673 && GET_MODE_SIZE (mode
) <= 4)
15678 /* Otherwise, use default padding. */
15679 return !BYTES_BIG_ENDIAN
;
15682 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15683 assuming that the address in the base register is word aligned. */
15685 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15687 HOST_WIDE_INT max_offset
;
15689 /* Offset must be a multiple of 4 in Thumb mode. */
15690 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15695 else if (TARGET_ARM
)
15700 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15703 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15704 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15705 Assumes that the address in the base register RN is word aligned. Pattern
15706 guarantees that both memory accesses use the same base register,
15707 the offsets are constants within the range, and the gap between the offsets is 4.
15708 If preload complete then check that registers are legal. WBACK indicates whether
15709 address is updated. LOAD indicates whether memory access is load or store. */
15711 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15712 bool wback
, bool load
)
15714 unsigned int t
, t2
, n
;
15716 if (!reload_completed
)
15719 if (!offset_ok_for_ldrd_strd (offset
))
15726 if ((TARGET_THUMB2
)
15727 && ((wback
&& (n
== t
|| n
== t2
))
15728 || (t
== SP_REGNUM
)
15729 || (t
== PC_REGNUM
)
15730 || (t2
== SP_REGNUM
)
15731 || (t2
== PC_REGNUM
)
15732 || (!load
&& (n
== PC_REGNUM
))
15733 || (load
&& (t
== t2
))
15734 /* Triggers Cortex-M3 LDRD errata. */
15735 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15739 && ((wback
&& (n
== t
|| n
== t2
))
15740 || (t2
== PC_REGNUM
)
15741 || (t
% 2 != 0) /* First destination register is not even. */
15743 /* PC can be used as base register (for offset addressing only),
15744 but it is depricated. */
15745 || (n
== PC_REGNUM
)))
15751 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15752 operand MEM's address contains an immediate offset from the base
15753 register and has no side effects, in which case it sets BASE and
15754 OFFSET accordingly. */
15756 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15760 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15762 /* TODO: Handle more general memory operand patterns, such as
15763 PRE_DEC and PRE_INC. */
15765 if (side_effects_p (mem
))
15768 /* Can't deal with subregs. */
15769 if (GET_CODE (mem
) == SUBREG
)
15772 gcc_assert (MEM_P (mem
));
15774 *offset
= const0_rtx
;
15776 addr
= XEXP (mem
, 0);
15778 /* If addr isn't valid for DImode, then we can't handle it. */
15779 if (!arm_legitimate_address_p (DImode
, addr
,
15780 reload_in_progress
|| reload_completed
))
15788 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15790 *base
= XEXP (addr
, 0);
15791 *offset
= XEXP (addr
, 1);
15792 return (REG_P (*base
) && CONST_INT_P (*offset
));
/* Exchange the rtx values held in X and Y.  The do-while(0) wrapper makes
   the macro behave as a single statement in unbraced if/else bodies.  */
15798 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15800 /* Called from a peephole2 to replace two word-size accesses with a
15801 single LDRD/STRD instruction. Returns true iff we can generate a
15802 new instruction sequence. That is, both accesses use the same base
15803 register and the gap between constant offsets is 4. This function
15804 may reorder its operands to match ldrd/strd RTL templates.
15805 OPERANDS are the operands found by the peephole matcher;
15806 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15807 corresponding memory operands. LOAD indicaates whether the access
15808 is load or store. CONST_STORE indicates a store of constant
15809 integer values held in OPERANDS[4,5] and assumes that the pattern
15810 is of length 4 insn, for the purpose of checking dead registers.
15811 COMMUTE indicates that register operands may be reordered. */
15813 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15814 bool const_store
, bool commute
)
15817 HOST_WIDE_INT offsets
[2], offset
;
15818 rtx base
= NULL_RTX
;
15819 rtx cur_base
, cur_offset
, tmp
;
15821 HARD_REG_SET regset
;
15823 gcc_assert (!const_store
|| !load
);
15824 /* Check that the memory references are immediate offsets from the
15825 same base register. Extract the base register, the destination
15826 registers, and the corresponding memory offsets. */
15827 for (i
= 0; i
< nops
; i
++)
15829 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15834 else if (REGNO (base
) != REGNO (cur_base
))
15837 offsets
[i
] = INTVAL (cur_offset
);
15838 if (GET_CODE (operands
[i
]) == SUBREG
)
15840 tmp
= SUBREG_REG (operands
[i
]);
15841 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15846 /* Make sure there is no dependency between the individual loads. */
15847 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15848 return false; /* RAW */
15850 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15851 return false; /* WAW */
15853 /* If the same input register is used in both stores
15854 when storing different constants, try to find a free register.
15855 For example, the code
15860 can be transformed into
15863 in Thumb mode assuming that r1 is free. */
15865 && REGNO (operands
[0]) == REGNO (operands
[1])
15866 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15870 CLEAR_HARD_REG_SET (regset
);
15871 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15872 if (tmp
== NULL_RTX
)
15875 /* Use the new register in the first load to ensure that
15876 if the original input register is not dead after peephole,
15877 then it will have the correct constant value. */
15880 else if (TARGET_ARM
)
15883 int regno
= REGNO (operands
[0]);
15884 if (!peep2_reg_dead_p (4, operands
[0]))
15886 /* When the input register is even and is not dead after the
15887 pattern, it has to hold the second constant but we cannot
15888 form a legal STRD in ARM mode with this register as the second
15890 if (regno
% 2 == 0)
15893 /* Is regno-1 free? */
15894 SET_HARD_REG_SET (regset
);
15895 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15896 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15897 if (tmp
== NULL_RTX
)
15904 /* Find a DImode register. */
15905 CLEAR_HARD_REG_SET (regset
);
15906 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15907 if (tmp
!= NULL_RTX
)
15909 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15910 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15914 /* Can we use the input register to form a DI register? */
15915 SET_HARD_REG_SET (regset
);
15916 CLEAR_HARD_REG_BIT(regset
,
15917 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15918 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15919 if (tmp
== NULL_RTX
)
15921 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15925 gcc_assert (operands
[0] != NULL_RTX
);
15926 gcc_assert (operands
[1] != NULL_RTX
);
15927 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15928 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15932 /* Make sure the instructions are ordered with lower memory access first. */
15933 if (offsets
[0] > offsets
[1])
15935 gap
= offsets
[0] - offsets
[1];
15936 offset
= offsets
[1];
15938 /* Swap the instructions such that lower memory is accessed first. */
15939 SWAP_RTX (operands
[0], operands
[1]);
15940 SWAP_RTX (operands
[2], operands
[3]);
15942 SWAP_RTX (operands
[4], operands
[5]);
15946 gap
= offsets
[1] - offsets
[0];
15947 offset
= offsets
[0];
15950 /* Make sure accesses are to consecutive memory locations. */
15954 /* Make sure we generate legal instructions. */
15955 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15959 /* In Thumb state, where registers are almost unconstrained, there
15960 is little hope to fix it. */
15964 if (load
&& commute
)
15966 /* Try reordering registers. */
15967 SWAP_RTX (operands
[0], operands
[1]);
15968 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15975 /* If input registers are dead after this pattern, they can be
15976 reordered or replaced by other registers that are free in the
15977 current pattern. */
15978 if (!peep2_reg_dead_p (4, operands
[0])
15979 || !peep2_reg_dead_p (4, operands
[1]))
15982 /* Try to reorder the input registers. */
15983 /* For example, the code
15988 can be transformed into
15993 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15996 SWAP_RTX (operands
[0], operands
[1]);
16000 /* Try to find a free DI register. */
16001 CLEAR_HARD_REG_SET (regset
);
16002 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16003 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16006 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16007 if (tmp
== NULL_RTX
)
16010 /* DREG must be an even-numbered register in DImode.
16011 Split it into SI registers. */
16012 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16013 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16014 gcc_assert (operands
[0] != NULL_RTX
);
16015 gcc_assert (operands
[1] != NULL_RTX
);
16016 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16017 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16019 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16032 /* Print a symbolic form of X to the debug file, F. */
16034 arm_print_value (FILE *f
, rtx x
)
16036 switch (GET_CODE (x
))
16039 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16043 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16051 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16053 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16054 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16062 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16066 fprintf (f
, "`%s'", XSTR (x
, 0));
16070 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16074 arm_print_value (f
, XEXP (x
, 0));
16078 arm_print_value (f
, XEXP (x
, 0));
16080 arm_print_value (f
, XEXP (x
, 1));
16088 fprintf (f
, "????");
16093 /* Routines for manipulation of the constant pool. */
16095 /* Arm instructions cannot load a large constant directly into a
16096 register; they have to come from a pc relative load. The constant
16097 must therefore be placed in the addressable range of the pc
16098 relative load. Depending on the precise pc relative load
16099 instruction the range is somewhere between 256 bytes and 4k. This
16100 means that we often have to dump a constant inside a function, and
16101 generate code to branch around it.
16103 It is important to minimize this, since the branches will slow
16104 things down and make the code larger.
16106 Normally we can hide the table after an existing unconditional
16107 branch so that there is no interruption of the flow, but in the
16108 worst case the code looks like this:
16126 We fix this by performing a scan after scheduling, which notices
16127 which instructions need to have their operands fetched from the
16128 constant table and builds the table.
16130 The algorithm starts by building a table of all the constants that
16131 need fixing up and all the natural barriers in the function (places
16132 where a constant table can be dropped without breaking the flow).
16133 For each fixup we note how far the pc-relative replacement will be
16134 able to reach and the offset of the instruction into the function.
16136 Having built the table we then group the fixes together to form
16137 tables that are as large as possible (subject to addressing
16138 constraints) and emit each table of constants after the last
16139 barrier that is within range of all the instructions in the group.
16140 If a group does not contain a barrier, then we forcibly create one
16141 by inserting a jump instruction into the flow. Once the table has
16142 been inserted, the insns are then modified to reference the
16143 relevant entry in the pool.
16145 Possible enhancements to the algorithm (not implemented) are:
16147 1) For some processors and object formats, there may be benefit in
16148 aligning the pools to the start of cache lines; this alignment
16149 would need to be taken into account when calculating addressability
16152 /* These typedefs are located at the start of this file, so that
16153 they can be used in the prototypes there. This comment is to
16154 remind readers of that fact so that the following structures
16155 can be understood more easily.
16157 typedef struct minipool_node Mnode;
16158 typedef struct minipool_fixup Mfix; */
16160 struct minipool_node
16162 /* Doubly linked chain of entries. */
16165 /* The maximum offset into the code that this entry can be placed. While
16166 pushing fixes for forward references, all entries are sorted in order
16167 of increasing max_address. */
16168 HOST_WIDE_INT max_address
;
16169 /* Similarly for an entry inserted for a backwards ref. */
16170 HOST_WIDE_INT min_address
;
16171 /* The number of fixes referencing this entry. This can become zero
16172 if we "unpush" an entry. In this case we ignore the entry when we
16173 come to emit the code. */
16175 /* The offset from the start of the minipool. */
16176 HOST_WIDE_INT offset
;
16177 /* The value in table. */
16179 /* The mode of value. */
16181 /* The size of the value. With iWMMXt enabled
16182 sizes > 4 also imply an alignment of 8-bytes. */
16186 struct minipool_fixup
16190 HOST_WIDE_INT address
;
16196 HOST_WIDE_INT forwards
;
16197 HOST_WIDE_INT backwards
;
16200 /* Fixes less than a word need padding out to a word boundary. */
16201 #define MINIPOOL_FIX_SIZE(mode) \
16202 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16204 static Mnode
* minipool_vector_head
;
16205 static Mnode
* minipool_vector_tail
;
16206 static rtx_code_label
*minipool_vector_label
;
16207 static int minipool_pad
;
16209 /* The linked list of all minipool fixes required for this function. */
16210 Mfix
* minipool_fix_head
;
16211 Mfix
* minipool_fix_tail
;
16212 /* The fix entry for the current minipool, once it has been placed. */
16213 Mfix
* minipool_barrier
;
16215 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16216 #define JUMP_TABLES_IN_TEXT_SECTION 0
16219 static HOST_WIDE_INT
16220 get_jump_table_size (rtx_jump_table_data
*insn
)
16222 /* ADDR_VECs only take room if read-only data does into the text
16224 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16226 rtx body
= PATTERN (insn
);
16227 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16228 HOST_WIDE_INT size
;
16229 HOST_WIDE_INT modesize
;
16231 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16232 size
= modesize
* XVECLEN (body
, elt
);
16236 /* Round up size of TBB table to a halfword boundary. */
16237 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16240 /* No padding necessary for TBH. */
16243 /* Add two bytes for alignment on Thumb. */
16248 gcc_unreachable ();
16256 /* Return the maximum amount of padding that will be inserted before
16259 static HOST_WIDE_INT
16260 get_label_padding (rtx label
)
16262 HOST_WIDE_INT align
, min_insn_size
;
16264 align
= 1 << label_to_alignment (label
);
16265 min_insn_size
= TARGET_THUMB
? 2 : 4;
16266 return align
> min_insn_size
? align
- min_insn_size
: 0;
16269 /* Move a minipool fix MP from its current location to before MAX_MP.
16270 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16271 constraints may need updating. */
16273 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16274 HOST_WIDE_INT max_address
)
16276 /* The code below assumes these are different. */
16277 gcc_assert (mp
!= max_mp
);
16279 if (max_mp
== NULL
)
16281 if (max_address
< mp
->max_address
)
16282 mp
->max_address
= max_address
;
16286 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16287 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16289 mp
->max_address
= max_address
;
16291 /* Unlink MP from its current position. Since max_mp is non-null,
16292 mp->prev must be non-null. */
16293 mp
->prev
->next
= mp
->next
;
16294 if (mp
->next
!= NULL
)
16295 mp
->next
->prev
= mp
->prev
;
16297 minipool_vector_tail
= mp
->prev
;
16299 /* Re-insert it before MAX_MP. */
16301 mp
->prev
= max_mp
->prev
;
16304 if (mp
->prev
!= NULL
)
16305 mp
->prev
->next
= mp
;
16307 minipool_vector_head
= mp
;
16310 /* Save the new entry. */
16313 /* Scan over the preceding entries and adjust their addresses as
16315 while (mp
->prev
!= NULL
16316 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16318 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16325 /* Add a constant to the minipool for a forward reference. Returns the
16326 node added or NULL if the constant will not fit in this pool. */
16328 add_minipool_forward_ref (Mfix
*fix
)
16330 /* If set, max_mp is the first pool_entry that has a lower
16331 constraint than the one we are trying to add. */
16332 Mnode
* max_mp
= NULL
;
16333 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16336 /* If the minipool starts before the end of FIX->INSN then this FIX
16337 can not be placed into the current pool. Furthermore, adding the
16338 new constant pool entry may cause the pool to start FIX_SIZE bytes
16340 if (minipool_vector_head
&&
16341 (fix
->address
+ get_attr_length (fix
->insn
)
16342 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16345 /* Scan the pool to see if a constant with the same value has
16346 already been added. While we are doing this, also note the
16347 location where we must insert the constant if it doesn't already
16349 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16351 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16352 && fix
->mode
== mp
->mode
16353 && (!LABEL_P (fix
->value
)
16354 || (CODE_LABEL_NUMBER (fix
->value
)
16355 == CODE_LABEL_NUMBER (mp
->value
)))
16356 && rtx_equal_p (fix
->value
, mp
->value
))
16358 /* More than one fix references this entry. */
16360 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16363 /* Note the insertion point if necessary. */
16365 && mp
->max_address
> max_address
)
16368 /* If we are inserting an 8-bytes aligned quantity and
16369 we have not already found an insertion point, then
16370 make sure that all such 8-byte aligned quantities are
16371 placed at the start of the pool. */
16372 if (ARM_DOUBLEWORD_ALIGN
16374 && fix
->fix_size
>= 8
16375 && mp
->fix_size
< 8)
16378 max_address
= mp
->max_address
;
16382 /* The value is not currently in the minipool, so we need to create
16383 a new entry for it. If MAX_MP is NULL, the entry will be put on
16384 the end of the list since the placement is less constrained than
16385 any existing entry. Otherwise, we insert the new fix before
16386 MAX_MP and, if necessary, adjust the constraints on the other
16389 mp
->fix_size
= fix
->fix_size
;
16390 mp
->mode
= fix
->mode
;
16391 mp
->value
= fix
->value
;
16393 /* Not yet required for a backwards ref. */
16394 mp
->min_address
= -65536;
16396 if (max_mp
== NULL
)
16398 mp
->max_address
= max_address
;
16400 mp
->prev
= minipool_vector_tail
;
16402 if (mp
->prev
== NULL
)
16404 minipool_vector_head
= mp
;
16405 minipool_vector_label
= gen_label_rtx ();
16408 mp
->prev
->next
= mp
;
16410 minipool_vector_tail
= mp
;
16414 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16415 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16417 mp
->max_address
= max_address
;
16420 mp
->prev
= max_mp
->prev
;
16422 if (mp
->prev
!= NULL
)
16423 mp
->prev
->next
= mp
;
16425 minipool_vector_head
= mp
;
16428 /* Save the new entry. */
16431 /* Scan over the preceding entries and adjust their addresses as
16433 while (mp
->prev
!= NULL
16434 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16436 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16444 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16445 HOST_WIDE_INT min_address
)
16447 HOST_WIDE_INT offset
;
16449 /* The code below assumes these are different. */
16450 gcc_assert (mp
!= min_mp
);
16452 if (min_mp
== NULL
)
16454 if (min_address
> mp
->min_address
)
16455 mp
->min_address
= min_address
;
16459 /* We will adjust this below if it is too loose. */
16460 mp
->min_address
= min_address
;
16462 /* Unlink MP from its current position. Since min_mp is non-null,
16463 mp->next must be non-null. */
16464 mp
->next
->prev
= mp
->prev
;
16465 if (mp
->prev
!= NULL
)
16466 mp
->prev
->next
= mp
->next
;
16468 minipool_vector_head
= mp
->next
;
16470 /* Reinsert it after MIN_MP. */
16472 mp
->next
= min_mp
->next
;
16474 if (mp
->next
!= NULL
)
16475 mp
->next
->prev
= mp
;
16477 minipool_vector_tail
= mp
;
16483 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16485 mp
->offset
= offset
;
16486 if (mp
->refcount
> 0)
16487 offset
+= mp
->fix_size
;
16489 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16490 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16496 /* Add a constant to the minipool for a backward reference. Returns the
16497 node added or NULL if the constant will not fit in this pool.
16499 Note that the code for insertion for a backwards reference can be
16500 somewhat confusing because the calculated offsets for each fix do
16501 not take into account the size of the pool (which is still under
16504 add_minipool_backward_ref (Mfix
*fix
)
16506 /* If set, min_mp is the last pool_entry that has a lower constraint
16507 than the one we are trying to add. */
16508 Mnode
*min_mp
= NULL
;
16509 /* This can be negative, since it is only a constraint. */
16510 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16513 /* If we can't reach the current pool from this insn, or if we can't
16514 insert this entry at the end of the pool without pushing other
16515 fixes out of range, then we don't try. This ensures that we
16516 can't fail later on. */
16517 if (min_address
>= minipool_barrier
->address
16518 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16519 >= minipool_barrier
->address
))
16522 /* Scan the pool to see if a constant with the same value has
16523 already been added. While we are doing this, also note the
16524 location where we must insert the constant if it doesn't already
16526 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16528 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16529 && fix
->mode
== mp
->mode
16530 && (!LABEL_P (fix
->value
)
16531 || (CODE_LABEL_NUMBER (fix
->value
)
16532 == CODE_LABEL_NUMBER (mp
->value
)))
16533 && rtx_equal_p (fix
->value
, mp
->value
)
16534 /* Check that there is enough slack to move this entry to the
16535 end of the table (this is conservative). */
16536 && (mp
->max_address
16537 > (minipool_barrier
->address
16538 + minipool_vector_tail
->offset
16539 + minipool_vector_tail
->fix_size
)))
16542 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16545 if (min_mp
!= NULL
)
16546 mp
->min_address
+= fix
->fix_size
;
16549 /* Note the insertion point if necessary. */
16550 if (mp
->min_address
< min_address
)
16552 /* For now, we do not allow the insertion of 8-byte alignment
16553 requiring nodes anywhere but at the start of the pool. */
16554 if (ARM_DOUBLEWORD_ALIGN
16555 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16560 else if (mp
->max_address
16561 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16563 /* Inserting before this entry would push the fix beyond
16564 its maximum address (which can happen if we have
16565 re-located a forwards fix); force the new fix to come
16567 if (ARM_DOUBLEWORD_ALIGN
16568 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16573 min_address
= mp
->min_address
+ fix
->fix_size
;
16576 /* Do not insert a non-8-byte aligned quantity before 8-byte
16577 aligned quantities. */
16578 else if (ARM_DOUBLEWORD_ALIGN
16579 && fix
->fix_size
< 8
16580 && mp
->fix_size
>= 8)
16583 min_address
= mp
->min_address
+ fix
->fix_size
;
16588 /* We need to create a new entry. */
16590 mp
->fix_size
= fix
->fix_size
;
16591 mp
->mode
= fix
->mode
;
16592 mp
->value
= fix
->value
;
16594 mp
->max_address
= minipool_barrier
->address
+ 65536;
16596 mp
->min_address
= min_address
;
16598 if (min_mp
== NULL
)
16601 mp
->next
= minipool_vector_head
;
16603 if (mp
->next
== NULL
)
16605 minipool_vector_tail
= mp
;
16606 minipool_vector_label
= gen_label_rtx ();
16609 mp
->next
->prev
= mp
;
16611 minipool_vector_head
= mp
;
16615 mp
->next
= min_mp
->next
;
16619 if (mp
->next
!= NULL
)
16620 mp
->next
->prev
= mp
;
16622 minipool_vector_tail
= mp
;
16625 /* Save the new entry. */
16633 /* Scan over the following entries and adjust their offsets. */
16634 while (mp
->next
!= NULL
)
16636 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16637 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16640 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16642 mp
->next
->offset
= mp
->offset
;
16651 assign_minipool_offsets (Mfix
*barrier
)
16653 HOST_WIDE_INT offset
= 0;
16656 minipool_barrier
= barrier
;
16658 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16660 mp
->offset
= offset
;
16662 if (mp
->refcount
> 0)
16663 offset
+= mp
->fix_size
;
16667 /* Output the literal table */
16669 dump_minipool (rtx_insn
*scan
)
16675 if (ARM_DOUBLEWORD_ALIGN
)
16676 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16677 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16684 fprintf (dump_file
,
16685 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16686 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16688 scan
= emit_label_after (gen_label_rtx (), scan
);
16689 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16690 scan
= emit_label_after (minipool_vector_label
, scan
);
16692 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16694 if (mp
->refcount
> 0)
16698 fprintf (dump_file
,
16699 ";; Offset %u, min %ld, max %ld ",
16700 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16701 (unsigned long) mp
->max_address
);
16702 arm_print_value (dump_file
, mp
->value
);
16703 fputc ('\n', dump_file
);
16706 switch (mp
->fix_size
)
16708 #ifdef HAVE_consttable_1
16710 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16714 #ifdef HAVE_consttable_2
16716 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16720 #ifdef HAVE_consttable_4
16722 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16726 #ifdef HAVE_consttable_8
16728 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16732 #ifdef HAVE_consttable_16
16734 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16739 gcc_unreachable ();
16747 minipool_vector_head
= minipool_vector_tail
= NULL
;
16748 scan
= emit_insn_after (gen_consttable_end (), scan
);
16749 scan
= emit_barrier_after (scan
);
16752 /* Return the cost of forcibly inserting a barrier after INSN. */
16754 arm_barrier_cost (rtx insn
)
16756 /* Basing the location of the pool on the loop depth is preferable,
16757 but at the moment, the basic block information seems to be
16758 corrupt by this stage of the compilation. */
16759 int base_cost
= 50;
16760 rtx next
= next_nonnote_insn (insn
);
16762 if (next
!= NULL
&& LABEL_P (next
))
16765 switch (GET_CODE (insn
))
16768 /* It will always be better to place the table before the label, rather
16777 return base_cost
- 10;
16780 return base_cost
+ 10;
16784 /* Find the best place in the insn stream in the range
16785 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16786 Create the barrier by inserting a jump and add a new fix entry for
16789 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16791 HOST_WIDE_INT count
= 0;
16792 rtx_barrier
*barrier
;
16793 rtx_insn
*from
= fix
->insn
;
16794 /* The instruction after which we will insert the jump. */
16795 rtx_insn
*selected
= NULL
;
16797 /* The address at which the jump instruction will be placed. */
16798 HOST_WIDE_INT selected_address
;
16800 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16801 rtx_code_label
*label
= gen_label_rtx ();
16803 selected_cost
= arm_barrier_cost (from
);
16804 selected_address
= fix
->address
;
16806 while (from
&& count
< max_count
)
16808 rtx_jump_table_data
*tmp
;
16811 /* This code shouldn't have been called if there was a natural barrier
16813 gcc_assert (!BARRIER_P (from
));
16815 /* Count the length of this insn. This must stay in sync with the
16816 code that pushes minipool fixes. */
16817 if (LABEL_P (from
))
16818 count
+= get_label_padding (from
);
16820 count
+= get_attr_length (from
);
16822 /* If there is a jump table, add its length. */
16823 if (tablejump_p (from
, NULL
, &tmp
))
16825 count
+= get_jump_table_size (tmp
);
16827 /* Jump tables aren't in a basic block, so base the cost on
16828 the dispatch insn. If we select this location, we will
16829 still put the pool after the table. */
16830 new_cost
= arm_barrier_cost (from
);
16832 if (count
< max_count
16833 && (!selected
|| new_cost
<= selected_cost
))
16836 selected_cost
= new_cost
;
16837 selected_address
= fix
->address
+ count
;
16840 /* Continue after the dispatch table. */
16841 from
= NEXT_INSN (tmp
);
16845 new_cost
= arm_barrier_cost (from
);
16847 if (count
< max_count
16848 && (!selected
|| new_cost
<= selected_cost
))
16851 selected_cost
= new_cost
;
16852 selected_address
= fix
->address
+ count
;
16855 from
= NEXT_INSN (from
);
16858 /* Make sure that we found a place to insert the jump. */
16859 gcc_assert (selected
);
16861 /* Make sure we do not split a call and its corresponding
16862 CALL_ARG_LOCATION note. */
16863 if (CALL_P (selected
))
16865 rtx_insn
*next
= NEXT_INSN (selected
);
16866 if (next
&& NOTE_P (next
)
16867 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16871 /* Create a new JUMP_INSN that branches around a barrier. */
16872 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16873 JUMP_LABEL (from
) = label
;
16874 barrier
= emit_barrier_after (from
);
16875 emit_label_after (label
, barrier
);
16877 /* Create a minipool barrier entry for the new barrier. */
16878 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16879 new_fix
->insn
= barrier
;
16880 new_fix
->address
= selected_address
;
16881 new_fix
->next
= fix
->next
;
16882 fix
->next
= new_fix
;
16887 /* Record that there is a natural barrier in the insn stream at
16890 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16892 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16895 fix
->address
= address
;
16898 if (minipool_fix_head
!= NULL
)
16899 minipool_fix_tail
->next
= fix
;
16901 minipool_fix_head
= fix
;
16903 minipool_fix_tail
= fix
;
16906 /* Record INSN, which will need fixing up to load a value from the
16907 minipool. ADDRESS is the offset of the insn since the start of the
16908 function; LOC is a pointer to the part of the insn which requires
16909 fixing; VALUE is the constant that must be loaded, which is of type
16912 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16913 machine_mode mode
, rtx value
)
16915 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16918 fix
->address
= address
;
16921 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16922 fix
->value
= value
;
16923 fix
->forwards
= get_attr_pool_range (insn
);
16924 fix
->backwards
= get_attr_neg_pool_range (insn
);
16925 fix
->minipool
= NULL
;
16927 /* If an insn doesn't have a range defined for it, then it isn't
16928 expecting to be reworked by this code. Better to stop now than
16929 to generate duff assembly code. */
16930 gcc_assert (fix
->forwards
|| fix
->backwards
);
16932 /* If an entry requires 8-byte alignment then assume all constant pools
16933 require 4 bytes of padding. Trying to do this later on a per-pool
16934 basis is awkward because existing pool entries have to be modified. */
16935 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16940 fprintf (dump_file
,
16941 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16942 GET_MODE_NAME (mode
),
16943 INSN_UID (insn
), (unsigned long) address
,
16944 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16945 arm_print_value (dump_file
, fix
->value
);
16946 fprintf (dump_file
, "\n");
16949 /* Add it to the chain of fixes. */
16952 if (minipool_fix_head
!= NULL
)
16953 minipool_fix_tail
->next
= fix
;
16955 minipool_fix_head
= fix
;
16957 minipool_fix_tail
= fix
;
16960 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16961 Returns the number of insns needed, or 99 if we always want to synthesize
16964 arm_max_const_double_inline_cost ()
16966 /* Let the value get synthesized to avoid the use of literal pools. */
16967 if (arm_disable_literal_pool
)
16970 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16973 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16974 Returns the number of insns needed, or 99 if we don't know how to
16977 arm_const_double_inline_cost (rtx val
)
16979 rtx lowpart
, highpart
;
16982 mode
= GET_MODE (val
);
16984 if (mode
== VOIDmode
)
16987 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16989 lowpart
= gen_lowpart (SImode
, val
);
16990 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16992 gcc_assert (CONST_INT_P (lowpart
));
16993 gcc_assert (CONST_INT_P (highpart
));
16995 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16996 NULL_RTX
, NULL_RTX
, 0, 0)
16997 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16998 NULL_RTX
, NULL_RTX
, 0, 0));
17001 /* Cost of loading a SImode constant. */
17003 arm_const_inline_cost (enum rtx_code code
, rtx val
)
17005 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
17006 NULL_RTX
, NULL_RTX
, 1, 0);
17009 /* Return true if it is worthwhile to split a 64-bit constant into two
17010 32-bit operations. This is the case if optimizing for size, or
17011 if we have load delay slots, or if one 32-bit part can be done with
17012 a single data operation. */
17014 arm_const_double_by_parts (rtx val
)
17016 machine_mode mode
= GET_MODE (val
);
17019 if (optimize_size
|| arm_ld_sched
)
17022 if (mode
== VOIDmode
)
17025 part
= gen_highpart_mode (SImode
, mode
, val
);
17027 gcc_assert (CONST_INT_P (part
));
17029 if (const_ok_for_arm (INTVAL (part
))
17030 || const_ok_for_arm (~INTVAL (part
)))
17033 part
= gen_lowpart (SImode
, val
);
17035 gcc_assert (CONST_INT_P (part
));
17037 if (const_ok_for_arm (INTVAL (part
))
17038 || const_ok_for_arm (~INTVAL (part
)))
17044 /* Return true if it is possible to inline both the high and low parts
17045 of a 64-bit constant into 32-bit data processing instructions. */
17047 arm_const_double_by_immediates (rtx val
)
17049 machine_mode mode
= GET_MODE (val
);
17052 if (mode
== VOIDmode
)
17055 part
= gen_highpart_mode (SImode
, mode
, val
);
17057 gcc_assert (CONST_INT_P (part
));
17059 if (!const_ok_for_arm (INTVAL (part
)))
17062 part
= gen_lowpart (SImode
, val
);
17064 gcc_assert (CONST_INT_P (part
));
17066 if (!const_ok_for_arm (INTVAL (part
)))
17072 /* Scan INSN and note any of its operands that need fixing.
17073 If DO_PUSHES is false we do not actually push any of the fixups
17076 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17080 extract_constrain_insn (insn
);
17082 if (recog_data
.n_alternatives
== 0)
17085 /* Fill in recog_op_alt with information about the constraints of
17087 preprocess_constraints (insn
);
17089 const operand_alternative
*op_alt
= which_op_alt ();
17090 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17092 /* Things we need to fix can only occur in inputs. */
17093 if (recog_data
.operand_type
[opno
] != OP_IN
)
17096 /* If this alternative is a memory reference, then any mention
17097 of constants in this alternative is really to fool reload
17098 into allowing us to accept one there. We need to fix them up
17099 now so that we output the right code. */
17100 if (op_alt
[opno
].memory_ok
)
17102 rtx op
= recog_data
.operand
[opno
];
17104 if (CONSTANT_P (op
))
17107 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17108 recog_data
.operand_mode
[opno
], op
);
17110 else if (MEM_P (op
)
17111 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17112 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17116 rtx cop
= avoid_constant_pool_reference (op
);
17118 /* Casting the address of something to a mode narrower
17119 than a word can cause avoid_constant_pool_reference()
17120 to return the pool reference itself. That's no good to
17121 us here. Lets just hope that we can use the
17122 constant pool value directly. */
17124 cop
= get_pool_constant (XEXP (op
, 0));
17126 push_minipool_fix (insn
, address
,
17127 recog_data
.operand_loc
[opno
],
17128 recog_data
.operand_mode
[opno
], cop
);
17138 /* Rewrite move insn into subtract of 0 if the condition codes will
17139 be useful in next conditional jump insn. */
17142 thumb1_reorg (void)
17146 FOR_EACH_BB_FN (bb
, cfun
)
17149 rtx pat
, op0
, set
= NULL
;
17150 rtx_insn
*prev
, *insn
= BB_END (bb
);
17151 bool insn_clobbered
= false;
17153 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17154 insn
= PREV_INSN (insn
);
17156 /* Find the last cbranchsi4_insn in basic block BB. */
17157 if (insn
== BB_HEAD (bb
)
17158 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17161 /* Get the register with which we are comparing. */
17162 pat
= PATTERN (insn
);
17163 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
17165 /* Find the first flag setting insn before INSN in basic block BB. */
17166 gcc_assert (insn
!= BB_HEAD (bb
));
17167 for (prev
= PREV_INSN (insn
);
17169 && prev
!= BB_HEAD (bb
)
17171 || DEBUG_INSN_P (prev
)
17172 || ((set
= single_set (prev
)) != NULL
17173 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17174 prev
= PREV_INSN (prev
))
17176 if (reg_set_p (op0
, prev
))
17177 insn_clobbered
= true;
17180 /* Skip if op0 is clobbered by insn other than prev. */
17181 if (insn_clobbered
)
17187 dest
= SET_DEST (set
);
17188 src
= SET_SRC (set
);
17189 if (!low_register_operand (dest
, SImode
)
17190 || !low_register_operand (src
, SImode
))
17193 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17194 in INSN. Both src and dest of the move insn are checked. */
17195 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17197 dest
= copy_rtx (dest
);
17198 src
= copy_rtx (src
);
17199 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17200 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
17201 INSN_CODE (prev
) = -1;
17202 /* Set test register in INSN to dest. */
17203 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
17204 INSN_CODE (insn
) = -1;
17209 /* Convert instructions to their cc-clobbering variant if possible, since
17210 that allows us to use smaller encodings. */
17213 thumb2_reorg (void)
17218 INIT_REG_SET (&live
);
17220 /* We are freeing block_for_insn in the toplev to keep compatibility
17221 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17222 compute_bb_for_insn ();
17225 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17227 FOR_EACH_BB_FN (bb
, cfun
)
17229 if (current_tune
->disparage_flag_setting_t16_encodings
17230 && optimize_bb_for_speed_p (bb
))
17234 Convert_Action action
= SKIP
;
17235 Convert_Action action_for_partial_flag_setting
17236 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17237 && optimize_bb_for_speed_p (bb
))
17240 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17241 df_simulate_initialize_backwards (bb
, &live
);
17242 FOR_BB_INSNS_REVERSE (bb
, insn
)
17244 if (NONJUMP_INSN_P (insn
)
17245 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17246 && GET_CODE (PATTERN (insn
)) == SET
)
17249 rtx pat
= PATTERN (insn
);
17250 rtx dst
= XEXP (pat
, 0);
17251 rtx src
= XEXP (pat
, 1);
17252 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17254 if (!OBJECT_P (src
))
17255 op0
= XEXP (src
, 0);
17257 if (BINARY_P (src
))
17258 op1
= XEXP (src
, 1);
17260 if (low_register_operand (dst
, SImode
))
17262 switch (GET_CODE (src
))
17265 /* Adding two registers and storing the result
17266 in the first source is already a 16-bit
17268 if (rtx_equal_p (dst
, op0
)
17269 && register_operand (op1
, SImode
))
17272 if (low_register_operand (op0
, SImode
))
17274 /* ADDS <Rd>,<Rn>,<Rm> */
17275 if (low_register_operand (op1
, SImode
))
17277 /* ADDS <Rdn>,#<imm8> */
17278 /* SUBS <Rdn>,#<imm8> */
17279 else if (rtx_equal_p (dst
, op0
)
17280 && CONST_INT_P (op1
)
17281 && IN_RANGE (INTVAL (op1
), -255, 255))
17283 /* ADDS <Rd>,<Rn>,#<imm3> */
17284 /* SUBS <Rd>,<Rn>,#<imm3> */
17285 else if (CONST_INT_P (op1
)
17286 && IN_RANGE (INTVAL (op1
), -7, 7))
17289 /* ADCS <Rd>, <Rn> */
17290 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17291 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17292 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17294 && COMPARISON_P (op1
)
17295 && cc_register (XEXP (op1
, 0), VOIDmode
)
17296 && maybe_get_arm_condition_code (op1
) == ARM_CS
17297 && XEXP (op1
, 1) == const0_rtx
)
17302 /* RSBS <Rd>,<Rn>,#0
17303 Not handled here: see NEG below. */
17304 /* SUBS <Rd>,<Rn>,#<imm3>
17306 Not handled here: see PLUS above. */
17307 /* SUBS <Rd>,<Rn>,<Rm> */
17308 if (low_register_operand (op0
, SImode
)
17309 && low_register_operand (op1
, SImode
))
17314 /* MULS <Rdm>,<Rn>,<Rdm>
17315 As an exception to the rule, this is only used
17316 when optimizing for size since MULS is slow on all
17317 known implementations. We do not even want to use
17318 MULS in cold code, if optimizing for speed, so we
17319 test the global flag here. */
17320 if (!optimize_size
)
17322 /* else fall through. */
17326 /* ANDS <Rdn>,<Rm> */
17327 if (rtx_equal_p (dst
, op0
)
17328 && low_register_operand (op1
, SImode
))
17329 action
= action_for_partial_flag_setting
;
17330 else if (rtx_equal_p (dst
, op1
)
17331 && low_register_operand (op0
, SImode
))
17332 action
= action_for_partial_flag_setting
== SKIP
17333 ? SKIP
: SWAP_CONV
;
17339 /* ASRS <Rdn>,<Rm> */
17340 /* LSRS <Rdn>,<Rm> */
17341 /* LSLS <Rdn>,<Rm> */
17342 if (rtx_equal_p (dst
, op0
)
17343 && low_register_operand (op1
, SImode
))
17344 action
= action_for_partial_flag_setting
;
17345 /* ASRS <Rd>,<Rm>,#<imm5> */
17346 /* LSRS <Rd>,<Rm>,#<imm5> */
17347 /* LSLS <Rd>,<Rm>,#<imm5> */
17348 else if (low_register_operand (op0
, SImode
)
17349 && CONST_INT_P (op1
)
17350 && IN_RANGE (INTVAL (op1
), 0, 31))
17351 action
= action_for_partial_flag_setting
;
17355 /* RORS <Rdn>,<Rm> */
17356 if (rtx_equal_p (dst
, op0
)
17357 && low_register_operand (op1
, SImode
))
17358 action
= action_for_partial_flag_setting
;
17362 /* MVNS <Rd>,<Rm> */
17363 if (low_register_operand (op0
, SImode
))
17364 action
= action_for_partial_flag_setting
;
17368 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17369 if (low_register_operand (op0
, SImode
))
17374 /* MOVS <Rd>,#<imm8> */
17375 if (CONST_INT_P (src
)
17376 && IN_RANGE (INTVAL (src
), 0, 255))
17377 action
= action_for_partial_flag_setting
;
17381 /* MOVS and MOV<c> with registers have different
17382 encodings, so are not relevant here. */
17390 if (action
!= SKIP
)
17392 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17393 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17396 if (action
== SWAP_CONV
)
17398 src
= copy_rtx (src
);
17399 XEXP (src
, 0) = op1
;
17400 XEXP (src
, 1) = op0
;
17401 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
17402 vec
= gen_rtvec (2, pat
, clobber
);
17404 else /* action == CONV */
17405 vec
= gen_rtvec (2, pat
, clobber
);
17407 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17408 INSN_CODE (insn
) = -1;
17412 if (NONDEBUG_INSN_P (insn
))
17413 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17417 CLEAR_REG_SET (&live
);
17420 /* Gcc puts the pool in the wrong place for ARM, since we can only
17421 load addresses a limited distance around the pc. We do some
17422 special munging to move the constant pool values to the correct
17423 point in the code. */
17428 HOST_WIDE_INT address
= 0;
17433 else if (TARGET_THUMB2
)
17436 /* Ensure all insns that must be split have been split at this point.
17437 Otherwise, the pool placement code below may compute incorrect
17438 insn lengths. Note that when optimizing, all insns have already
17439 been split at this point. */
17441 split_all_insns_noflow ();
17443 minipool_fix_head
= minipool_fix_tail
= NULL
;
17445 /* The first insn must always be a note, or the code below won't
17446 scan it properly. */
17447 insn
= get_insns ();
17448 gcc_assert (NOTE_P (insn
));
17451 /* Scan all the insns and record the operands that will need fixing. */
17452 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17454 if (BARRIER_P (insn
))
17455 push_minipool_barrier (insn
, address
);
17456 else if (INSN_P (insn
))
17458 rtx_jump_table_data
*table
;
17460 note_invalid_constants (insn
, address
, true);
17461 address
+= get_attr_length (insn
);
17463 /* If the insn is a vector jump, add the size of the table
17464 and skip the table. */
17465 if (tablejump_p (insn
, NULL
, &table
))
17467 address
+= get_jump_table_size (table
);
17471 else if (LABEL_P (insn
))
17472 /* Add the worst-case padding due to alignment. We don't add
17473 the _current_ padding because the minipool insertions
17474 themselves might change it. */
17475 address
+= get_label_padding (insn
);
17478 fix
= minipool_fix_head
;
17480 /* Now scan the fixups and perform the required changes. */
17485 Mfix
* last_added_fix
;
17486 Mfix
* last_barrier
= NULL
;
17489 /* Skip any further barriers before the next fix. */
17490 while (fix
&& BARRIER_P (fix
->insn
))
17493 /* No more fixes. */
17497 last_added_fix
= NULL
;
17499 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17501 if (BARRIER_P (ftmp
->insn
))
17503 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17506 last_barrier
= ftmp
;
17508 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17511 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17514 /* If we found a barrier, drop back to that; any fixes that we
17515 could have reached but come after the barrier will now go in
17516 the next mini-pool. */
17517 if (last_barrier
!= NULL
)
17519 /* Reduce the refcount for those fixes that won't go into this
17521 for (fdel
= last_barrier
->next
;
17522 fdel
&& fdel
!= ftmp
;
17525 fdel
->minipool
->refcount
--;
17526 fdel
->minipool
= NULL
;
17529 ftmp
= last_barrier
;
17533 /* ftmp is first fix that we can't fit into this pool and
17534 there no natural barriers that we could use. Insert a
17535 new barrier in the code somewhere between the previous
17536 fix and this one, and arrange to jump around it. */
17537 HOST_WIDE_INT max_address
;
17539 /* The last item on the list of fixes must be a barrier, so
17540 we can never run off the end of the list of fixes without
17541 last_barrier being set. */
17544 max_address
= minipool_vector_head
->max_address
;
17545 /* Check that there isn't another fix that is in range that
17546 we couldn't fit into this pool because the pool was
17547 already too large: we need to put the pool before such an
17548 instruction. The pool itself may come just after the
17549 fix because create_fix_barrier also allows space for a
17550 jump instruction. */
17551 if (ftmp
->address
< max_address
)
17552 max_address
= ftmp
->address
+ 1;
17554 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17557 assign_minipool_offsets (last_barrier
);
17561 if (!BARRIER_P (ftmp
->insn
)
17562 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17569 /* Scan over the fixes we have identified for this pool, fixing them
17570 up and adding the constants to the pool itself. */
17571 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17572 this_fix
= this_fix
->next
)
17573 if (!BARRIER_P (this_fix
->insn
))
17576 = plus_constant (Pmode
,
17577 gen_rtx_LABEL_REF (VOIDmode
,
17578 minipool_vector_label
),
17579 this_fix
->minipool
->offset
);
17580 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17583 dump_minipool (last_barrier
->insn
);
17587 /* From now on we must synthesize any constants that we can't handle
17588 directly. This can happen if the RTL gets split during final
17589 instruction generation. */
17590 cfun
->machine
->after_arm_reorg
= 1;
17592 /* Free the minipool memory. */
17593 obstack_free (&minipool_obstack
, minipool_startobj
);
17596 /* Routines to output assembly language. */
17598 /* Return string representation of passed in real value. */
17599 static const char *
17600 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17602 if (!fp_consts_inited
)
17605 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17609 /* OPERANDS[0] is the entire list of insns that constitute pop,
17610 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17611 is in the list, UPDATE is true iff the list contains explicit
17612 update of base register. */
17614 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17620 const char *conditional
;
17621 int num_saves
= XVECLEN (operands
[0], 0);
17622 unsigned int regno
;
17623 unsigned int regno_base
= REGNO (operands
[1]);
17626 offset
+= update
? 1 : 0;
17627 offset
+= return_pc
? 1 : 0;
17629 /* Is the base register in the list? */
17630 for (i
= offset
; i
< num_saves
; i
++)
17632 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17633 /* If SP is in the list, then the base register must be SP. */
17634 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17635 /* If base register is in the list, there must be no explicit update. */
17636 if (regno
== regno_base
)
17637 gcc_assert (!update
);
17640 conditional
= reverse
? "%?%D0" : "%?%d0";
17641 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17643 /* Output pop (not stmfd) because it has a shorter encoding. */
17644 gcc_assert (update
);
17645 sprintf (pattern
, "pop%s\t{", conditional
);
17649 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17650 It's just a convention, their semantics are identical. */
17651 if (regno_base
== SP_REGNUM
)
17652 sprintf (pattern
, "ldm%sfd\t", conditional
);
17653 else if (TARGET_UNIFIED_ASM
)
17654 sprintf (pattern
, "ldmia%s\t", conditional
);
17656 sprintf (pattern
, "ldm%sia\t", conditional
);
17658 strcat (pattern
, reg_names
[regno_base
]);
17660 strcat (pattern
, "!, {");
17662 strcat (pattern
, ", {");
17665 /* Output the first destination register. */
17667 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17669 /* Output the rest of the destination registers. */
17670 for (i
= offset
+ 1; i
< num_saves
; i
++)
17672 strcat (pattern
, ", ");
17674 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17677 strcat (pattern
, "}");
17679 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17680 strcat (pattern
, "^");
17682 output_asm_insn (pattern
, &cond
);
17686 /* Output the assembly for a store multiple. */
17689 vfp_output_vstmd (rtx
* operands
)
17695 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17696 ? XEXP (operands
[0], 0)
17697 : XEXP (XEXP (operands
[0], 0), 0);
17698 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17701 strcpy (pattern
, "vpush%?.64\t{%P1");
17703 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17705 p
= strlen (pattern
);
17707 gcc_assert (REG_P (operands
[1]));
17709 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17710 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17712 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17714 strcpy (&pattern
[p
], "}");
17716 output_asm_insn (pattern
, operands
);
17721 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17722 number of bytes pushed. */
17725 vfp_emit_fstmd (int base_reg
, int count
)
17732 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17733 register pairs are stored by a store multiple insn. We avoid this
17734 by pushing an extra pair. */
17735 if (count
== 2 && !arm_arch6
)
17737 if (base_reg
== LAST_VFP_REGNUM
- 3)
17742 /* FSTMD may not store more than 16 doubleword registers at once. Split
17743 larger stores into multiple parts (up to a maximum of two, in
17748 /* NOTE: base_reg is an internal register number, so each D register
17750 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17751 saved
+= vfp_emit_fstmd (base_reg
, 16);
17755 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17756 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17758 reg
= gen_rtx_REG (DFmode
, base_reg
);
17761 XVECEXP (par
, 0, 0)
17762 = gen_rtx_SET (VOIDmode
,
17765 gen_rtx_PRE_MODIFY (Pmode
,
17768 (Pmode
, stack_pointer_rtx
,
17771 gen_rtx_UNSPEC (BLKmode
,
17772 gen_rtvec (1, reg
),
17773 UNSPEC_PUSH_MULT
));
17775 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17776 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17777 RTX_FRAME_RELATED_P (tmp
) = 1;
17778 XVECEXP (dwarf
, 0, 0) = tmp
;
17780 tmp
= gen_rtx_SET (VOIDmode
,
17781 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17783 RTX_FRAME_RELATED_P (tmp
) = 1;
17784 XVECEXP (dwarf
, 0, 1) = tmp
;
17786 for (i
= 1; i
< count
; i
++)
17788 reg
= gen_rtx_REG (DFmode
, base_reg
);
17790 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17792 tmp
= gen_rtx_SET (VOIDmode
,
17793 gen_frame_mem (DFmode
,
17794 plus_constant (Pmode
,
17798 RTX_FRAME_RELATED_P (tmp
) = 1;
17799 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17802 par
= emit_insn (par
);
17803 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17804 RTX_FRAME_RELATED_P (par
) = 1;
17809 /* Emit a call instruction with pattern PAT. ADDR is the address of
17810 the call target. */
17813 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17817 insn
= emit_call_insn (pat
);
17819 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17820 If the call might use such an entry, add a use of the PIC register
17821 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17822 if (TARGET_VXWORKS_RTP
17825 && GET_CODE (addr
) == SYMBOL_REF
17826 && (SYMBOL_REF_DECL (addr
)
17827 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17828 : !SYMBOL_REF_LOCAL_P (addr
)))
17830 require_pic_register ();
17831 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17834 if (TARGET_AAPCS_BASED
)
17836 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17837 linker. We need to add an IP clobber to allow setting
17838 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17839 is not needed since it's a fixed register. */
17840 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17841 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17845 /* Output a 'call' insn. */
17847 output_call (rtx
*operands
)
17849 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17851 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17852 if (REGNO (operands
[0]) == LR_REGNUM
)
17854 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17855 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17860 if (TARGET_INTERWORK
|| arm_arch4t
)
17861 output_asm_insn ("bx%?\t%0", operands
);
17863 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17868 /* Output a 'call' insn that is a reference in memory. This is
17869 disabled for ARMv5 and we prefer a blx instead because otherwise
17870 there's a significant performance overhead. */
17872 output_call_mem (rtx
*operands
)
17874 gcc_assert (!arm_arch5
);
17875 if (TARGET_INTERWORK
)
17877 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17878 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17879 output_asm_insn ("bx%?\t%|ip", operands
);
17881 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17883 /* LR is used in the memory address. We load the address in the
17884 first instruction. It's safe to use IP as the target of the
17885 load since the call will kill it anyway. */
17886 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17887 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17889 output_asm_insn ("bx%?\t%|ip", operands
);
17891 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17895 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17896 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17903 /* Output a move from arm registers to arm registers of a long double
17904 OPERANDS[0] is the destination.
17905 OPERANDS[1] is the source. */
17907 output_mov_long_double_arm_from_arm (rtx
*operands
)
17909 /* We have to be careful here because the two might overlap. */
17910 int dest_start
= REGNO (operands
[0]);
17911 int src_start
= REGNO (operands
[1]);
17915 if (dest_start
< src_start
)
17917 for (i
= 0; i
< 3; i
++)
17919 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17920 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17921 output_asm_insn ("mov%?\t%0, %1", ops
);
17926 for (i
= 2; i
>= 0; i
--)
17928 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17929 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17930 output_asm_insn ("mov%?\t%0, %1", ops
);
17938 arm_emit_movpair (rtx dest
, rtx src
)
17940 /* If the src is an immediate, simplify it. */
17941 if (CONST_INT_P (src
))
17943 HOST_WIDE_INT val
= INTVAL (src
);
17944 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17945 if ((val
>> 16) & 0x0000ffff)
17946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17948 GEN_INT ((val
>> 16) & 0x0000ffff));
17951 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17952 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17955 /* Output a move between double words. It must be REG<-MEM
17958 output_move_double (rtx
*operands
, bool emit
, int *count
)
17960 enum rtx_code code0
= GET_CODE (operands
[0]);
17961 enum rtx_code code1
= GET_CODE (operands
[1]);
17966 /* The only case when this might happen is when
17967 you are looking at the length of a DImode instruction
17968 that has an invalid constant in it. */
17969 if (code0
== REG
&& code1
!= MEM
)
17971 gcc_assert (!emit
);
17978 unsigned int reg0
= REGNO (operands
[0]);
17980 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17982 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17984 switch (GET_CODE (XEXP (operands
[1], 0)))
17991 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17992 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17994 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17999 gcc_assert (TARGET_LDRD
);
18001 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
18008 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
18010 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
18018 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
18020 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
18025 gcc_assert (TARGET_LDRD
);
18027 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
18032 /* Autoicrement addressing modes should never have overlapping
18033 base and destination registers, and overlapping index registers
18034 are already prohibited, so this doesn't need to worry about
18036 otherops
[0] = operands
[0];
18037 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18038 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18040 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18042 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18044 /* Registers overlap so split out the increment. */
18047 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18048 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
18055 /* Use a single insn if we can.
18056 FIXME: IWMMXT allows offsets larger than ldrd can
18057 handle, fix these up with a pair of ldr. */
18059 || !CONST_INT_P (otherops
[2])
18060 || (INTVAL (otherops
[2]) > -256
18061 && INTVAL (otherops
[2]) < 256))
18064 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
18070 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18071 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18081 /* Use a single insn if we can.
18082 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18083 fix these up with a pair of ldr. */
18085 || !CONST_INT_P (otherops
[2])
18086 || (INTVAL (otherops
[2]) > -256
18087 && INTVAL (otherops
[2]) < 256))
18090 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18096 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18097 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18107 /* We might be able to use ldrd %0, %1 here. However the range is
18108 different to ldr/adr, and it is broken on some ARMv7-M
18109 implementations. */
18110 /* Use the second register of the pair to avoid problematic
18112 otherops
[1] = operands
[1];
18114 output_asm_insn ("adr%?\t%0, %1", otherops
);
18115 operands
[1] = otherops
[0];
18119 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18121 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18128 /* ??? This needs checking for thumb2. */
18130 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18131 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18133 otherops
[0] = operands
[0];
18134 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18135 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18137 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18139 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18141 switch ((int) INTVAL (otherops
[2]))
18145 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18151 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18157 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18161 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18162 operands
[1] = otherops
[0];
18164 && (REG_P (otherops
[2])
18166 || (CONST_INT_P (otherops
[2])
18167 && INTVAL (otherops
[2]) > -256
18168 && INTVAL (otherops
[2]) < 256)))
18170 if (reg_overlap_mentioned_p (operands
[0],
18174 /* Swap base and index registers over to
18175 avoid a conflict. */
18177 otherops
[1] = otherops
[2];
18180 /* If both registers conflict, it will usually
18181 have been fixed by a splitter. */
18182 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18183 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18187 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18188 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18195 otherops
[0] = operands
[0];
18197 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18202 if (CONST_INT_P (otherops
[2]))
18206 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18207 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18209 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18215 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18221 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18228 return "ldr%(d%)\t%0, [%1]";
18230 return "ldm%(ia%)\t%1, %M0";
18234 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18235 /* Take care of overlapping base/data reg. */
18236 if (reg_mentioned_p (operands
[0], operands
[1]))
18240 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18241 output_asm_insn ("ldr%?\t%0, %1", operands
);
18251 output_asm_insn ("ldr%?\t%0, %1", operands
);
18252 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18262 /* Constraints should ensure this. */
18263 gcc_assert (code0
== MEM
&& code1
== REG
);
18264 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18265 || (TARGET_ARM
&& TARGET_LDRD
));
18267 switch (GET_CODE (XEXP (operands
[0], 0)))
18273 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18275 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18280 gcc_assert (TARGET_LDRD
);
18282 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18289 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18291 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18299 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18301 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18306 gcc_assert (TARGET_LDRD
);
18308 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18313 otherops
[0] = operands
[1];
18314 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18315 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18317 /* IWMMXT allows offsets larger than ldrd can handle,
18318 fix these up with a pair of ldr. */
18320 && CONST_INT_P (otherops
[2])
18321 && (INTVAL(otherops
[2]) <= -256
18322 || INTVAL(otherops
[2]) >= 256))
18324 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18328 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18329 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18338 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18339 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18345 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18348 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18353 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18358 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18359 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18361 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18365 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18372 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18379 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18384 && (REG_P (otherops
[2])
18386 || (CONST_INT_P (otherops
[2])
18387 && INTVAL (otherops
[2]) > -256
18388 && INTVAL (otherops
[2]) < 256)))
18390 otherops
[0] = operands
[1];
18391 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18393 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18399 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18400 otherops
[1] = operands
[1];
18403 output_asm_insn ("str%?\t%1, %0", operands
);
18404 output_asm_insn ("str%?\t%H1, %0", otherops
);
18414 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18415 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18418 output_move_quad (rtx
*operands
)
18420 if (REG_P (operands
[0]))
18422 /* Load, or reg->reg move. */
18424 if (MEM_P (operands
[1]))
18426 switch (GET_CODE (XEXP (operands
[1], 0)))
18429 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18434 output_asm_insn ("adr%?\t%0, %1", operands
);
18435 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18439 gcc_unreachable ();
18447 gcc_assert (REG_P (operands
[1]));
18449 dest
= REGNO (operands
[0]);
18450 src
= REGNO (operands
[1]);
18452 /* This seems pretty dumb, but hopefully GCC won't try to do it
18455 for (i
= 0; i
< 4; i
++)
18457 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18458 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18459 output_asm_insn ("mov%?\t%0, %1", ops
);
18462 for (i
= 3; i
>= 0; i
--)
18464 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18465 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18466 output_asm_insn ("mov%?\t%0, %1", ops
);
18472 gcc_assert (MEM_P (operands
[0]));
18473 gcc_assert (REG_P (operands
[1]));
18474 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18476 switch (GET_CODE (XEXP (operands
[0], 0)))
18479 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18483 gcc_unreachable ();
18490 /* Output a VFP load or store instruction. */
18493 output_move_vfp (rtx
*operands
)
18495 rtx reg
, mem
, addr
, ops
[2];
18496 int load
= REG_P (operands
[0]);
18497 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18498 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18503 reg
= operands
[!load
];
18504 mem
= operands
[load
];
18506 mode
= GET_MODE (reg
);
18508 gcc_assert (REG_P (reg
));
18509 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18510 gcc_assert (mode
== SFmode
18514 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18515 gcc_assert (MEM_P (mem
));
18517 addr
= XEXP (mem
, 0);
18519 switch (GET_CODE (addr
))
18522 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18523 ops
[0] = XEXP (addr
, 0);
18528 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18529 ops
[0] = XEXP (addr
, 0);
18534 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18540 sprintf (buff
, templ
,
18541 load
? "ld" : "st",
18544 integer_p
? "\t%@ int" : "");
18545 output_asm_insn (buff
, ops
);
18550 /* Output a Neon double-word or quad-word load or store, or a load
18551 or store for larger structure modes.
18553 WARNING: The ordering of elements is weird in big-endian mode,
18554 because the EABI requires that vectors stored in memory appear
18555 as though they were stored by a VSTM, as required by the EABI.
18556 GCC RTL defines element ordering based on in-memory order.
18557 This can be different from the architectural ordering of elements
18558 within a NEON register. The intrinsics defined in arm_neon.h use the
18559 NEON register element ordering, not the GCC RTL element ordering.
18561 For example, the in-memory ordering of a big-endian a quadword
18562 vector with 16-bit elements when stored from register pair {d0,d1}
18563 will be (lowest address first, d0[N] is NEON register element N):
18565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18567 When necessary, quadword registers (dN, dN+1) are moved to ARM
18568 registers from rN in the order:
18570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18572 So that STM/LDM can be used on vectors in ARM registers, and the
18573 same memory layout will result as if VSTM/VLDM were used.
18575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18576 possible, which allows use of appropriate alignment tags.
18577 Note that the choice of "64" is independent of the actual vector
18578 element size; this size simply ensures that the behavior is
18579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18581 Due to limitations of those instructions, use of VST1.64/VLD1.64
18582 is not possible if:
18583 - the address contains PRE_DEC, or
18584 - the mode refers to more than 4 double-word registers
18586 In those cases, it would be possible to replace VSTM/VLDM by a
18587 sequence of instructions; this is not currently implemented since
18588 this is not certain to actually improve performance. */
/* NOTE(review): corrupted extraction -- statements are split across physical
   lines and interior source lines are missing (gaps in the embedded original
   line numbers, e.g. 18620->18623 drops braces/case labels).  Code left
   byte-identical; comments only.
   Purpose per surviving text: emit the assembly for a NEON register<->memory
   move of OPERANDS, choosing vld1/vst1, vldm/vstm (via 'templ'), or a
   per-doubleword vldr/vstr loop depending on the address form -- see the
   layout comment immediately preceding this function.  The vldr/vstr loop
   defers an overlapping destination register to last ('overlap').  */
18591 output_move_neon (rtx
*operands
)
18593 rtx reg
, mem
, addr
, ops
[2];
18594 int regno
, nregs
, load
= REG_P (operands
[0]);
18599 reg
= operands
[!load
];
18600 mem
= operands
[load
];
18602 mode
= GET_MODE (reg
);
18604 gcc_assert (REG_P (reg
));
18605 regno
= REGNO (reg
);
18606 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18608 || NEON_REGNO_OK_FOR_QUAD (regno
));
18609 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18610 || VALID_NEON_QREG_MODE (mode
)
18611 || VALID_NEON_STRUCT_MODE (mode
));
18612 gcc_assert (MEM_P (mem
));
18614 addr
= XEXP (mem
, 0);
18616 /* Strip off const from addresses like (const (plus (...))). */
18617 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18618 addr
= XEXP (addr
, 0);
/* Dispatch on the address form; the case labels were lost in extraction.  */
18620 switch (GET_CODE (addr
))
18623 /* We have to use vldm / vstm for too-large modes. */
18626 templ
= "v%smia%%?\t%%0!, %%h1";
18627 ops
[0] = XEXP (addr
, 0);
18631 templ
= "v%s1.64\t%%h1, %%A0";
18638 /* We have to use vldm / vstm in this case, since there is no
18639 pre-decrement form of the vld1 / vst1 instructions. */
18640 templ
= "v%smdb%%?\t%%0!, %%h1";
18641 ops
[0] = XEXP (addr
, 0);
18646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18647 gcc_unreachable ();
18650 /* We have to use vldm / vstm for too-large modes. */
18654 templ
= "v%smia%%?\t%%m0, %%h1";
18656 templ
= "v%s1.64\t%%h1, %%A0";
18662 /* Fall through. */
/* Per-doubleword vldr/vstr path; DImode slices of the NEON register.  */
18668 for (i
= 0; i
< nregs
; i
++)
18670 /* We're only using DImode here because it's a convenient size. */
18671 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18672 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18673 if (reg_overlap_mentioned_p (ops
[0], mem
))
18675 gcc_assert (overlap
== -1);
18680 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18681 output_asm_insn (buff
, ops
);
/* Emit the deferred overlapping transfer last.  */
18686 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18687 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18688 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18689 output_asm_insn (buff
, ops
);
18696 gcc_unreachable ();
/* Single-template path: "ld" or "st" is substituted into 'templ'.  */
18699 sprintf (buff
, templ
, load
? "ld" : "st");
18700 output_asm_insn (buff
, ops
);
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (e.g. gap 18718->18729 drops the mode switch cases).  Code left
   byte-identical; comments only.
   Computes the insn length for neon_mov<mode> (VSTRUCT modes), mirroring the
   address analysis done by output_move_neon above.  */
18705 /* Compute and return the length of neon_mov<mode>, where <mode> is
18706 one of VSTRUCT modes: EI, OI, CI or XI. */
18708 arm_attr_length_move_neon (rtx_insn
*insn
)
18710 rtx reg
, mem
, addr
;
18714 extract_insn_cached (insn
);
/* Register-to-register move: length depends only on the mode.  */
18716 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18718 mode
= GET_MODE (recog_data
.operand
[0]);
18729 gcc_unreachable ();
18733 load
= REG_P (recog_data
.operand
[0]);
18734 reg
= recog_data
.operand
[!load
];
18735 mem
= recog_data
.operand
[load
];
18737 gcc_assert (MEM_P (mem
));
18739 mode
= GET_MODE (reg
);
18740 addr
= XEXP (mem
, 0);
18742 /* Strip off const from addresses like (const (plus (...))). */
18743 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18744 addr
= XEXP (addr
, 0);
/* Addresses needing the per-doubleword loop cost one insn per pair.  */
18746 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18748 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (the comment at 18755 is itself truncated).  Code left byte-identical;
   comments only.
   Tests whether the memory operand of INSN uses a bare register or
   register-plus-immediate address.  */
18755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18759 arm_address_offset_is_imm (rtx_insn
*insn
)
18763 extract_insn_cached (insn
);
/* A REG destination means no memory operand to inspect here.  */
18765 if (REG_P (recog_data
.operand
[0]))
18768 mem
= recog_data
.operand
[0];
18770 gcc_assert (MEM_P (mem
));
18772 addr
= XEXP (mem
, 0);
18775 || (GET_CODE (addr
) == PLUS
18776 && REG_P (XEXP (addr
, 0))
18777 && CONST_INT_P (XEXP (addr
, 1))))
/* NOTE(review): corrupted extraction -- split statements and missing lines
   (the final arguments of both calls at 18794/18798 are lost).  Code left
   byte-identical; comments only.
   Splits a possibly-too-large ADD immediate into sub/add sequences via
   output_multi_immediate; a zero add to the same register emits nothing.  */
18783 /* Output an ADD r, s, #n where n may be too big for one instruction.
18784 If adding zero to one register, output nothing. */
18786 output_add_immediate (rtx
*operands
)
18788 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18790 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
/* Negative constants are emitted as SUBs, positive as ADDs.  */
18793 output_multi_immediate (operands
,
18794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18797 output_multi_immediate (operands
,
18798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (e.g. the #if body after 18815 and the instr1->instr2 switch-over are
   gone).  Code left byte-identical; comments only.
   Emits N as a series of 8-bit-rotated immediates, one instruction per
   nonzero byte field, scanning even bit positions 0..30.  */
18805 /* Output a multiple immediate operation.
18806 OPERANDS is the vector of operands referred to in the output patterns.
18807 INSTR1 is the output pattern to use for the first constant.
18808 INSTR2 is the output pattern to use for subsequent constants.
18809 IMMED_OP is the index of the constant slot in OPERANDS.
18810 N is the constant value. */
18811 static const char *
18812 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18813 int immed_op
, HOST_WIDE_INT n
)
18815 #if HOST_BITS_PER_WIDE_INT > 32
18821 /* Quick and easy output. */
18822 operands
[immed_op
] = const0_rtx
;
18823 output_asm_insn (instr1
, operands
);
18828 const char * instr
= instr1
;
18830 /* Note that n is never zero here (which would give no output). */
18831 for (i
= 0; i
< 32; i
+= 2)
18835 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18836 output_asm_insn (instr
, operands
);
/* NOTE(review): corrupted extraction -- the switch body mapping rtx codes to
   mnemonics (lines 18849-18868 of the original) is missing; only the default
   ARM_LSL_NAME return survives.  Code left byte-identical; comments only.  */
18846 /* Return the name of a shifter operation. */
18847 static const char *
18848 arm_shift_nmem(enum rtx_code code
)
18853 return ARM_LSL_NAME
;
/* NOTE(review): corrupted extraction -- most case labels and return arms of
   the switch are missing (gaps 18876->18882->18897).  Code left
   byte-identical; comments only.
   Maps an operation rtx to its ARM mnemonic; MINUS honours operand order via
   SHIFT_FIRST_ARG (rsb vs sub), shift codes defer to arm_shift_nmem.  */
18869 /* Return the appropriate ARM instruction for the operation code.
18870 The returned result should not be overwritten. OP is the rtx of the
18871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18874 arithmetic_instr (rtx op
, int shift_first_arg
)
18876 switch (GET_CODE (op
))
18882 return shift_first_arg
? "rsb" : "sub";
18897 return arm_shift_nmem(GET_CODE(op
));
18900 gcc_unreachable ();
/* NOTE(review): corrupted extraction -- case labels, braces and several
   statements are missing throughout (e.g. gaps 18913->18918, 18925->18933).
   Code left byte-identical; comments only.
   Validates a shift rtx and returns its mnemonic; *AMOUNTP receives the
   constant amount, -1 for register shifts (per the truncated header comment),
   or the log2 of a MULT-by-power-of-two folded into an LSL.  */
18904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18905 for the operation code. The returned result should not be overwritten.
18906 OP is the rtx code of the shift.
18907 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18909 static const char *
18910 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18913 enum rtx_code code
= GET_CODE (op
);
18918 if (!CONST_INT_P (XEXP (op
, 1)))
18920 output_operand_lossage ("invalid shift operand");
/* ROTATERT of (32 - n) -- presumably the ROTATE case; original labels lost.  */
18925 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18933 mnem
= arm_shift_nmem(code
);
18934 if (CONST_INT_P (XEXP (op
, 1)))
18936 *amountp
= INTVAL (XEXP (op
, 1));
18938 else if (REG_P (XEXP (op
, 1)))
18945 output_operand_lossage ("invalid shift operand");
/* MULT-by-power-of-two branch: folded into an equivalent LSL.  */
18951 /* We never have to worry about the amount being other than a
18952 power of 2, since this case can never be reloaded from a reg. */
18953 if (!CONST_INT_P (XEXP (op
, 1)))
18955 output_operand_lossage ("invalid shift operand");
18959 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18961 /* Amount must be a power of two. */
18962 if (*amountp
& (*amountp
- 1))
18964 output_operand_lossage ("invalid shift operand");
18968 *amountp
= int_log2 (*amountp
);
18969 return ARM_LSL_NAME
;
18972 output_operand_lossage ("invalid shift operand");
18976 /* This is not 100% correct, but follows from the desire to merge
18977 multiplication by a power of 2 with the recognizer for a
18978 shift. >=32 is not a valid shift for "lsl", so we must try and
18979 output a shift that produces the correct arithmetical result.
18980 Using lsr #32 is identical except for the fact that the carry bit
18981 is not set correctly if we set the flags; but we never use the
18982 carry bit from such an operation, so we can ignore that. */
18983 if (code
== ROTATERT
)
18984 /* Rotate is just modulo 32. */
18986 else if (*amountp
!= (*amountp
& 31))
18988 if (code
== ASHIFT
)
18993 /* Shifts of 0 are no-ops. */
/* NOTE(review): corrupted extraction -- the loop body (shift increment) and
   the return statement are missing (gaps 19007->19009->19016).  Code left
   byte-identical; comments only.
   Computes log2 of POWER, which callers guarantee is an exact power of two
   (see shift_op's power-of-two check above); asserts the result fits 0..31.  */
19000 /* Obtain the shift from the POWER of two. */
19002 static HOST_WIDE_INT
19003 int_log2 (HOST_WIDE_INT power
)
19005 HOST_WIDE_INT shift
= 0;
19007 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
19009 gcc_assert (shift
<= 31);
/* NOTE(review): corrupted extraction -- the printable-character branch and
   several counter updates are missing (e.g. gaps 19048->19056).  Code left
   byte-identical; comments only.
   Emits LEN bytes at P as .ascii directives, splitting every MAX_ASCII_LEN
   characters and escaping backslash/quote and non-printables (octal).  */
19016 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19017 because /bin/as is horribly restrictive. The judgement about
19018 whether or not each character is 'printable' (and can be output as
19019 is) or not (and must be printed with an octal escape) must be made
19020 with reference to the *host* character set -- the situation is
19021 similar to that discussed in the comments above pp_c_char in
19022 c-pretty-print.c. */
19024 #define MAX_ASCII_LEN 51
19027 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19030 int len_so_far
= 0;
19032 fputs ("\t.ascii\t\"", stream
);
19034 for (i
= 0; i
< len
; i
++)
19038 if (len_so_far
>= MAX_ASCII_LEN
)
19040 fputs ("\"\n\t.ascii\t\"", stream
);
19046 if (c
== '\\' || c
== '\"')
19048 putc ('\\', stream
);
19056 fprintf (stream
, "\\%03o", c
);
19061 fputs ("\"\n", stream
);
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (e.g. the max_reg assignments after 19088, the flag_pic condition
   opener before 19100/19134, the EH loop header before 19152).  Code left
   byte-identical; comments only.
   Builds the r0-r12 save mask: interrupt handlers save everything live or
   call-clobbered, noreturn functions save only frame pointers, normal code
   saves live call-saved registers plus FP/PIC/EH special cases.  */
19064 /* Compute the register save mask for registers 0 through 12
19065 inclusive. This code is used by arm_compute_save_reg_mask. */
19067 static unsigned long
19068 arm_compute_save_reg0_reg12_mask (void)
19070 unsigned long func_type
= arm_current_func_type ();
19071 unsigned long save_reg_mask
= 0;
19074 if (IS_INTERRUPT (func_type
))
19076 unsigned int max_reg
;
19077 /* Interrupt functions must not corrupt any registers,
19078 even call clobbered ones. If this is a leaf function
19079 we can just examine the registers used by the RTL, but
19080 otherwise we have to assume that whatever function is
19081 called might clobber anything, and so we have to save
19082 all the call-clobbered registers as well. */
19083 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19084 /* FIQ handlers have registers r8 - r12 banked, so
19085 we only need to check r0 - r7, Normal ISRs only
19086 bank r14 and r15, so we must check up to r12.
19087 r13 is the stack pointer which is always preserved,
19088 so we do not need to consider it here. */
19093 for (reg
= 0; reg
<= max_reg
; reg
++)
19094 if (df_regs_ever_live_p (reg
)
19095 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19096 save_reg_mask
|= (1 << reg
);
19098 /* Also save the pic base register if necessary. */
19100 && !TARGET_SINGLE_PIC_BASE
19101 && arm_pic_register
!= INVALID_REGNUM
19102 && crtl
->uses_pic_offset_table
)
19103 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19105 else if (IS_VOLATILE(func_type
))
19107 /* For noreturn functions we historically omitted register saves
19108 altogether. However this really messes up debugging. As a
19109 compromise save just the frame pointers. Combined with the link
19110 register saved elsewhere this should be sufficient to get
19112 if (frame_pointer_needed
)
19113 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19114 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19115 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19116 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19117 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19121 /* In the normal case we only need to save those registers
19122 which are call saved and which are used by this function. */
19123 for (reg
= 0; reg
<= 11; reg
++)
19124 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
19125 save_reg_mask
|= (1 << reg
);
19127 /* Handle the frame pointer as a special case. */
19128 if (frame_pointer_needed
)
19129 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19131 /* If we aren't loading the PIC register,
19132 don't stack it even though it may be live. */
19134 && !TARGET_SINGLE_PIC_BASE
19135 && arm_pic_register
!= INVALID_REGNUM
19136 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19137 || crtl
->uses_pic_offset_table
))
19138 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19140 /* The prologue will copy SP into R0, so save it. */
19141 if (IS_STACKALIGN (func_type
))
19142 save_reg_mask
|= 1;
19145 /* Save registers so the exception handler can modify them. */
19146 if (crtl
->calls_eh_return
)
19152 reg
= EH_RETURN_DATA_REGNO (i
);
19153 if (reg
== INVALID_REGNUM
)
19155 save_reg_mask
|= 1 << reg
;
19159 return save_reg_mask
;
/* NOTE(review): corrupted extraction -- return type and braces are missing
   (gap 19162->19165).  Code left byte-identical; comments only.
   Checks the dataflow live-out set of the entry block for register 3.  */
19162 /* Return true if r3 is live at the start of the function. */
19165 arm_r3_live_at_start_p (void)
19167 /* Just look at cfg info, which is still close enough to correct at this
19168 point. This gives false positives for broken functions that might use
19169 uninitialized data that happens to be allocated in r3, but who cares? */
19170 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
/* NOTE(review): corrupted extraction -- the return statements after the
   condition are missing (gap after 19184).  Code left byte-identical;
   comments only.
   Returns the bytes reserved above the frame for the static chain when an
   APCS frame in ARM mode needs r3 spilled for a nested function.  */
19173 /* Compute the number of bytes used to store the static chain register on the
19174 stack, above the stack frame. We need to know this accurately to get the
19175 alignment of the rest of the stack frame correct. */
19178 arm_compute_static_chain_stack_bytes (void)
19180 /* See the defining assertion in arm_expand_prologue. */
19181 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
19182 && IS_NESTED (arm_current_func_type ())
19183 && arm_r3_live_at_start_p ()
19184 && crtl
->args
.pretend_args_size
== 0)
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (e.g. the mask assignment opener before 19209, the optimize_size arm
   of the LR condition before 19228, the iWMMXt parity test after 19238).
   Code left byte-identical; comments only.
   Top-level save mask: APCS frame registers, r0-r12 via the helper above,
   LR policy, iWMMXt 64-bit stack alignment padding, and a Thumb-2 PIC work
   register.  */
19190 /* Compute a bit mask of which registers need to be
19191 saved on the stack for the current function.
19192 This is used by arm_get_frame_offsets, which may add extra registers. */
19194 static unsigned long
19195 arm_compute_save_reg_mask (void)
19197 unsigned int save_reg_mask
= 0;
19198 unsigned long func_type
= arm_current_func_type ();
19201 if (IS_NAKED (func_type
))
19202 /* This should never really happen. */
19205 /* If we are creating a stack frame, then we must save the frame pointer,
19206 IP (which will hold the old stack pointer), LR and the PC. */
19207 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19209 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19212 | (1 << PC_REGNUM
);
19214 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19216 /* Decide if we need to save the link register.
19217 Interrupt routines have their own banked link register,
19218 so they never need to save it.
19219 Otherwise if we do not use the link register we do not need to save
19220 it. If we are pushing other registers onto the stack however, we
19221 can save an instruction in the epilogue by pushing the link register
19222 now and then popping it back into the PC. This incurs extra memory
19223 accesses though, so we only do it when optimizing for size, and only
19224 if we know that we will not need a fancy return sequence. */
19225 if (df_regs_ever_live_p (LR_REGNUM
)
19228 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19229 && !crtl
->calls_eh_return
))
19230 save_reg_mask
|= 1 << LR_REGNUM
;
19232 if (cfun
->machine
->lr_save_eliminated
)
19233 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19235 if (TARGET_REALLY_IWMMXT
19236 && ((bit_count (save_reg_mask
)
19237 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19238 arm_compute_static_chain_stack_bytes())
19241 /* The total number of registers that are going to be pushed
19242 onto the stack is odd. We need to ensure that the stack
19243 is 64-bit aligned before we start to save iWMMXt registers,
19244 and also before we start to create locals. (A local variable
19245 might be a double or long long which we will load/store using
19246 an iWMMXt instruction). Therefore we need to push another
19247 ARM register, so that the stack will be 64-bit aligned. We
19248 try to avoid using the arg registers (r0 -r3) as they might be
19249 used to pass values in a tail call. */
19250 for (reg
= 4; reg
<= 12; reg
++)
19251 if ((save_reg_mask
& (1 << reg
)) == 0)
19255 save_reg_mask
|= (1 << reg
);
/* Fallback: take r3 and block sibcalls, since r3 may carry arguments.  */
19258 cfun
->machine
->sibcall_blocked
= 1;
19259 save_reg_mask
|= (1 << 3);
19263 /* We may need to push an additional register for use initializing the
19264 PIC base register. */
19265 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19266 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19268 reg
= thumb_find_work_register (1 << 4);
19269 if (!call_used_regs
[reg
])
19270 save_reg_mask
|= (1 << reg
);
19273 return save_reg_mask
;
/* NOTE(review): corrupted extraction -- split statements and missing interior
   lines (e.g. mask initialization after 19282, the body of the live-reg loop
   after 19287, the flag_pic opener before 19291, the return).  Code left
   byte-identical; comments only.
   Thumb-1 variant of the save mask: live call-saved regs, PIC base, the
   interworking r11 slot, LR, a low work register for pushing high regs, and
   a register reserved for large-frame stack decrement.  */
19277 /* Compute a bit mask of which registers need to be
19278 saved on the stack for the current function. */
19279 static unsigned long
19280 thumb1_compute_save_reg_mask (void)
19282 unsigned long mask
;
19286 for (reg
= 0; reg
< 12; reg
++)
19287 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19291 && !TARGET_SINGLE_PIC_BASE
19292 && arm_pic_register
!= INVALID_REGNUM
19293 && crtl
->uses_pic_offset_table
)
19294 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19296 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19297 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19298 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19300 /* LR will also be pushed if any lo regs are pushed. */
19301 if (mask
& 0xff || thumb_force_lr_save ())
19302 mask
|= (1 << LR_REGNUM
);
19304 /* Make sure we have a low work register if we need one.
19305 We will need one if we are going to push a high register,
19306 but we are not currently intending to push a low register. */
19307 if ((mask
& 0xff) == 0
19308 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19310 /* Use thumb_find_work_register to choose which register
19311 we will use. If the register is live then we will
19312 have to push it. Use LAST_LO_REGNUM as our fallback
19313 choice for the register to select. */
19314 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19315 /* Make sure the register returned by thumb_find_work_register is
19316 not part of the return value. */
19317 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19318 reg
= LAST_LO_REGNUM
;
19320 if (! call_used_regs
[reg
])
19324 /* The 504 below is 8 bytes less than 512 because there are two possible
19325 alignment words. We can't tell here if they will be present or not so we
19326 have to play it safe and assume that they are. */
19327 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19328 ROUND_UP_WORD (get_frame_size ()) +
19329 crtl
->outgoing_args_size
) >= 504)
19331 /* This is the same as the code in thumb1_expand_prologue() which
19332 determines which register to use for stack decrement. */
19333 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19334 if (mask
& (1 << reg
))
19337 if (reg
> LAST_LO_REGNUM
)
19339 /* Make sure we have a register available for stack decrement. */
19340 mask
|= 1 << LAST_LO_REGNUM
;
/* NOTE(review): corrupted extraction -- the 'count'/'saved' declarations,
   loop increment and return are missing (gaps around 19352/19361-19365 and
   after 19384).  Code left byte-identical; comments only.
   Sums 8 bytes per saved VFP register pair, scanning D-register pairs and
   padding odd counts on pre-v6 cores (ARM10 VFPr1 errata workaround).  */
19348 /* Return the number of bytes required to save VFP registers. */
19350 arm_get_vfp_saved_size (void)
19352 unsigned int regno
;
19357 /* Space for saved VFP registers. */
19358 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19361 for (regno
= FIRST_VFP_REGNUM
;
19362 regno
< LAST_VFP_REGNUM
;
19365 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19366 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19370 /* Workaround ARM10 VFPr1 bug. */
19371 if (count
== 2 && !arm_arch6
)
19373 saved
+= count
* 8;
/* Presumably the tail of the loop flushing the final run -- TODO confirm.  */
19382 if (count
== 2 && !arm_arch6
)
19384 saved
+= count
* 8;
/* NOTE(review): corrupted extraction -- declarations ('instr', 'ops', 'reg',
   'p', 'first'), braces, else-arms and several condition openers are missing
   throughout (e.g. gaps 19398->19401, 19424->19430, 19484->19491).  Code left
   byte-identical; comments only.
   Emits the textual epilogue/return for the current function: handles naked
   and noreturn functions, picks PC vs LR as pop target, restores IP/SP
   special cases, chooses ldr vs ldm/pop forms (including the SA110 ldmib
   workaround), then emits the final return insn per function type.  */
19391 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19392 everything bar the final return instruction. If simple_return is true,
19393 then do not output epilogue, because it has already been emitted in RTL. */
19395 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19396 bool simple_return
)
19398 char conditional
[10];
19401 unsigned long live_regs_mask
;
19402 unsigned long func_type
;
19403 arm_stack_offsets
*offsets
;
19405 func_type
= arm_current_func_type ();
19407 if (IS_NAKED (func_type
))
19410 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19412 /* If this function was declared non-returning, and we have
19413 found a tail call, then we have to trust that the called
19414 function won't return. */
19419 /* Otherwise, trap an attempted return by aborting. */
19421 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19423 assemble_external_libcall (ops
[1]);
19424 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19430 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19432 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19434 cfun
->machine
->return_used_this_function
= 1;
19436 offsets
= arm_get_frame_offsets ();
19437 live_regs_mask
= offsets
->saved_regs_mask
;
19439 if (!simple_return
&& live_regs_mask
)
19441 const char * return_reg
;
19443 /* If we do not have any special requirements for function exit
19444 (e.g. interworking) then we can load the return address
19445 directly into the PC. Otherwise we must load it into LR. */
19447 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19448 return_reg
= reg_names
[PC_REGNUM
];
19450 return_reg
= reg_names
[LR_REGNUM
];
19452 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19454 /* There are three possible reasons for the IP register
19455 being saved. 1) a stack frame was created, in which case
19456 IP contains the old stack pointer, or 2) an ISR routine
19457 corrupted it, or 3) it was saved to align the stack on
19458 iWMMXt. In case 1, restore IP into SP, otherwise just
19460 if (frame_pointer_needed
)
19462 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19463 live_regs_mask
|= (1 << SP_REGNUM
);
19466 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19469 /* On some ARM architectures it is faster to use LDR rather than
19470 LDM to load a single register. On other architectures, the
19471 cost is the same. In 26 bit mode, or for exception handlers,
19472 we have to use LDM to load the PC so that the CPSR is also
19474 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19475 if (live_regs_mask
== (1U << reg
))
19478 if (reg
<= LAST_ARM_REGNUM
19479 && (reg
!= LR_REGNUM
19481 || ! IS_INTERRUPT (func_type
)))
/* Single-register restore: one post-indexed LDR.  */
19483 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19484 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19491 /* Generate the load multiple instruction to restore the
19492 registers. Note we can get here, even if
19493 frame_pointer_needed is true, but only if sp already
19494 points to the base of the saved core registers. */
19495 if (live_regs_mask
& (1 << SP_REGNUM
))
19497 unsigned HOST_WIDE_INT stack_adjust
;
19499 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19500 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19502 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19503 if (TARGET_UNIFIED_ASM
)
19504 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19506 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19512 live_regs_mask
|= 1 << 3;
19514 if (TARGET_UNIFIED_ASM
)
19515 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19517 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19521 if (TARGET_UNIFIED_ASM
)
19522 sprintf (instr
, "pop%s\t{", conditional
);
19524 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
/* Append the register list to the opcode string built so far.  */
19526 p
= instr
+ strlen (instr
);
19528 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19529 if (live_regs_mask
& (1 << reg
))
19531 int l
= strlen (reg_names
[reg
]);
19537 memcpy (p
, ", ", 2);
19541 memcpy (p
, "%|", 2);
19542 memcpy (p
+ 2, reg_names
[reg
], l
);
19546 if (live_regs_mask
& (1 << LR_REGNUM
))
19548 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19549 /* If returning from an interrupt, restore the CPSR. */
19550 if (IS_INTERRUPT (func_type
))
19557 output_asm_insn (instr
, & operand
);
19559 /* See if we need to generate an extra instruction to
19560 perform the actual function return. */
19562 && func_type
!= ARM_FT_INTERWORKED
19563 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19565 /* The return has already been handled
19566 by loading the LR into the PC. */
19573 switch ((int) ARM_FUNC_TYPE (func_type
))
19577 /* ??? This is wrong for unified assembly syntax. */
19578 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19581 case ARM_FT_INTERWORKED
:
19582 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19585 case ARM_FT_EXCEPTION
:
19586 /* ??? This is wrong for unified assembly syntax. */
19587 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19591 /* Use bx if it's available. */
19592 if (arm_arch5
|| arm_arch4t
)
19593 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19595 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19599 output_asm_insn (instr
, & operand
);
/* NOTE(review): corrupted extraction -- the 'x' declaration and the return
   type line are missing; the example in the header comment also lost its
   label lines.  Code left byte-identical; comments only.
   Embeds NAME (NUL-terminated, word-aligned) before the prologue, tagged
   with a 0xff000000 marker word so backtracers can find it.  */
19605 /* Write the function name into the code section, directly preceding
19606 the function prologue.
19608 Code will be output similar to this:
19610 .ascii "arm_poke_function_name", 0
19613 .word 0xff000000 + (t1 - t0)
19614 arm_poke_function_name
19616 stmfd sp!, {fp, ip, lr, pc}
19619 When performing a stack backtrace, code can inspect the value
19620 of 'pc' stored at 'fp' + 0. If the trace function then looks
19621 at location pc - 12 and the top 8 bits are set, then we know
19622 that there is a function name embedded immediately preceding this
19623 location and has length ((pc[-3]) & 0xff000000).
19625 We assume that pc is declared as a pointer to an unsigned long.
19627 It is of no benefit to output the function name if we are assembling
19628 a leaf function. These function types will not contain a stack
19629 backtrace structure, therefore it is not possible to determine the
19632 arm_poke_function_name (FILE *stream
, const char *name
)
19634 unsigned long alignlength
;
19635 unsigned long length
;
19638 length
= strlen (name
) + 1;
19639 alignlength
= ROUND_UP_WORD (length
);
19641 ASM_OUTPUT_ASCII (stream
, name
, length
);
19642 ASM_OUTPUT_ALIGN (stream
, 2);
19643 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19644 assemble_aligned_integer (UNITS_PER_WORD
, x
);
/* NOTE(review): corrupted extraction -- several case labels, break statements
   and one asm_fprintf argument line (after 19693) are missing.  Code left
   byte-identical; comments only.
   Writes human-readable '@'-comments describing the function type, argument
   and frame sizes into the assembler output.  */
19647 /* Place some comments into the assembler stream
19648 describing the current function. */
19650 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19652 unsigned long func_type
;
19654 /* ??? Do we want to print some of the below anyway? */
19658 /* Sanity check. */
19659 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19661 func_type
= arm_current_func_type ();
19663 switch ((int) ARM_FUNC_TYPE (func_type
))
19666 case ARM_FT_NORMAL
:
19668 case ARM_FT_INTERWORKED
:
19669 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19672 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19675 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19677 case ARM_FT_EXCEPTION
:
19678 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19682 if (IS_NAKED (func_type
))
19683 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19685 if (IS_VOLATILE (func_type
))
19686 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19688 if (IS_NESTED (func_type
))
19689 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19690 if (IS_STACKALIGN (func_type
))
19691 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19693 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19695 crtl
->args
.pretend_args_size
, frame_size
);
19697 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19698 frame_pointer_needed
,
19699 cfun
->machine
->uses_anonymous_args
);
19701 if (cfun
->machine
->lr_save_eliminated
)
19702 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19704 if (crtl
->calls_eh_return
)
19705 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* NOTE(review): corrupted extraction -- the return type, the TARGET_THUMB
   guard that the 'else /+ TARGET_32BIT +/' at 19739 pairs with, and the
   null-label check after 19723 are missing.  Code left byte-identical;
   comments only.
   Thumb path: flush any pending v4t call-via-reg trampolines; 32-bit path:
   sanity-check frame offsets against use_return_insn.  */
19710 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19711 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19713 arm_stack_offsets
*offsets
;
19719 /* Emit any call-via-reg trampolines that are needed for v4t support
19720 of call_reg and call_value_reg type insns. */
19721 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19723 rtx label
= cfun
->machine
->call_via
[regno
];
19727 switch_to_section (function_section (current_function_decl
));
19728 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19729 CODE_LABEL_NUMBER (label
));
19730 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19734 /* ??? Probably not safe to set this here, since it assumes that a
19735 function will be emitted as assembly immediately after we generate
19736 RTL for it. This does not happen for inline functions. */
19737 cfun
->machine
->return_used_this_function
= 0;
19739 else /* TARGET_32BIT */
19741 /* We need to take into account any stack-frame rounding. */
19742 offsets
= arm_get_frame_offsets ();
19744 gcc_assert (!use_return_insn (FALSE
, NULL
)
19745 || (cfun
->machine
->return_used_this_function
!= 0)
19746 || offsets
->saved_regs
== offsets
->outgoing_args
19747 || frame_pointer_needed
);
/* NOTE(review): corrupted extraction -- declarations ('num_regs', 'i',
   'tmp'), braces, several plus_constant offset arguments (e.g. after 19846,
   19870-19875) and the loop increments are missing.  Code left
   byte-identical; comments only.
   Emits a Thumb-2 push of SAVED_REGS_MASK using STR/STRD: an initial single
   STR when the count is odd (to keep later STRDs dword-aligned), then STRD
   pairs, with a parallel DWARF SEQUENCE describing the stack adjustment and
   each store for unwind info.  */
19751 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19752 STR and STRD. If an even number of registers are being pushed, one
19753 or more STRD patterns are created for each register pair. If an
19754 odd number of registers are pushed, emit an initial STR followed by
19755 as many STRD instructions as are needed. This works best when the
19756 stack is initially 64-bit aligned (the normal case), since it
19757 ensures that each STRD is also 64-bit aligned. */
19759 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19764 rtx par
= NULL_RTX
;
19765 rtx dwarf
= NULL_RTX
;
19769 num_regs
= bit_count (saved_regs_mask
);
19771 /* Must be at least one register to save, and can't save SP or PC. */
19772 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19773 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19774 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19776 /* Create sequence for DWARF info. All the frame-related data for
19777 debugging is held in this wrapper. */
19778 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19780 /* Describe the stack adjustment. */
19781 tmp
= gen_rtx_SET (VOIDmode
,
19783 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19784 RTX_FRAME_RELATED_P (tmp
) = 1;
19785 XVECEXP (dwarf
, 0, 0) = tmp
;
19787 /* Find the first register. */
19788 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19793 /* If there's an odd number of registers to push. Start off by
19794 pushing a single register. This ensures that subsequent strd
19795 operations are dword aligned (assuming that SP was originally
19796 64-bit aligned). */
19797 if ((num_regs
& 1) != 0)
19799 rtx reg
, mem
, insn
;
19801 reg
= gen_rtx_REG (SImode
, regno
);
19803 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19804 stack_pointer_rtx
));
19806 mem
= gen_frame_mem (Pmode
,
19808 (Pmode
, stack_pointer_rtx
,
19809 plus_constant (Pmode
, stack_pointer_rtx
,
19812 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19813 RTX_FRAME_RELATED_P (tmp
) = 1;
19814 insn
= emit_insn (tmp
);
19815 RTX_FRAME_RELATED_P (insn
) = 1;
19816 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19817 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19819 RTX_FRAME_RELATED_P (tmp
) = 1;
19822 XVECEXP (dwarf
, 0, i
) = tmp
;
/* Main loop: pair up remaining registers and emit one STRD per pair.  */
19826 while (i
< num_regs
)
19827 if (saved_regs_mask
& (1 << regno
))
19829 rtx reg1
, reg2
, mem1
, mem2
;
19830 rtx tmp0
, tmp1
, tmp2
;
19833 /* Find the register to pair with this one. */
19834 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19838 reg1
= gen_rtx_REG (SImode
, regno
);
19839 reg2
= gen_rtx_REG (SImode
, regno2
);
/* First pair also performs the full stack allocation (writeback).  */
19846 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19849 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19851 -4 * (num_regs
- 1)));
19852 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19853 plus_constant (Pmode
, stack_pointer_rtx
,
19855 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19856 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19857 RTX_FRAME_RELATED_P (tmp0
) = 1;
19858 RTX_FRAME_RELATED_P (tmp1
) = 1;
19859 RTX_FRAME_RELATED_P (tmp2
) = 1;
19860 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19861 XVECEXP (par
, 0, 0) = tmp0
;
19862 XVECEXP (par
, 0, 1) = tmp1
;
19863 XVECEXP (par
, 0, 2) = tmp2
;
19864 insn
= emit_insn (par
);
19865 RTX_FRAME_RELATED_P (insn
) = 1;
19866 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
/* Subsequent pairs use plain offset addressing, no writeback.  */
19870 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19873 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19876 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19877 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19878 RTX_FRAME_RELATED_P (tmp1
) = 1;
19879 RTX_FRAME_RELATED_P (tmp2
) = 1;
19880 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19881 XVECEXP (par
, 0, 0) = tmp1
;
19882 XVECEXP (par
, 0, 1) = tmp2
;
19886 /* Create unwind information. This is an approximation. */
19887 tmp1
= gen_rtx_SET (VOIDmode
,
19888 gen_frame_mem (Pmode
,
19889 plus_constant (Pmode
,
19893 tmp2
= gen_rtx_SET (VOIDmode
,
19894 gen_frame_mem (Pmode
,
19895 plus_constant (Pmode
,
19900 RTX_FRAME_RELATED_P (tmp1
) = 1;
19901 RTX_FRAME_RELATED_P (tmp2
) = 1;
19902 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19903 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19905 regno
= regno2
+ 1;
19913 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19914 whenever possible, otherwise it emits single-word stores. The first store
19915 also allocates stack space for all saved registers, using writeback with
19916 post-addressing mode. All other stores use offset addressing. If no STRD
19917 can be emitted, this function emits a sequence of single-word stores,
19918 and not an STM as before, because single-word stores provide more freedom
19919 scheduling and can be turned into an STM by peephole optimizations. */
19921 arm_emit_strd_push (unsigned long saved_regs_mask
)
19924 int i
, j
, dwarf_index
= 0;
19926 rtx dwarf
= NULL_RTX
;
19927 rtx insn
= NULL_RTX
;
19930 /* TODO: A more efficient code can be emitted by changing the
19931 layout, e.g., first push all pairs that can use STRD to keep the
19932 stack aligned, and then push all other registers. */
19933 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19934 if (saved_regs_mask
& (1 << i
))
19937 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19938 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19939 gcc_assert (num_regs
> 0);
19941 /* Create sequence for DWARF info. */
19942 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19944 /* For dwarf info, we generate explicit stack update. */
19945 tmp
= gen_rtx_SET (VOIDmode
,
19947 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19948 RTX_FRAME_RELATED_P (tmp
) = 1;
19949 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19951 /* Save registers. */
19952 offset
= - 4 * num_regs
;
19954 while (j
<= LAST_ARM_REGNUM
)
19955 if (saved_regs_mask
& (1 << j
))
19958 && (saved_regs_mask
& (1 << (j
+ 1))))
19960 /* Current register and previous register form register pair for
19961 which STRD can be generated. */
19964 /* Allocate stack space for all saved registers. */
19965 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19966 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19967 mem
= gen_frame_mem (DImode
, tmp
);
19970 else if (offset
> 0)
19971 mem
= gen_frame_mem (DImode
,
19972 plus_constant (Pmode
,
19976 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19978 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19979 RTX_FRAME_RELATED_P (tmp
) = 1;
19980 tmp
= emit_insn (tmp
);
19982 /* Record the first store insn. */
19983 if (dwarf_index
== 1)
19986 /* Generate dwarf info. */
19987 mem
= gen_frame_mem (SImode
,
19988 plus_constant (Pmode
,
19991 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19992 RTX_FRAME_RELATED_P (tmp
) = 1;
19993 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19995 mem
= gen_frame_mem (SImode
,
19996 plus_constant (Pmode
,
19999 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
20000 RTX_FRAME_RELATED_P (tmp
) = 1;
20001 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20008 /* Emit a single word store. */
20011 /* Allocate stack space for all saved registers. */
20012 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20013 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20014 mem
= gen_frame_mem (SImode
, tmp
);
20017 else if (offset
> 0)
20018 mem
= gen_frame_mem (SImode
,
20019 plus_constant (Pmode
,
20023 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20025 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
20026 RTX_FRAME_RELATED_P (tmp
) = 1;
20027 tmp
= emit_insn (tmp
);
20029 /* Record the first store insn. */
20030 if (dwarf_index
== 1)
20033 /* Generate dwarf info. */
20034 mem
= gen_frame_mem (SImode
,
20035 plus_constant(Pmode
,
20038 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
20039 RTX_FRAME_RELATED_P (tmp
) = 1;
20040 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20049 /* Attach dwarf info to the first insn we generate. */
20050 gcc_assert (insn
!= NULL_RTX
);
20051 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20052 RTX_FRAME_RELATED_P (insn
) = 1;
20055 /* Generate and emit an insn that we will recognize as a push_multi.
20056 Unfortunately, since this insn does not reflect very well the actual
20057 semantics of the operation, we need to annotate the insn for the benefit
20058 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20059 MASK for registers that should be annotated for DWARF2 frame unwind
20062 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20065 int num_dwarf_regs
= 0;
20069 int dwarf_par_index
;
20072 /* We don't record the PC in the dwarf frame information. */
20073 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20075 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20077 if (mask
& (1 << i
))
20079 if (dwarf_regs_mask
& (1 << i
))
20083 gcc_assert (num_regs
&& num_regs
<= 16);
20084 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20086 /* For the body of the insn we are going to generate an UNSPEC in
20087 parallel with several USEs. This allows the insn to be recognized
20088 by the push_multi pattern in the arm.md file.
20090 The body of the insn looks something like this:
20093 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20094 (const_int:SI <num>)))
20095 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20101 For the frame note however, we try to be more explicit and actually
20102 show each register being stored into the stack frame, plus a (single)
20103 decrement of the stack pointer. We do it this way in order to be
20104 friendly to the stack unwinding code, which only wants to see a single
20105 stack decrement per instruction. The RTL we generate for the note looks
20106 something like this:
20109 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20110 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20111 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20112 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20116 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20117 instead we'd have a parallel expression detailing all
20118 the stores to the various memory addresses so that debug
20119 information is more up-to-date. Remember however while writing
20120 this to take care of the constraints with the push instruction.
20122 Note also that this has to be taken care of for the VFP registers.
20124 For more see PR43399. */
20126 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20127 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20128 dwarf_par_index
= 1;
20130 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20132 if (mask
& (1 << i
))
20134 reg
= gen_rtx_REG (SImode
, i
);
20136 XVECEXP (par
, 0, 0)
20137 = gen_rtx_SET (VOIDmode
,
20140 gen_rtx_PRE_MODIFY (Pmode
,
20143 (Pmode
, stack_pointer_rtx
,
20146 gen_rtx_UNSPEC (BLKmode
,
20147 gen_rtvec (1, reg
),
20148 UNSPEC_PUSH_MULT
));
20150 if (dwarf_regs_mask
& (1 << i
))
20152 tmp
= gen_rtx_SET (VOIDmode
,
20153 gen_frame_mem (SImode
, stack_pointer_rtx
),
20155 RTX_FRAME_RELATED_P (tmp
) = 1;
20156 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20163 for (j
= 1, i
++; j
< num_regs
; i
++)
20165 if (mask
& (1 << i
))
20167 reg
= gen_rtx_REG (SImode
, i
);
20169 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20171 if (dwarf_regs_mask
& (1 << i
))
20174 = gen_rtx_SET (VOIDmode
,
20177 plus_constant (Pmode
, stack_pointer_rtx
,
20180 RTX_FRAME_RELATED_P (tmp
) = 1;
20181 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20188 par
= emit_insn (par
);
20190 tmp
= gen_rtx_SET (VOIDmode
,
20192 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20193 RTX_FRAME_RELATED_P (tmp
) = 1;
20194 XVECEXP (dwarf
, 0, 0) = tmp
;
20196 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20201 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20202 SIZE is the offset to be adjusted.
20203 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20205 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20209 RTX_FRAME_RELATED_P (insn
) = 1;
20210 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
20211 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20214 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20215 SAVED_REGS_MASK shows which registers need to be restored.
20217 Unfortunately, since this insn does not reflect very well the actual
20218 semantics of the operation, we need to annotate the insn for the benefit
20219 of DWARF2 frame unwind information. */
20221 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20226 rtx dwarf
= NULL_RTX
;
20232 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20233 offset_adj
= return_in_pc
? 1 : 0;
20234 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20235 if (saved_regs_mask
& (1 << i
))
20238 gcc_assert (num_regs
&& num_regs
<= 16);
20240 /* If SP is in reglist, then we don't emit SP update insn. */
20241 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20250 XVECEXP (par
, 0, 0) = tmp
;
20255 /* Increment the stack pointer, based on there being
20256 num_regs 4-byte registers to restore. */
20257 tmp
= gen_rtx_SET (VOIDmode
,
20259 plus_constant (Pmode
,
20262 RTX_FRAME_RELATED_P (tmp
) = 1;
20263 XVECEXP (par
, 0, offset_adj
) = tmp
;
20266 /* Now restore every reg, which may include PC. */
20267 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20268 if (saved_regs_mask
& (1 << i
))
20270 reg
= gen_rtx_REG (SImode
, i
);
20271 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20273 /* Emit single load with writeback. */
20274 tmp
= gen_frame_mem (SImode
,
20275 gen_rtx_POST_INC (Pmode
,
20276 stack_pointer_rtx
));
20277 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20278 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20282 tmp
= gen_rtx_SET (VOIDmode
,
20286 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20287 RTX_FRAME_RELATED_P (tmp
) = 1;
20288 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20290 /* We need to maintain a sequence for DWARF info too. As dwarf info
20291 should not have PC, skip PC. */
20292 if (i
!= PC_REGNUM
)
20293 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20299 par
= emit_jump_insn (par
);
20301 par
= emit_insn (par
);
20303 REG_NOTES (par
) = dwarf
;
20305 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20306 stack_pointer_rtx
, stack_pointer_rtx
);
20309 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20310 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20312 Unfortunately, since this insn does not reflect very well the actual
20313 semantics of the operation, we need to annotate the insn for the benefit
20314 of DWARF2 frame unwind information. */
20316 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20320 rtx dwarf
= NULL_RTX
;
20323 gcc_assert (num_regs
&& num_regs
<= 32);
20325 /* Workaround ARM10 VFPr1 bug. */
20326 if (num_regs
== 2 && !arm_arch6
)
20328 if (first_reg
== 15)
20334 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20335 there could be up to 32 D-registers to restore.
20336 If there are more than 16 D-registers, make two recursive calls,
20337 each of which emits one pop_multi instruction. */
20340 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20341 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20345 /* The parallel needs to hold num_regs SETs
20346 and one SET for the stack update. */
20347 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20349 /* Increment the stack pointer, based on there being
20350 num_regs 8-byte registers to restore. */
20351 tmp
= gen_rtx_SET (VOIDmode
,
20353 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20354 RTX_FRAME_RELATED_P (tmp
) = 1;
20355 XVECEXP (par
, 0, 0) = tmp
;
20357 /* Now show every reg that will be restored, using a SET for each. */
20358 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20360 reg
= gen_rtx_REG (DFmode
, i
);
20362 tmp
= gen_rtx_SET (VOIDmode
,
20366 plus_constant (Pmode
, base_reg
, 8 * j
)));
20367 RTX_FRAME_RELATED_P (tmp
) = 1;
20368 XVECEXP (par
, 0, j
+ 1) = tmp
;
20370 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20375 par
= emit_insn (par
);
20376 REG_NOTES (par
) = dwarf
;
20378 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20379 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20381 RTX_FRAME_RELATED_P (par
) = 1;
20382 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20385 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20386 base_reg
, base_reg
);
20389 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20390 number of registers are being popped, multiple LDRD patterns are created for
20391 all register pairs. If odd number of registers are popped, last register is
20392 loaded by using LDR pattern. */
20394 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20398 rtx par
= NULL_RTX
;
20399 rtx dwarf
= NULL_RTX
;
20400 rtx tmp
, reg
, tmp1
;
20403 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20404 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20405 if (saved_regs_mask
& (1 << i
))
20408 gcc_assert (num_regs
&& num_regs
<= 16);
20410 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20411 to be popped. So, if num_regs is even, now it will become odd,
20412 and we can generate pop with PC. If num_regs is odd, it will be
20413 even now, and ldr with return can be generated for PC. */
20417 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20419 /* Var j iterates over all the registers to gather all the registers in
20420 saved_regs_mask. Var i gives index of saved registers in stack frame.
20421 A PARALLEL RTX of register-pair is created here, so that pattern for
20422 LDRD can be matched. As PC is always last register to be popped, and
20423 we have already decremented num_regs if PC, we don't have to worry
20424 about PC in this loop. */
20425 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20426 if (saved_regs_mask
& (1 << j
))
20428 /* Create RTX for memory load. */
20429 reg
= gen_rtx_REG (SImode
, j
);
20430 tmp
= gen_rtx_SET (SImode
,
20432 gen_frame_mem (SImode
,
20433 plus_constant (Pmode
,
20434 stack_pointer_rtx
, 4 * i
)));
20435 RTX_FRAME_RELATED_P (tmp
) = 1;
20439 /* When saved-register index (i) is even, the RTX to be emitted is
20440 yet to be created. Hence create it first. The LDRD pattern we
20441 are generating is :
20442 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20443 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20444 where target registers need not be consecutive. */
20445 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20449 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20450 added as 0th element and if i is odd, reg_i is added as 1st element
20451 of LDRD pattern shown above. */
20452 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20453 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20457 /* When saved-register index (i) is odd, RTXs for both the registers
20458 to be loaded are generated in above given LDRD pattern, and the
20459 pattern can be emitted now. */
20460 par
= emit_insn (par
);
20461 REG_NOTES (par
) = dwarf
;
20462 RTX_FRAME_RELATED_P (par
) = 1;
20468 /* If the number of registers pushed is odd AND return_in_pc is false OR
20469 number of registers are even AND return_in_pc is true, last register is
20470 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20471 then LDR with post increment. */
20473 /* Increment the stack pointer, based on there being
20474 num_regs 4-byte registers to restore. */
20475 tmp
= gen_rtx_SET (VOIDmode
,
20477 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20478 RTX_FRAME_RELATED_P (tmp
) = 1;
20479 tmp
= emit_insn (tmp
);
20482 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20483 stack_pointer_rtx
, stack_pointer_rtx
);
20488 if (((num_regs
% 2) == 1 && !return_in_pc
)
20489 || ((num_regs
% 2) == 0 && return_in_pc
))
20491 /* Scan for the single register to be popped. Skip until the saved
20492 register is found. */
20493 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20495 /* Gen LDR with post increment here. */
20496 tmp1
= gen_rtx_MEM (SImode
,
20497 gen_rtx_POST_INC (SImode
,
20498 stack_pointer_rtx
));
20499 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20501 reg
= gen_rtx_REG (SImode
, j
);
20502 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20503 RTX_FRAME_RELATED_P (tmp
) = 1;
20504 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20508 /* If return_in_pc, j must be PC_REGNUM. */
20509 gcc_assert (j
== PC_REGNUM
);
20510 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20511 XVECEXP (par
, 0, 0) = ret_rtx
;
20512 XVECEXP (par
, 0, 1) = tmp
;
20513 par
= emit_jump_insn (par
);
20517 par
= emit_insn (tmp
);
20518 REG_NOTES (par
) = dwarf
;
20519 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20520 stack_pointer_rtx
, stack_pointer_rtx
);
20524 else if ((num_regs
% 2) == 1 && return_in_pc
)
20526 /* There are 2 registers to be popped. So, generate the pattern
20527 pop_multiple_with_stack_update_and_return to pop in PC. */
20528 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20534 /* LDRD in ARM mode needs consecutive registers as operands. This function
20535 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20536 offset addressing and then generates one separate stack udpate. This provides
20537 more scheduling freedom, compared to writeback on every load. However,
20538 if the function returns using load into PC directly
20539 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20540 before the last load. TODO: Add a peephole optimization to recognize
20541 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20542 peephole optimization to merge the load at stack-offset zero
20543 with the stack update instruction using load with writeback
20544 in post-index addressing mode. */
20546 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20550 rtx par
= NULL_RTX
;
20551 rtx dwarf
= NULL_RTX
;
20554 /* Restore saved registers. */
20555 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20557 while (j
<= LAST_ARM_REGNUM
)
20558 if (saved_regs_mask
& (1 << j
))
20561 && (saved_regs_mask
& (1 << (j
+ 1)))
20562 && (j
+ 1) != PC_REGNUM
)
20564 /* Current register and next register form register pair for which
20565 LDRD can be generated. PC is always the last register popped, and
20566 we handle it separately. */
20568 mem
= gen_frame_mem (DImode
,
20569 plus_constant (Pmode
,
20573 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20575 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20576 tmp
= emit_insn (tmp
);
20577 RTX_FRAME_RELATED_P (tmp
) = 1;
20579 /* Generate dwarf info. */
20581 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20582 gen_rtx_REG (SImode
, j
),
20584 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20585 gen_rtx_REG (SImode
, j
+ 1),
20588 REG_NOTES (tmp
) = dwarf
;
20593 else if (j
!= PC_REGNUM
)
20595 /* Emit a single word load. */
20597 mem
= gen_frame_mem (SImode
,
20598 plus_constant (Pmode
,
20602 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20604 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20605 tmp
= emit_insn (tmp
);
20606 RTX_FRAME_RELATED_P (tmp
) = 1;
20608 /* Generate dwarf info. */
20609 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20610 gen_rtx_REG (SImode
, j
),
20616 else /* j == PC_REGNUM */
20622 /* Update the stack. */
20625 tmp
= gen_rtx_SET (Pmode
,
20627 plus_constant (Pmode
,
20630 tmp
= emit_insn (tmp
);
20631 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20632 stack_pointer_rtx
, stack_pointer_rtx
);
20636 if (saved_regs_mask
& (1 << PC_REGNUM
))
20638 /* Only PC is to be popped. */
20639 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20640 XVECEXP (par
, 0, 0) = ret_rtx
;
20641 tmp
= gen_rtx_SET (SImode
,
20642 gen_rtx_REG (SImode
, PC_REGNUM
),
20643 gen_frame_mem (SImode
,
20644 gen_rtx_POST_INC (SImode
,
20645 stack_pointer_rtx
)));
20646 RTX_FRAME_RELATED_P (tmp
) = 1;
20647 XVECEXP (par
, 0, 1) = tmp
;
20648 par
= emit_jump_insn (par
);
20650 /* Generate dwarf info. */
20651 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20652 gen_rtx_REG (SImode
, PC_REGNUM
),
20654 REG_NOTES (par
) = dwarf
;
20655 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20656 stack_pointer_rtx
, stack_pointer_rtx
);
20660 /* Calculate the size of the return value that is passed in registers. */
20662 arm_size_return_regs (void)
20666 if (crtl
->return_rtx
!= 0)
20667 mode
= GET_MODE (crtl
->return_rtx
);
20669 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20671 return GET_MODE_SIZE (mode
);
20674 /* Return true if the current function needs to save/restore LR. */
20676 thumb_force_lr_save (void)
20678 return !cfun
->machine
->lr_save_eliminated
20679 && (!leaf_function_p ()
20680 || thumb_far_jump_used_p ()
20681 || df_regs_ever_live_p (LR_REGNUM
));
20684 /* We do not know if r3 will be available because
20685 we do have an indirect tailcall happening in this
20686 particular case. */
20688 is_indirect_tailcall_p (rtx call
)
20690 rtx pat
= PATTERN (call
);
20692 /* Indirect tail call. */
20693 pat
= XVECEXP (pat
, 0, 0);
20694 if (GET_CODE (pat
) == SET
)
20695 pat
= SET_SRC (pat
);
20697 pat
= XEXP (XEXP (pat
, 0), 0);
20698 return REG_P (pat
);
20701 /* Return true if r3 is used by any of the tail call insns in the
20702 current function. */
20704 any_sibcall_could_use_r3 (void)
20709 if (!crtl
->tail_call_emit
)
20711 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20712 if (e
->flags
& EDGE_SIBCALL
)
20714 rtx call
= BB_END (e
->src
);
20715 if (!CALL_P (call
))
20716 call
= prev_nonnote_nondebug_insn (call
);
20717 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20718 if (find_regno_fusage (call
, USE
, 3)
20719 || is_indirect_tailcall_p (call
))
20726 /* Compute the distance from register FROM to register TO.
20727 These can be the arg pointer (26), the soft frame pointer (25),
20728 the stack pointer (13) or the hard frame pointer (11).
20729 In thumb mode r7 is used as the soft frame pointer, if needed.
20730 Typical stack layout looks like this:
20732 old stack pointer -> | |
20735 | | saved arguments for
20736 | | vararg functions
20739 hard FP & arg pointer -> | | \
20747 soft frame pointer -> | | /
20752 locals base pointer -> | | /
20757 current stack pointer -> | | /
20760 For a given function some or all of these stack components
20761 may not be needed, giving rise to the possibility of
20762 eliminating some of the registers.
20764 The values returned by this function must reflect the behavior
20765 of arm_expand_prologue() and arm_compute_save_reg_mask().
20767 The sign of the number returned reflects the direction of stack
20768 growth, so the values are positive for all eliminations except
20769 from the soft frame pointer to the hard frame pointer.
20771 SFP may point just inside the local variables block to ensure correct
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20779 static arm_stack_offsets
*
20780 arm_get_frame_offsets (void)
20782 struct arm_stack_offsets
*offsets
;
20783 unsigned long func_type
;
20787 HOST_WIDE_INT frame_size
;
20790 offsets
= &cfun
->machine
->stack_offsets
;
20792 /* We need to know if we are a leaf function. Unfortunately, it
20793 is possible to be called after start_sequence has been called,
20794 which causes get_insns to return the insns for the sequence,
20795 not the function, which will cause leaf_function_p to return
20796 the incorrect result.
20798 to know about leaf functions once reload has completed, and the
20799 frame size cannot be changed after that time, so we can safely
20800 use the cached value. */
20802 if (reload_completed
)
20805 /* Initially this is the size of the local variables. It will translated
20806 into an offset once we have determined the size of preceding data. */
20807 frame_size
= ROUND_UP_WORD (get_frame_size ());
20809 leaf
= leaf_function_p ();
20811 /* Space for variadic functions. */
20812 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20814 /* In Thumb mode this is incorrect, but never used. */
20816 = (offsets
->saved_args
20817 + arm_compute_static_chain_stack_bytes ()
20818 + (frame_pointer_needed
? 4 : 0));
20822 unsigned int regno
;
20824 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20825 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20826 saved
= core_saved
;
20828 /* We know that SP will be doubleword aligned on entry, and we must
20829 preserve that condition at any subroutine call. We also require the
20830 soft frame pointer to be doubleword aligned. */
20832 if (TARGET_REALLY_IWMMXT
)
20834 /* Check for the call-saved iWMMXt registers. */
20835 for (regno
= FIRST_IWMMXT_REGNUM
;
20836 regno
<= LAST_IWMMXT_REGNUM
;
20838 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20842 func_type
= arm_current_func_type ();
20843 /* Space for saved VFP registers. */
20844 if (! IS_VOLATILE (func_type
)
20845 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20846 saved
+= arm_get_vfp_saved_size ();
20848 else /* TARGET_THUMB1 */
20850 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20851 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20852 saved
= core_saved
;
20853 if (TARGET_BACKTRACE
)
20857 /* Saved registers include the stack frame. */
20858 offsets
->saved_regs
20859 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20860 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20862 /* A leaf function does not need any stack alignment if it has nothing
20864 if (leaf
&& frame_size
== 0
20865 /* However if it calls alloca(), we have a dynamically allocated
20866 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20867 && ! cfun
->calls_alloca
)
20869 offsets
->outgoing_args
= offsets
->soft_frame
;
20870 offsets
->locals_base
= offsets
->soft_frame
;
20874 /* Ensure SFP has the correct alignment. */
20875 if (ARM_DOUBLEWORD_ALIGN
20876 && (offsets
->soft_frame
& 7))
20878 offsets
->soft_frame
+= 4;
20879 /* Try to align stack by pushing an extra reg. Don't bother doing this
20880 when there is a stack frame as the alignment will be rolled into
20881 the normal stack adjustment. */
20882 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20886 /* Register r3 is caller-saved. Normally it does not need to be
20887 saved on entry by the prologue. However if we choose to save
20888 it for padding then we may confuse the compiler into thinking
20889 a prologue sequence is required when in fact it is not. This
20890 will occur when shrink-wrapping if r3 is used as a scratch
20891 register and there are no other callee-saved writes.
20893 This situation can be avoided when other callee-saved registers
20894 are available and r3 is not mandatory if we choose a callee-saved
20895 register for padding. */
20896 bool prefer_callee_reg_p
= false;
20898 /* If it is safe to use r3, then do so. This sometimes
20899 generates better code on Thumb-2 by avoiding the need to
20900 use 32-bit push/pop instructions. */
20901 if (! any_sibcall_could_use_r3 ()
20902 && arm_size_return_regs () <= 12
20903 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20905 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20908 if (!TARGET_THUMB2
)
20909 prefer_callee_reg_p
= true;
20912 || prefer_callee_reg_p
)
20914 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20916 /* Avoid fixed registers; they may be changed at
20917 arbitrary times so it's unsafe to restore them
20918 during the epilogue. */
20920 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20930 offsets
->saved_regs
+= 4;
20931 offsets
->saved_regs_mask
|= (1 << reg
);
20936 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20937 offsets
->outgoing_args
= (offsets
->locals_base
20938 + crtl
->outgoing_args_size
);
20940 if (ARM_DOUBLEWORD_ALIGN
)
20942 /* Ensure SP remains doubleword aligned. */
20943 if (offsets
->outgoing_args
& 7)
20944 offsets
->outgoing_args
+= 4;
20945 gcc_assert (!(offsets
->outgoing_args
& 7));
20952 /* Calculate the relative offsets for the different stack pointers. Positive
20953 offsets are in the direction of stack growth. */
20956 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20958 arm_stack_offsets
*offsets
;
20960 offsets
= arm_get_frame_offsets ();
20962 /* OK, now we have enough information to compute the distances.
20963 There must be an entry in these switch tables for each pair
20964 of registers in ELIMINABLE_REGS, even if some of the entries
20965 seem to be redundant or useless. */
20968 case ARG_POINTER_REGNUM
:
20971 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20974 case FRAME_POINTER_REGNUM
:
20975 /* This is the reverse of the soft frame pointer
20976 to hard frame pointer elimination below. */
20977 return offsets
->soft_frame
- offsets
->saved_args
;
20979 case ARM_HARD_FRAME_POINTER_REGNUM
:
20980 /* This is only non-zero in the case where the static chain register
20981 is stored above the frame. */
20982 return offsets
->frame
- offsets
->saved_args
- 4;
20984 case STACK_POINTER_REGNUM
:
20985 /* If nothing has been pushed on the stack at all
20986 then this will return -4. This *is* correct! */
20987 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20990 gcc_unreachable ();
20992 gcc_unreachable ();
20994 case FRAME_POINTER_REGNUM
:
20997 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21000 case ARM_HARD_FRAME_POINTER_REGNUM
:
21001 /* The hard frame pointer points to the top entry in the
21002 stack frame. The soft frame pointer to the bottom entry
21003 in the stack frame. If there is no stack frame at all,
21004 then they are identical. */
21006 return offsets
->frame
- offsets
->soft_frame
;
21008 case STACK_POINTER_REGNUM
:
21009 return offsets
->outgoing_args
- offsets
->soft_frame
;
21012 gcc_unreachable ();
21014 gcc_unreachable ();
21017 /* You cannot eliminate from the stack pointer.
21018 In theory you could eliminate from the hard frame
21019 pointer to the stack pointer, but this will never
21020 happen, since if a stack frame is not needed the
21021 hard frame pointer will never be used. */
21022 gcc_unreachable ();
21026 /* Given FROM and TO register numbers, say whether this elimination is
21027 allowed. Frame pointer elimination is automatically handled.
21029 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21030 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21031 pointer, we must eliminate FRAME_POINTER_REGNUM into
21032 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21033 ARG_POINTER_REGNUM. */
21036 arm_can_eliminate (const int from
, const int to
)
21038 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21039 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21040 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21041 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21045 /* Emit RTL to save coprocessor registers on function entry. Returns the
21046 number of bytes pushed. */
21049 arm_save_coproc_regs(void)
21051 int saved_size
= 0;
21053 unsigned start_reg
;
21056 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21057 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21059 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21060 insn
= gen_rtx_MEM (V2SImode
, insn
);
21061 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21062 RTX_FRAME_RELATED_P (insn
) = 1;
21066 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21068 start_reg
= FIRST_VFP_REGNUM
;
21070 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21072 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21073 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21075 if (start_reg
!= reg
)
21076 saved_size
+= vfp_emit_fstmd (start_reg
,
21077 (reg
- start_reg
) / 2);
21078 start_reg
= reg
+ 2;
21081 if (start_reg
!= reg
)
21082 saved_size
+= vfp_emit_fstmd (start_reg
,
21083 (reg
- start_reg
) / 2);
21089 /* Set the Thumb frame pointer from the stack pointer. */
21092 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21094 HOST_WIDE_INT amount
;
21097 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21099 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21100 stack_pointer_rtx
, GEN_INT (amount
)));
21103 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21104 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21105 expects the first two operands to be the same. */
21108 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21110 hard_frame_pointer_rtx
));
21114 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21115 hard_frame_pointer_rtx
,
21116 stack_pointer_rtx
));
21118 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
21119 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21120 RTX_FRAME_RELATED_P (dwarf
) = 1;
21121 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21124 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): this region was damaged in extraction -- the original's
   brace-only, blank and several short code lines are missing entirely
   (visible as gaps in the embedded line numbers), and most surviving
   statements are split mid-token across lines.  The code below is kept
   byte-for-byte; do not edit it until it has been re-synchronized with
   an intact copy of the file.  Comments added here describe only what
   the surviving fragments show, and are hedged accordingly.  */
21127 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21130 arm_expand_prologue (void)
21135 unsigned long live_regs_mask
;
21136 unsigned long func_type
;
21138 int saved_pretend_args
= 0;
21139 int saved_regs
= 0;
21140 unsigned HOST_WIDE_INT args_to_push
;
21141 arm_stack_offsets
*offsets
;
21143 func_type
= arm_current_func_type ();
21145 /* Naked functions don't have prologues. */
21146 if (IS_NAKED (func_type
))
21149 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21150 args_to_push
= crtl
->args
.pretend_args_size
;
21152 /* Compute which register we will have to save onto the stack. */
21153 offsets
= arm_get_frame_offsets ();
21154 live_regs_mask
= offsets
->saved_regs_mask
;
/* NOTE(review): IP appears to be used as a scratch register while the
   frame is created (fragments below store/restore it) -- confirm.  */
21156 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21158 if (IS_STACKALIGN (func_type
))
21162 /* Handle a word-aligned stack pointer. We generate the following:
21167 <save and restore r0 in normal prologue/epilogue>
21171 The unwinder doesn't need to know about the stack realignment.
21172 Just tell it we saved SP in r0. */
21173 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21175 r0
= gen_rtx_REG (SImode
, 0);
21176 r1
= gen_rtx_REG (SImode
, 1);
21178 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21179 RTX_FRAME_RELATED_P (insn
) = 1;
21180 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21182 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21184 /* ??? The CFA changes here, which may cause GDB to conclude that it
21185 has entered a different function. That said, the unwind info is
21186 correct, individually, before and after this instruction because
21187 we've described the save of SP, which will override the default
21188 handling of SP as restoring from the CFA. */
21189 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21192 /* For APCS frames, if IP register is clobbered
21193 when creating frame, save that register in a special
21195 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21197 if (IS_INTERRUPT (func_type
))
21199 /* Interrupt functions must not corrupt any registers.
21200 Creating a frame pointer however, corrupts the IP
21201 register, so we must push it first. */
21202 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21204 /* Do not set RTX_FRAME_RELATED_P on this insn.
21205 The dwarf stack unwinding code only wants to see one
21206 stack decrement per function, and this is not it. If
21207 this instruction is labeled as being part of the frame
21208 creation sequence then dwarf2out_frame_debug_expr will
21209 die when it encounters the assignment of IP to FP
21210 later on, since the use of SP here establishes SP as
21211 the CFA register and not IP.
21213 Anyway this instruction is not really part of the stack
21214 frame creation although it is part of the prologue. */
21216 else if (IS_NESTED (func_type
))
21218 /* The static chain register is the same as the IP register
21219 used as a scratch register during stack frame creation.
21220 To get around this need to find somewhere to store IP
21221 whilst the frame is being created. We try the following
21224 1. The last argument register r3 if it is available.
21225 2. A slot on the stack above the frame if there are no
21226 arguments to push onto the stack.
21227 3. Register r3 again, after pushing the argument registers
21228 onto the stack, if this is a varargs function.
21229 4. The last slot on the stack created for the arguments to
21230 push, if this isn't a varargs function.
21232 Note - we only need to tell the dwarf2 backend about the SP
21233 adjustment in the second variant; the static chain register
21234 doesn't need to be unwound, as it doesn't contain a value
21235 inherited from the caller. */
21237 if (!arm_r3_live_at_start_p ())
21238 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21239 else if (args_to_push
== 0)
21243 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21246 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21247 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21250 /* Just tell the dwarf backend that we adjusted SP. */
21251 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21252 plus_constant (Pmode
, stack_pointer_rtx
,
21254 RTX_FRAME_RELATED_P (insn
) = 1;
21255 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21259 /* Store the args on the stack. */
21260 if (cfun
->machine
->uses_anonymous_args
)
21263 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21264 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21265 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21266 saved_pretend_args
= 1;
21272 if (args_to_push
== 4)
21273 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21276 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21277 plus_constant (Pmode
,
21281 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21283 /* Just tell the dwarf backend that we adjusted SP. */
21285 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21286 plus_constant (Pmode
, stack_pointer_rtx
,
21288 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21291 RTX_FRAME_RELATED_P (insn
) = 1;
21292 fp_offset
= args_to_push
;
21297 insn
= emit_set_insn (ip_rtx
,
21298 plus_constant (Pmode
, stack_pointer_rtx
,
21300 RTX_FRAME_RELATED_P (insn
) = 1;
21305 /* Push the argument registers, or reserve space for them. */
21306 if (cfun
->machine
->uses_anonymous_args
)
21307 insn
= emit_multi_reg_push
21308 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21309 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21312 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21313 GEN_INT (- args_to_push
)));
21314 RTX_FRAME_RELATED_P (insn
) = 1;
21317 /* If this is an interrupt service routine, and the link register
21318 is going to be pushed, and we're not generating extra
21319 push of IP (needed when frame is needed and frame layout if apcs),
21320 subtracting four from LR now will mean that the function return
21321 can be done with a single instruction. */
21322 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21323 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21324 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21327 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21329 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
/* NOTE(review): the fragments below push the live core registers and
   record the dwarf mask -- the selection between strd-push and
   multi-reg-push variants has lost its controlling conditions.  */
21332 if (live_regs_mask
)
21334 unsigned long dwarf_regs_mask
= live_regs_mask
;
21336 saved_regs
+= bit_count (live_regs_mask
) * 4;
21337 if (optimize_size
&& !frame_pointer_needed
21338 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21340 /* If no coprocessor registers are being pushed and we don't have
21341 to worry about a frame pointer then push extra registers to
21342 create the stack frame. This is done is a way that does not
21343 alter the frame layout, so is independent of the epilogue. */
21347 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21349 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21350 if (frame
&& n
* 4 >= frame
)
21353 live_regs_mask
|= (1 << n
) - 1;
21354 saved_regs
+= frame
;
21359 && current_tune
->prefer_ldrd_strd
21360 && !optimize_function_for_size_p (cfun
))
21362 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21364 thumb2_emit_strd_push (live_regs_mask
);
21365 else if (TARGET_ARM
21366 && !TARGET_APCS_FRAME
21367 && !IS_INTERRUPT (func_type
))
21368 arm_emit_strd_push (live_regs_mask
);
21371 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21372 RTX_FRAME_RELATED_P (insn
) = 1;
21377 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21378 RTX_FRAME_RELATED_P (insn
) = 1;
21382 if (! IS_VOLATILE (func_type
))
21383 saved_regs
+= arm_save_coproc_regs ();
21385 if (frame_pointer_needed
&& TARGET_ARM
)
21387 /* Create the new frame pointer. */
21388 if (TARGET_APCS_FRAME
)
21390 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21391 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21392 RTX_FRAME_RELATED_P (insn
) = 1;
21394 if (IS_NESTED (func_type
))
21396 /* Recover the static chain register. */
21397 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21398 insn
= gen_rtx_REG (SImode
, 3);
21401 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21402 insn
= gen_frame_mem (SImode
, insn
);
21404 emit_set_insn (ip_rtx
, insn
);
21405 /* Add a USE to stop propagate_one_insn() from barfing. */
21406 emit_insn (gen_force_register_use (ip_rtx
));
21411 insn
= GEN_INT (saved_regs
- 4);
21412 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21413 stack_pointer_rtx
, insn
));
21414 RTX_FRAME_RELATED_P (insn
) = 1;
21418 if (flag_stack_usage_info
)
21419 current_function_static_stack_size
21420 = offsets
->outgoing_args
- offsets
->saved_args
;
21422 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21424 /* This add can produce multiple insns for a large constant, so we
21425 need to get tricky. */
21426 rtx_insn
*last
= get_last_insn ();
21428 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21429 - offsets
->outgoing_args
);
21431 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21435 last
= last
? NEXT_INSN (last
) : get_insns ();
21436 RTX_FRAME_RELATED_P (last
) = 1;
21438 while (last
!= insn
);
21440 /* If the frame pointer is needed, emit a special barrier that
21441 will prevent the scheduler from moving stores to the frame
21442 before the stack adjustment. */
21443 if (frame_pointer_needed
)
21444 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21445 hard_frame_pointer_rtx
));
21449 if (frame_pointer_needed
&& TARGET_THUMB2
)
21450 thumb_set_frame_pointer (offsets
);
21452 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21454 unsigned long mask
;
21456 mask
= live_regs_mask
;
21457 mask
&= THUMB2_WORK_REGS
;
21458 if (!IS_NESTED (func_type
))
21459 mask
|= (1 << IP_REGNUM
);
21460 arm_load_pic_register (mask
);
21463 /* If we are profiling, make sure no instructions are scheduled before
21464 the call to mcount. Similarly if the user has requested no
21465 scheduling in the prolog. Similarly if we want non-call exceptions
21466 using the EABI unwinder, to prevent faulting instructions from being
21467 swapped with a stack adjustment. */
21468 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21469 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21470 && cfun
->can_throw_non_call_exceptions
))
21471 emit_insn (gen_blockage ());
21473 /* If the link register is being kept alive, with the return address in it,
21474 then make sure that it does not get reused by the ce2 pass. */
21475 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21476 cfun
->machine
->lr_save_eliminated
= 1;
21479 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21481 arm_print_condition (FILE *stream
)
21483 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21485 /* Branch conversion is not implemented for Thumb-2. */
21488 output_operand_lossage ("predicated Thumb instruction");
21491 if (current_insn_predicate
!= NULL
)
21493 output_operand_lossage
21494 ("predicated instruction in conditional sequence");
21498 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21500 else if (current_insn_predicate
)
21502 enum arm_cond_code code
;
21506 output_operand_lossage ("predicated Thumb instruction");
21510 code
= get_arm_condition_code (current_insn_predicate
);
21511 fputs (arm_condition_codes
[code
], stream
);
/* NOTE(review): this region was damaged in extraction -- the original's
   `case` labels, `break` statements, braces and blank lines are missing
   (visible as gaps in the embedded line numbers), and surviving
   statements are split mid-token.  The code is kept byte-for-byte; the
   surviving fragments are the bodies of the %-code cases of the target's
   print-operand hook.  Re-synchronize with an intact copy before editing.
   Added comments below are hedged descriptions of what the fragments
   show.  */
21516 /* Globally reserved letters: acln
21517 Puncutation letters currently used: @_|?().!#
21518 Lower case letters currently used: bcdefhimpqtvwxyz
21519 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21520 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21522 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21524 If CODE is 'd', then the X is a condition operand and the instruction
21525 should only be executed if the condition is true.
21526 if CODE is 'D', then the X is a condition operand and the instruction
21527 should only be executed if the condition is false: however, if the mode
21528 of the comparison is CCFPEmode, then always execute the instruction -- we
21529 do this because in these circumstances !GE does not necessarily imply LT;
21530 in these cases the instruction pattern will take care to make sure that
21531 an instruction containing %d will follow, thereby undoing the effects of
21532 doing this instruction unconditionally.
21533 If CODE is 'N' then X is a floating point operand that must be negated
21535 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21536 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21538 arm_print_operand (FILE *stream
, rtx x
, int code
)
21543 fputs (ASM_COMMENT_START
, stream
);
21547 fputs (user_label_prefix
, stream
);
21551 fputs (REGISTER_PREFIX
, stream
);
21555 arm_print_condition (stream
);
21559 /* Nothing in unified syntax, otherwise the current condition code. */
21560 if (!TARGET_UNIFIED_ASM
)
21561 arm_print_condition (stream
);
21565 /* The current condition code in unified syntax, otherwise nothing. */
21566 if (TARGET_UNIFIED_ASM
)
21567 arm_print_condition (stream
);
21571 /* The current condition code for a condition code setting instruction.
21572 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21573 if (TARGET_UNIFIED_ASM
)
21575 fputc('s', stream
);
21576 arm_print_condition (stream
);
21580 arm_print_condition (stream
);
21581 fputc('s', stream
);
21586 /* If the instruction is conditionally executed then print
21587 the current condition code, otherwise print 's'. */
21588 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21589 if (current_insn_predicate
)
21590 arm_print_condition (stream
);
21592 fputc('s', stream
);
21595 /* %# is a "break" sequence. It doesn't output anything, but is used to
21596 separate e.g. operand numbers from following text, if that text consists
21597 of further digits which we don't want to be part of the operand
21605 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21606 r
= real_value_negate (&r
);
21607 fprintf (stream
, "%s", fp_const_from_val (&r
));
21611 /* An integer or symbol address without a preceding # sign. */
21613 switch (GET_CODE (x
))
21616 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21620 output_addr_const (stream
, x
);
21624 if (GET_CODE (XEXP (x
, 0)) == PLUS
21625 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21627 output_addr_const (stream
, x
);
21630 /* Fall through. */
21633 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21637 /* An integer that we want to print in HEX. */
21639 switch (GET_CODE (x
))
21642 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21646 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21651 if (CONST_INT_P (x
))
21654 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21655 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21659 putc ('~', stream
);
21660 output_addr_const (stream
, x
);
21665 /* Print the log2 of a CONST_INT. */
21669 if (!CONST_INT_P (x
)
21670 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21671 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21673 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21678 /* The low 16 bits of an immediate constant. */
21679 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21683 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21687 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21695 shift
= shift_op (x
, &val
);
21699 fprintf (stream
, ", %s ", shift
);
21701 arm_print_operand (stream
, XEXP (x
, 1), 0);
21703 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21708 /* An explanation of the 'Q', 'R' and 'H' register operands:
21710 In a pair of registers containing a DI or DF value the 'Q'
21711 operand returns the register number of the register containing
21712 the least significant part of the value. The 'R' operand returns
21713 the register number of the register containing the most
21714 significant part of the value.
21716 The 'H' operand returns the higher of the two register numbers.
21717 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21718 same as the 'Q' operand, since the most significant part of the
21719 value is held in the lower number register. The reverse is true
21720 on systems where WORDS_BIG_ENDIAN is false.
21722 The purpose of these operands is to distinguish between cases
21723 where the endian-ness of the values is important (for example
21724 when they are added together), and cases where the endian-ness
21725 is irrelevant, but the order of register operations is important.
21726 For example when loading a value from memory into a register
21727 pair, the endian-ness does not matter. Provided that the value
21728 from the lower memory address is put into the lower numbered
21729 register, and the value from the higher address is put into the
21730 higher numbered register, the load will work regardless of whether
21731 the value being loaded is big-wordian or little-wordian. The
21732 order of the two register loads can matter however, if the address
21733 of the memory location is actually held in one of the registers
21734 being overwritten by the load.
21736 The 'Q' and 'R' constraints are also available for 64-bit
21739 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21741 rtx part
= gen_lowpart (SImode
, x
);
21742 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21746 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21748 output_operand_lossage ("invalid operand for code '%c'", code
);
21752 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21756 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21758 machine_mode mode
= GET_MODE (x
);
21761 if (mode
== VOIDmode
)
21763 part
= gen_highpart_mode (SImode
, mode
, x
);
21764 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21768 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21770 output_operand_lossage ("invalid operand for code '%c'", code
);
21774 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21778 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21780 output_operand_lossage ("invalid operand for code '%c'", code
);
21784 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21788 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21790 output_operand_lossage ("invalid operand for code '%c'", code
);
21794 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21798 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21800 output_operand_lossage ("invalid operand for code '%c'", code
);
21804 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21808 asm_fprintf (stream
, "%r",
21809 REG_P (XEXP (x
, 0))
21810 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21814 asm_fprintf (stream
, "{%r-%r}",
21816 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21819 /* Like 'M', but writing doubleword vector registers, for use by Neon
21823 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21824 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21826 asm_fprintf (stream
, "{d%d}", regno
);
21828 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21833 /* CONST_TRUE_RTX means always -- that's the default. */
21834 if (x
== const_true_rtx
)
21837 if (!COMPARISON_P (x
))
21839 output_operand_lossage ("invalid operand for code '%c'", code
);
21843 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21848 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21849 want to do that. */
21850 if (x
== const_true_rtx
)
21852 output_operand_lossage ("instruction never executed");
21855 if (!COMPARISON_P (x
))
21857 output_operand_lossage ("invalid operand for code '%c'", code
);
21861 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21862 (get_arm_condition_code (x
))],
21872 /* Former Maverick support, removed after GCC-4.7. */
21873 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21878 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21879 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21880 /* Bad value for wCG register number. */
21882 output_operand_lossage ("invalid operand for code '%c'", code
);
21887 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21890 /* Print an iWMMXt control register name. */
21892 if (!CONST_INT_P (x
)
21894 || INTVAL (x
) >= 16)
21895 /* Bad value for wC register number. */
21897 output_operand_lossage ("invalid operand for code '%c'", code
);
21903 static const char * wc_reg_names
[16] =
21905 "wCID", "wCon", "wCSSF", "wCASF",
21906 "wC4", "wC5", "wC6", "wC7",
21907 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21908 "wC12", "wC13", "wC14", "wC15"
21911 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21915 /* Print the high single-precision register of a VFP double-precision
21919 machine_mode mode
= GET_MODE (x
);
21922 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21924 output_operand_lossage ("invalid operand for code '%c'", code
);
21929 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21931 output_operand_lossage ("invalid operand for code '%c'", code
);
21935 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21939 /* Print a VFP/Neon double precision or quad precision register name. */
21943 machine_mode mode
= GET_MODE (x
);
21944 int is_quad
= (code
== 'q');
21947 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21949 output_operand_lossage ("invalid operand for code '%c'", code
);
21954 || !IS_VFP_REGNUM (REGNO (x
)))
21956 output_operand_lossage ("invalid operand for code '%c'", code
);
21961 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21962 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21964 output_operand_lossage ("invalid operand for code '%c'", code
);
21968 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21969 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21973 /* These two codes print the low/high doubleword register of a Neon quad
21974 register, respectively. For pair-structure types, can also print
21975 low/high quadword registers. */
21979 machine_mode mode
= GET_MODE (x
);
21982 if ((GET_MODE_SIZE (mode
) != 16
21983 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21985 output_operand_lossage ("invalid operand for code '%c'", code
);
21990 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21992 output_operand_lossage ("invalid operand for code '%c'", code
);
21996 if (GET_MODE_SIZE (mode
) == 16)
21997 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21998 + (code
== 'f' ? 1 : 0));
22000 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22001 + (code
== 'f' ? 1 : 0));
22005 /* Print a VFPv3 floating-point constant, represented as an integer
22009 int index
= vfp3_const_double_index (x
);
22010 gcc_assert (index
!= -1);
22011 fprintf (stream
, "%d", index
);
22015 /* Print bits representing opcode features for Neon.
22017 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22018 and polynomials as unsigned.
22020 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22022 Bit 2 is 1 for rounding functions, 0 otherwise. */
22024 /* Identify the type as 's', 'u', 'p' or 'f'. */
22027 HOST_WIDE_INT bits
= INTVAL (x
);
22028 fputc ("uspf"[bits
& 3], stream
);
22032 /* Likewise, but signed and unsigned integers are both 'i'. */
22035 HOST_WIDE_INT bits
= INTVAL (x
);
22036 fputc ("iipf"[bits
& 3], stream
);
22040 /* As for 'T', but emit 'u' instead of 'p'. */
22043 HOST_WIDE_INT bits
= INTVAL (x
);
22044 fputc ("usuf"[bits
& 3], stream
);
22048 /* Bit 2: rounding (vs none). */
22051 HOST_WIDE_INT bits
= INTVAL (x
);
22052 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22056 /* Memory operand for vld1/vst1 instruction. */
22060 bool postinc
= FALSE
;
22061 rtx postinc_reg
= NULL
;
22062 unsigned align
, memsize
, align_bits
;
22064 gcc_assert (MEM_P (x
));
22065 addr
= XEXP (x
, 0);
22066 if (GET_CODE (addr
) == POST_INC
)
22069 addr
= XEXP (addr
, 0);
22071 if (GET_CODE (addr
) == POST_MODIFY
)
22073 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22074 addr
= XEXP (addr
, 0);
22076 asm_fprintf (stream
, "[%r", REGNO (addr
));
22078 /* We know the alignment of this access, so we can emit a hint in the
22079 instruction (for some alignments) as an aid to the memory subsystem
22081 align
= MEM_ALIGN (x
) >> 3;
22082 memsize
= MEM_SIZE (x
);
22084 /* Only certain alignment specifiers are supported by the hardware. */
22085 if (memsize
== 32 && (align
% 32) == 0)
22087 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22089 else if (memsize
>= 8 && (align
% 8) == 0)
22094 if (align_bits
!= 0)
22095 asm_fprintf (stream
, ":%d", align_bits
);
22097 asm_fprintf (stream
, "]");
22100 fputs("!", stream
);
22102 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22110 gcc_assert (MEM_P (x
));
22111 addr
= XEXP (x
, 0);
22112 gcc_assert (REG_P (addr
));
22113 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22117 /* Translate an S register number into a D register number and element index. */
22120 machine_mode mode
= GET_MODE (x
);
22123 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22125 output_operand_lossage ("invalid operand for code '%c'", code
);
22130 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22132 output_operand_lossage ("invalid operand for code '%c'", code
);
22136 regno
= regno
- FIRST_VFP_REGNUM
;
22137 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22142 gcc_assert (CONST_DOUBLE_P (x
));
22144 result
= vfp3_const_double_for_fract_bits (x
);
22146 result
= vfp3_const_double_for_bits (x
);
22147 fprintf (stream
, "#%d", result
);
22150 /* Register specifier for vld1.16/vst1.16. Translate the S register
22151 number into a D register number and element index. */
22154 machine_mode mode
= GET_MODE (x
);
22157 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22159 output_operand_lossage ("invalid operand for code '%c'", code
);
22164 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22166 output_operand_lossage ("invalid operand for code '%c'", code
);
22170 regno
= regno
- FIRST_VFP_REGNUM
;
22171 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* NOTE(review): fragments below appear to be the default (CODE == 0)
   path that prints a plain operand -- confirm against an intact copy.  */
22178 output_operand_lossage ("missing operand");
22182 switch (GET_CODE (x
))
22185 asm_fprintf (stream
, "%r", REGNO (x
));
22189 output_memory_reference_mode
= GET_MODE (x
);
22190 output_address (XEXP (x
, 0));
22196 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22197 sizeof (fpstr
), 0, 1);
22198 fprintf (stream
, "#%s", fpstr
);
22203 gcc_assert (GET_CODE (x
) != NEG
);
22204 fputc ('#', stream
);
22205 if (GET_CODE (x
) == HIGH
)
22207 fputs (":lower16:", stream
);
22211 output_addr_const (stream
, x
);
22217 /* Target hook for printing a memory address. */
22219 arm_print_operand_address (FILE *stream
, rtx x
)
22223 int is_minus
= GET_CODE (x
) == MINUS
;
22226 asm_fprintf (stream
, "[%r]", REGNO (x
));
22227 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22229 rtx base
= XEXP (x
, 0);
22230 rtx index
= XEXP (x
, 1);
22231 HOST_WIDE_INT offset
= 0;
22233 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22235 /* Ensure that BASE is a register. */
22236 /* (one of them must be). */
22237 /* Also ensure the SP is not used as in index register. */
22242 switch (GET_CODE (index
))
22245 offset
= INTVAL (index
);
22248 asm_fprintf (stream
, "[%r, #%wd]",
22249 REGNO (base
), offset
);
22253 asm_fprintf (stream
, "[%r, %s%r]",
22254 REGNO (base
), is_minus
? "-" : "",
22264 asm_fprintf (stream
, "[%r, %s%r",
22265 REGNO (base
), is_minus
? "-" : "",
22266 REGNO (XEXP (index
, 0)));
22267 arm_print_operand (stream
, index
, 'S');
22268 fputs ("]", stream
);
22273 gcc_unreachable ();
22276 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22277 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22279 extern machine_mode output_memory_reference_mode
;
22281 gcc_assert (REG_P (XEXP (x
, 0)));
22283 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22284 asm_fprintf (stream
, "[%r, #%s%d]!",
22285 REGNO (XEXP (x
, 0)),
22286 GET_CODE (x
) == PRE_DEC
? "-" : "",
22287 GET_MODE_SIZE (output_memory_reference_mode
));
22289 asm_fprintf (stream
, "[%r], #%s%d",
22290 REGNO (XEXP (x
, 0)),
22291 GET_CODE (x
) == POST_DEC
? "-" : "",
22292 GET_MODE_SIZE (output_memory_reference_mode
));
22294 else if (GET_CODE (x
) == PRE_MODIFY
)
22296 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22297 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22298 asm_fprintf (stream
, "#%wd]!",
22299 INTVAL (XEXP (XEXP (x
, 1), 1)));
22301 asm_fprintf (stream
, "%r]!",
22302 REGNO (XEXP (XEXP (x
, 1), 1)));
22304 else if (GET_CODE (x
) == POST_MODIFY
)
22306 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22307 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22308 asm_fprintf (stream
, "#%wd",
22309 INTVAL (XEXP (XEXP (x
, 1), 1)));
22311 asm_fprintf (stream
, "%r",
22312 REGNO (XEXP (XEXP (x
, 1), 1)));
22314 else output_addr_const (stream
, x
);
22319 asm_fprintf (stream
, "[%r]", REGNO (x
));
22320 else if (GET_CODE (x
) == POST_INC
)
22321 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22322 else if (GET_CODE (x
) == PLUS
)
22324 gcc_assert (REG_P (XEXP (x
, 0)));
22325 if (CONST_INT_P (XEXP (x
, 1)))
22326 asm_fprintf (stream
, "[%r, #%wd]",
22327 REGNO (XEXP (x
, 0)),
22328 INTVAL (XEXP (x
, 1)));
22330 asm_fprintf (stream
, "[%r, %r]",
22331 REGNO (XEXP (x
, 0)),
22332 REGNO (XEXP (x
, 1)));
22335 output_addr_const (stream
, x
);
22339 /* Target hook for indicating whether a punctuation character for
22340 TARGET_PRINT_OPERAND is valid. */
22342 arm_print_operand_punct_valid_p (unsigned char code
)
22344 return (code
== '@' || code
== '|' || code
== '.'
22345 || code
== '(' || code
== ')' || code
== '#'
22346 || (TARGET_32BIT
&& (code
== '?'))
22347 || (TARGET_THUMB2
&& (code
== '!'))
22348 || (TARGET_THUMB
&& (code
== '_')));
22351 /* Target hook for assembling integer objects. The ARM version needs to
22352 handle word-sized values specially. */
22354 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22358 if (size
== UNITS_PER_WORD
&& aligned_p
)
22360 fputs ("\t.word\t", asm_out_file
);
22361 output_addr_const (asm_out_file
, x
);
22363 /* Mark symbols as position independent. We only do this in the
22364 .text segment, not in the .data segment. */
22365 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22366 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22368 /* See legitimize_pic_address for an explanation of the
22369 TARGET_VXWORKS_RTP check. */
22370 if (!arm_pic_data_is_text_relative
22371 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22372 fputs ("(GOT)", asm_out_file
);
22374 fputs ("(GOTOFF)", asm_out_file
);
22376 fputc ('\n', asm_out_file
);
22380 mode
= GET_MODE (x
);
22382 if (arm_vector_mode_supported_p (mode
))
22386 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22388 units
= CONST_VECTOR_NUNITS (x
);
22389 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22391 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22392 for (i
= 0; i
< units
; i
++)
22394 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22396 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22399 for (i
= 0; i
< units
; i
++)
22401 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22402 REAL_VALUE_TYPE rval
;
22404 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22407 (rval
, GET_MODE_INNER (mode
),
22408 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22414 return default_assemble_integer (x
, size
, aligned_p
);
22418 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22422 if (!TARGET_AAPCS_BASED
)
22425 default_named_section_asm_out_constructor
22426 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22430 /* Put these in the .init_array section, using a special relocation. */
22431 if (priority
!= DEFAULT_INIT_PRIORITY
)
22434 sprintf (buf
, "%s.%.5u",
22435 is_ctor
? ".init_array" : ".fini_array",
22437 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22444 switch_to_section (s
);
22445 assemble_align (POINTER_SIZE
);
22446 fputs ("\t.word\t", asm_out_file
);
22447 output_addr_const (asm_out_file
, symbol
);
22448 fputs ("(target1)\n", asm_out_file
);
22451 /* Add a function to the list of static constructors. */
22454 arm_elf_asm_constructor (rtx symbol
, int priority
)
22456 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22459 /* Add a function to the list of static destructors. */
22462 arm_elf_asm_destructor (rtx symbol
, int priority
)
22464 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22467 /* A finite state machine takes care of noticing whether or not instructions
22468 can be conditionally executed, and thus decrease execution time and code
22469 size by deleting branch instructions. The fsm is controlled by
22470 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22472 /* The state of the fsm controlling condition codes are:
22473 0: normal, do nothing special
22474 1: make ASM_OUTPUT_OPCODE not output this instruction
22475 2: make ASM_OUTPUT_OPCODE not output this instruction
22476 3: make instructions conditional
22477 4: make instructions conditional
22479 State transitions (state->state by whom under condition):
22480 0 -> 1 final_prescan_insn if the `target' is a label
22481 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22482 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22483 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22484 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22485 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22486 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22487 (the target insn is arm_target_insn).
22489 If the jump clobbers the conditions then we use states 2 and 4.
22491 A similar thing can be done with conditional return insns.
22493 XXX In case the `target' is an unconditional branch, this conditionalising
22494 of the instructions always reduces code size, but not always execution
22495 time. But then, I want to reduce the code size to somewhere near what
22496 /bin/cc produces. */
22498 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22499 instructions. When a COND_EXEC instruction is seen the subsequent
22500 instructions are scanned so that multiple conditional instructions can be
22501 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22502 specify the length and true/false mask for the IT block. These will be
22503 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22505 /* Returns the index of the ARM condition code string in
22506 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22507 COMPARISON should be an rtx like `(eq (...) (...))'. */
22510 maybe_get_arm_condition_code (rtx comparison
)
22512 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22513 enum arm_cond_code code
;
22514 enum rtx_code comp_code
= GET_CODE (comparison
);
22516 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22517 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22518 XEXP (comparison
, 1));
22522 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22523 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22524 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22525 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22526 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22527 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22528 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22529 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22530 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22531 case CC_DLTUmode
: code
= ARM_CC
;
22534 if (comp_code
== EQ
)
22535 return ARM_INVERSE_CONDITION_CODE (code
);
22536 if (comp_code
== NE
)
22543 case NE
: return ARM_NE
;
22544 case EQ
: return ARM_EQ
;
22545 case GE
: return ARM_PL
;
22546 case LT
: return ARM_MI
;
22547 default: return ARM_NV
;
22553 case NE
: return ARM_NE
;
22554 case EQ
: return ARM_EQ
;
22555 default: return ARM_NV
;
22561 case NE
: return ARM_MI
;
22562 case EQ
: return ARM_PL
;
22563 default: return ARM_NV
;
22568 /* We can handle all cases except UNEQ and LTGT. */
22571 case GE
: return ARM_GE
;
22572 case GT
: return ARM_GT
;
22573 case LE
: return ARM_LS
;
22574 case LT
: return ARM_MI
;
22575 case NE
: return ARM_NE
;
22576 case EQ
: return ARM_EQ
;
22577 case ORDERED
: return ARM_VC
;
22578 case UNORDERED
: return ARM_VS
;
22579 case UNLT
: return ARM_LT
;
22580 case UNLE
: return ARM_LE
;
22581 case UNGT
: return ARM_HI
;
22582 case UNGE
: return ARM_PL
;
22583 /* UNEQ and LTGT do not have a representation. */
22584 case UNEQ
: /* Fall through. */
22585 case LTGT
: /* Fall through. */
22586 default: return ARM_NV
;
22592 case NE
: return ARM_NE
;
22593 case EQ
: return ARM_EQ
;
22594 case GE
: return ARM_LE
;
22595 case GT
: return ARM_LT
;
22596 case LE
: return ARM_GE
;
22597 case LT
: return ARM_GT
;
22598 case GEU
: return ARM_LS
;
22599 case GTU
: return ARM_CC
;
22600 case LEU
: return ARM_CS
;
22601 case LTU
: return ARM_HI
;
22602 default: return ARM_NV
;
22608 case LTU
: return ARM_CS
;
22609 case GEU
: return ARM_CC
;
22610 default: return ARM_NV
;
22616 case NE
: return ARM_NE
;
22617 case EQ
: return ARM_EQ
;
22618 case GEU
: return ARM_CS
;
22619 case GTU
: return ARM_HI
;
22620 case LEU
: return ARM_LS
;
22621 case LTU
: return ARM_CC
;
22622 default: return ARM_NV
;
22628 case GE
: return ARM_GE
;
22629 case LT
: return ARM_LT
;
22630 case GEU
: return ARM_CS
;
22631 case LTU
: return ARM_CC
;
22632 default: return ARM_NV
;
22638 case NE
: return ARM_NE
;
22639 case EQ
: return ARM_EQ
;
22640 case GE
: return ARM_GE
;
22641 case GT
: return ARM_GT
;
22642 case LE
: return ARM_LE
;
22643 case LT
: return ARM_LT
;
22644 case GEU
: return ARM_CS
;
22645 case GTU
: return ARM_HI
;
22646 case LEU
: return ARM_LS
;
22647 case LTU
: return ARM_CC
;
22648 default: return ARM_NV
;
22651 default: gcc_unreachable ();
22655 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22656 static enum arm_cond_code
22657 get_arm_condition_code (rtx comparison
)
22659 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22660 gcc_assert (code
!= ARM_NV
);
22664 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22667 thumb2_final_prescan_insn (rtx_insn
*insn
)
22669 rtx_insn
*first_insn
= insn
;
22670 rtx body
= PATTERN (insn
);
22672 enum arm_cond_code code
;
22677 /* max_insns_skipped in the tune was already taken into account in the
22678 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22679 just emit the IT blocks as we can. It does not make sense to split
22681 max
= MAX_INSN_PER_IT_BLOCK
;
22683 /* Remove the previous insn from the count of insns to be output. */
22684 if (arm_condexec_count
)
22685 arm_condexec_count
--;
22687 /* Nothing to do if we are already inside a conditional block. */
22688 if (arm_condexec_count
)
22691 if (GET_CODE (body
) != COND_EXEC
)
22694 /* Conditional jumps are implemented directly. */
22698 predicate
= COND_EXEC_TEST (body
);
22699 arm_current_cc
= get_arm_condition_code (predicate
);
22701 n
= get_attr_ce_count (insn
);
22702 arm_condexec_count
= 1;
22703 arm_condexec_mask
= (1 << n
) - 1;
22704 arm_condexec_masklen
= n
;
22705 /* See if subsequent instructions can be combined into the same block. */
22708 insn
= next_nonnote_insn (insn
);
22710 /* Jumping into the middle of an IT block is illegal, so a label or
22711 barrier terminates the block. */
22712 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22715 body
= PATTERN (insn
);
22716 /* USE and CLOBBER aren't really insns, so just skip them. */
22717 if (GET_CODE (body
) == USE
22718 || GET_CODE (body
) == CLOBBER
)
22721 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22722 if (GET_CODE (body
) != COND_EXEC
)
22724 /* Maximum number of conditionally executed instructions in a block. */
22725 n
= get_attr_ce_count (insn
);
22726 if (arm_condexec_masklen
+ n
> max
)
22729 predicate
= COND_EXEC_TEST (body
);
22730 code
= get_arm_condition_code (predicate
);
22731 mask
= (1 << n
) - 1;
22732 if (arm_current_cc
== code
)
22733 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22734 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22737 arm_condexec_count
++;
22738 arm_condexec_masklen
+= n
;
22740 /* A jump must be the last instruction in a conditional block. */
22744 /* Restore recog_data (getting the attributes of other insns can
22745 destroy this array, but final.c assumes that it remains intact
22746 across this call). */
22747 extract_constrain_insn_cached (first_insn
);
22751 arm_final_prescan_insn (rtx_insn
*insn
)
22753 /* BODY will hold the body of INSN. */
22754 rtx body
= PATTERN (insn
);
22756 /* This will be 1 if trying to repeat the trick, and things need to be
22757 reversed if it appears to fail. */
22760 /* If we start with a return insn, we only succeed if we find another one. */
22761 int seeking_return
= 0;
22762 enum rtx_code return_code
= UNKNOWN
;
22764 /* START_INSN will hold the insn from where we start looking. This is the
22765 first insn after the following code_label if REVERSE is true. */
22766 rtx_insn
*start_insn
= insn
;
22768 /* If in state 4, check if the target branch is reached, in order to
22769 change back to state 0. */
22770 if (arm_ccfsm_state
== 4)
22772 if (insn
== arm_target_insn
)
22774 arm_target_insn
= NULL
;
22775 arm_ccfsm_state
= 0;
22780 /* If in state 3, it is possible to repeat the trick, if this insn is an
22781 unconditional branch to a label, and immediately following this branch
22782 is the previous target label which is only used once, and the label this
22783 branch jumps to is not too far off. */
22784 if (arm_ccfsm_state
== 3)
22786 if (simplejump_p (insn
))
22788 start_insn
= next_nonnote_insn (start_insn
);
22789 if (BARRIER_P (start_insn
))
22791 /* XXX Isn't this always a barrier? */
22792 start_insn
= next_nonnote_insn (start_insn
);
22794 if (LABEL_P (start_insn
)
22795 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22796 && LABEL_NUSES (start_insn
) == 1)
22801 else if (ANY_RETURN_P (body
))
22803 start_insn
= next_nonnote_insn (start_insn
);
22804 if (BARRIER_P (start_insn
))
22805 start_insn
= next_nonnote_insn (start_insn
);
22806 if (LABEL_P (start_insn
)
22807 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22808 && LABEL_NUSES (start_insn
) == 1)
22811 seeking_return
= 1;
22812 return_code
= GET_CODE (body
);
22821 gcc_assert (!arm_ccfsm_state
|| reverse
);
22822 if (!JUMP_P (insn
))
22825 /* This jump might be paralleled with a clobber of the condition codes
22826 the jump should always come first */
22827 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22828 body
= XVECEXP (body
, 0, 0);
22831 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22832 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22835 int fail
= FALSE
, succeed
= FALSE
;
22836 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22837 int then_not_else
= TRUE
;
22838 rtx_insn
*this_insn
= start_insn
;
22841 /* Register the insn jumped to. */
22844 if (!seeking_return
)
22845 label
= XEXP (SET_SRC (body
), 0);
22847 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22848 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22849 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22851 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22852 then_not_else
= FALSE
;
22854 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22856 seeking_return
= 1;
22857 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22859 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22861 seeking_return
= 1;
22862 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22863 then_not_else
= FALSE
;
22866 gcc_unreachable ();
22868 /* See how many insns this branch skips, and what kind of insns. If all
22869 insns are okay, and the label or unconditional branch to the same
22870 label is not too far away, succeed. */
22871 for (insns_skipped
= 0;
22872 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22876 this_insn
= next_nonnote_insn (this_insn
);
22880 switch (GET_CODE (this_insn
))
22883 /* Succeed if it is the target label, otherwise fail since
22884 control falls in from somewhere else. */
22885 if (this_insn
== label
)
22887 arm_ccfsm_state
= 1;
22895 /* Succeed if the following insn is the target label.
22897 If return insns are used then the last insn in a function
22898 will be a barrier. */
22899 this_insn
= next_nonnote_insn (this_insn
);
22900 if (this_insn
&& this_insn
== label
)
22902 arm_ccfsm_state
= 1;
22910 /* The AAPCS says that conditional calls should not be
22911 used since they make interworking inefficient (the
22912 linker can't transform BL<cond> into BLX). That's
22913 only a problem if the machine has BLX. */
22920 /* Succeed if the following insn is the target label, or
22921 if the following two insns are a barrier and the
22923 this_insn
= next_nonnote_insn (this_insn
);
22924 if (this_insn
&& BARRIER_P (this_insn
))
22925 this_insn
= next_nonnote_insn (this_insn
);
22927 if (this_insn
&& this_insn
== label
22928 && insns_skipped
< max_insns_skipped
)
22930 arm_ccfsm_state
= 1;
22938 /* If this is an unconditional branch to the same label, succeed.
22939 If it is to another label, do nothing. If it is conditional,
22941 /* XXX Probably, the tests for SET and the PC are
22944 scanbody
= PATTERN (this_insn
);
22945 if (GET_CODE (scanbody
) == SET
22946 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22948 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22949 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22951 arm_ccfsm_state
= 2;
22954 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22957 /* Fail if a conditional return is undesirable (e.g. on a
22958 StrongARM), but still allow this if optimizing for size. */
22959 else if (GET_CODE (scanbody
) == return_code
22960 && !use_return_insn (TRUE
, NULL
)
22963 else if (GET_CODE (scanbody
) == return_code
)
22965 arm_ccfsm_state
= 2;
22968 else if (GET_CODE (scanbody
) == PARALLEL
)
22970 switch (get_attr_conds (this_insn
))
22980 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22985 /* Instructions using or affecting the condition codes make it
22987 scanbody
= PATTERN (this_insn
);
22988 if (!(GET_CODE (scanbody
) == SET
22989 || GET_CODE (scanbody
) == PARALLEL
)
22990 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23000 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23001 arm_target_label
= CODE_LABEL_NUMBER (label
);
23004 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23006 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23008 this_insn
= next_nonnote_insn (this_insn
);
23009 gcc_assert (!this_insn
23010 || (!BARRIER_P (this_insn
)
23011 && !LABEL_P (this_insn
)));
23015 /* Oh, dear! we ran off the end.. give up. */
23016 extract_constrain_insn_cached (insn
);
23017 arm_ccfsm_state
= 0;
23018 arm_target_insn
= NULL
;
23021 arm_target_insn
= this_insn
;
23024 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23027 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23029 if (reverse
|| then_not_else
)
23030 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23033 /* Restore recog_data (getting the attributes of other insns can
23034 destroy this array, but final.c assumes that it remains intact
23035 across this call. */
23036 extract_constrain_insn_cached (insn
);
23040 /* Output IT instructions. */
23042 thumb2_asm_output_opcode (FILE * stream
)
23047 if (arm_condexec_mask
)
23049 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23050 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23052 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23053 arm_condition_codes
[arm_current_cc
]);
23054 arm_condexec_mask
= 0;
23058 /* Returns true if REGNO is a valid register
23059 for holding a quantity of type MODE. */
23061 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23063 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23064 return (regno
== CC_REGNUM
23065 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23066 && regno
== VFPCC_REGNUM
));
23068 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23072 /* For the Thumb we only allow values bigger than SImode in
23073 registers 0 - 6, so that there is always a second low
23074 register available to hold the upper part of the value.
23075 We probably we ought to ensure that the register is the
23076 start of an even numbered register pair. */
23077 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23079 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23080 && IS_VFP_REGNUM (regno
))
23082 if (mode
== SFmode
|| mode
== SImode
)
23083 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23085 if (mode
== DFmode
)
23086 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23088 /* VFP registers can hold HFmode values, but there is no point in
23089 putting them there unless we have hardware conversion insns. */
23090 if (mode
== HFmode
)
23091 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
23094 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23095 || (VALID_NEON_QREG_MODE (mode
)
23096 && NEON_REGNO_OK_FOR_QUAD (regno
))
23097 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23098 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23099 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23100 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23101 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23106 if (TARGET_REALLY_IWMMXT
)
23108 if (IS_IWMMXT_GR_REGNUM (regno
))
23109 return mode
== SImode
;
23111 if (IS_IWMMXT_REGNUM (regno
))
23112 return VALID_IWMMXT_REG_MODE (mode
);
23115 /* We allow almost any value to be stored in the general registers.
23116 Restrict doubleword quantities to even register pairs in ARM state
23117 so that we can use ldrd. Do not allow very large Neon structure
23118 opaque modes in general registers; they would use too many. */
23119 if (regno
<= LAST_ARM_REGNUM
)
23121 if (ARM_NUM_REGS (mode
) > 4)
23127 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23130 if (regno
== FRAME_POINTER_REGNUM
23131 || regno
== ARG_POINTER_REGNUM
)
23132 /* We only allow integers in the fake hard registers. */
23133 return GET_MODE_CLASS (mode
) == MODE_INT
;
23138 /* Implement MODES_TIEABLE_P. */
23141 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23143 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23146 /* We specifically want to allow elements of "structure" modes to
23147 be tieable to the structure. This more general condition allows
23148 other rarer situations too. */
23150 && (VALID_NEON_DREG_MODE (mode1
)
23151 || VALID_NEON_QREG_MODE (mode1
)
23152 || VALID_NEON_STRUCT_MODE (mode1
))
23153 && (VALID_NEON_DREG_MODE (mode2
)
23154 || VALID_NEON_QREG_MODE (mode2
)
23155 || VALID_NEON_STRUCT_MODE (mode2
)))
23161 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23162 not used in arm mode. */
23165 arm_regno_class (int regno
)
23167 if (regno
== PC_REGNUM
)
23172 if (regno
== STACK_POINTER_REGNUM
)
23174 if (regno
== CC_REGNUM
)
23181 if (TARGET_THUMB2
&& regno
< 8)
23184 if ( regno
<= LAST_ARM_REGNUM
23185 || regno
== FRAME_POINTER_REGNUM
23186 || regno
== ARG_POINTER_REGNUM
)
23187 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23189 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23190 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23192 if (IS_VFP_REGNUM (regno
))
23194 if (regno
<= D7_VFP_REGNUM
)
23195 return VFP_D0_D7_REGS
;
23196 else if (regno
<= LAST_LO_VFP_REGNUM
)
23197 return VFP_LO_REGS
;
23199 return VFP_HI_REGS
;
23202 if (IS_IWMMXT_REGNUM (regno
))
23203 return IWMMXT_REGS
;
23205 if (IS_IWMMXT_GR_REGNUM (regno
))
23206 return IWMMXT_GR_REGS
;
23211 /* Handle a special case when computing the offset
23212 of an argument from the frame pointer. */
23214 arm_debugger_arg_offset (int value
, rtx addr
)
23218 /* We are only interested if dbxout_parms() failed to compute the offset. */
23222 /* We can only cope with the case where the address is held in a register. */
23226 /* If we are using the frame pointer to point at the argument, then
23227 an offset of 0 is correct. */
23228 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23231 /* If we are using the stack pointer to point at the
23232 argument, then an offset of 0 is correct. */
23233 /* ??? Check this is consistent with thumb2 frame layout. */
23234 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23235 && REGNO (addr
) == SP_REGNUM
)
23238 /* Oh dear. The argument is pointed to by a register rather
23239 than being held in a register, or being stored at a known
23240 offset from the frame pointer. Since GDB only understands
23241 those two kinds of argument we must translate the address
23242 held in the register into an offset from the frame pointer.
23243 We do this by searching through the insns for the function
23244 looking to see where this register gets its value. If the
23245 register is initialized from the frame pointer plus an offset
23246 then we are in luck and we can continue, otherwise we give up.
23248 This code is exercised by producing debugging information
23249 for a function with arguments like this:
23251 double func (double a, double b, int c, double d) {return d;}
23253 Without this code the stab for parameter 'd' will be set to
23254 an offset of 0 from the frame pointer, rather than 8. */
23256 /* The if() statement says:
23258 If the insn is a normal instruction
23259 and if the insn is setting the value in a register
23260 and if the register being set is the register holding the address of the argument
23261 and if the address is computing by an addition
23262 that involves adding to a register
23263 which is the frame pointer
23268 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23270 if ( NONJUMP_INSN_P (insn
)
23271 && GET_CODE (PATTERN (insn
)) == SET
23272 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23273 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23274 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23275 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23276 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23279 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23288 warning (0, "unable to compute real location of stacked parameter");
23289 value
= 8; /* XXX magic hack */
23310 T_MAX
/* Size of enum. Keep last. */
23311 } neon_builtin_type_mode
;
23313 #define TYPE_MODE_BIT(X) (1 << (X))
23315 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23316 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23317 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23318 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23319 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23320 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23322 #define v8qi_UP T_V8QI
23323 #define v4hi_UP T_V4HI
23324 #define v4hf_UP T_V4HF
23325 #define v2si_UP T_V2SI
23326 #define v2sf_UP T_V2SF
23328 #define v16qi_UP T_V16QI
23329 #define v8hi_UP T_V8HI
23330 #define v4si_UP T_V4SI
23331 #define v4sf_UP T_V4SF
23332 #define v2di_UP T_V2DI
23337 #define UP(X) X##_UP
23374 NEON_LOADSTRUCTLANE
,
23376 NEON_STORESTRUCTLANE
,
23385 const neon_itype itype
;
23386 const neon_builtin_type_mode mode
;
23387 const enum insn_code code
;
23388 unsigned int fcode
;
23389 } neon_builtin_datum
;
23391 #define CF(N,X) CODE_FOR_neon_##N##X
23393 #define VAR1(T, N, A) \
23394 {#N, NEON_##T, UP (A), CF (N, A), 0}
23395 #define VAR2(T, N, A, B) \
23397 {#N, NEON_##T, UP (B), CF (N, B), 0}
23398 #define VAR3(T, N, A, B, C) \
23399 VAR2 (T, N, A, B), \
23400 {#N, NEON_##T, UP (C), CF (N, C), 0}
23401 #define VAR4(T, N, A, B, C, D) \
23402 VAR3 (T, N, A, B, C), \
23403 {#N, NEON_##T, UP (D), CF (N, D), 0}
23404 #define VAR5(T, N, A, B, C, D, E) \
23405 VAR4 (T, N, A, B, C, D), \
23406 {#N, NEON_##T, UP (E), CF (N, E), 0}
23407 #define VAR6(T, N, A, B, C, D, E, F) \
23408 VAR5 (T, N, A, B, C, D, E), \
23409 {#N, NEON_##T, UP (F), CF (N, F), 0}
23410 #define VAR7(T, N, A, B, C, D, E, F, G) \
23411 VAR6 (T, N, A, B, C, D, E, F), \
23412 {#N, NEON_##T, UP (G), CF (N, G), 0}
23413 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23414 VAR7 (T, N, A, B, C, D, E, F, G), \
23415 {#N, NEON_##T, UP (H), CF (N, H), 0}
23416 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23417 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23418 {#N, NEON_##T, UP (I), CF (N, I), 0}
23419 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23420 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23421 {#N, NEON_##T, UP (J), CF (N, J), 0}
23423 /* The NEON builtin data can be found in arm_neon_builtins.def.
23424 The mode entries in the following table correspond to the "key" type of the
23425 instruction variant, i.e. equivalent to that which would be specified after
23426 the assembler mnemonic, which usually refers to the last vector operand.
23427 (Signed/unsigned/polynomial types are not differentiated between though, and
23428 are all mapped onto the same mode for a given element size.) The modes
23429 listed per instruction should be the same as those defined for that
23430 instruction's pattern in neon.md. */
23432 static neon_builtin_datum neon_builtin_data
[] =
23434 #include "arm_neon_builtins.def"
23449 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23450 #define VAR1(T, N, A) \
23452 #define VAR2(T, N, A, B) \
23455 #define VAR3(T, N, A, B, C) \
23456 VAR2 (T, N, A, B), \
23458 #define VAR4(T, N, A, B, C, D) \
23459 VAR3 (T, N, A, B, C), \
23461 #define VAR5(T, N, A, B, C, D, E) \
23462 VAR4 (T, N, A, B, C, D), \
23464 #define VAR6(T, N, A, B, C, D, E, F) \
23465 VAR5 (T, N, A, B, C, D, E), \
23467 #define VAR7(T, N, A, B, C, D, E, F, G) \
23468 VAR6 (T, N, A, B, C, D, E, F), \
23470 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23471 VAR7 (T, N, A, B, C, D, E, F, G), \
23473 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23474 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23476 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23477 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23481 ARM_BUILTIN_GETWCGR0
,
23482 ARM_BUILTIN_GETWCGR1
,
23483 ARM_BUILTIN_GETWCGR2
,
23484 ARM_BUILTIN_GETWCGR3
,
23486 ARM_BUILTIN_SETWCGR0
,
23487 ARM_BUILTIN_SETWCGR1
,
23488 ARM_BUILTIN_SETWCGR2
,
23489 ARM_BUILTIN_SETWCGR3
,
23493 ARM_BUILTIN_WAVG2BR
,
23494 ARM_BUILTIN_WAVG2HR
,
23495 ARM_BUILTIN_WAVG2B
,
23496 ARM_BUILTIN_WAVG2H
,
23503 ARM_BUILTIN_WMACSZ
,
23505 ARM_BUILTIN_WMACUZ
,
23508 ARM_BUILTIN_WSADBZ
,
23510 ARM_BUILTIN_WSADHZ
,
23512 ARM_BUILTIN_WALIGNI
,
23513 ARM_BUILTIN_WALIGNR0
,
23514 ARM_BUILTIN_WALIGNR1
,
23515 ARM_BUILTIN_WALIGNR2
,
23516 ARM_BUILTIN_WALIGNR3
,
23519 ARM_BUILTIN_TMIAPH
,
23520 ARM_BUILTIN_TMIABB
,
23521 ARM_BUILTIN_TMIABT
,
23522 ARM_BUILTIN_TMIATB
,
23523 ARM_BUILTIN_TMIATT
,
23525 ARM_BUILTIN_TMOVMSKB
,
23526 ARM_BUILTIN_TMOVMSKH
,
23527 ARM_BUILTIN_TMOVMSKW
,
23529 ARM_BUILTIN_TBCSTB
,
23530 ARM_BUILTIN_TBCSTH
,
23531 ARM_BUILTIN_TBCSTW
,
23533 ARM_BUILTIN_WMADDS
,
23534 ARM_BUILTIN_WMADDU
,
23536 ARM_BUILTIN_WPACKHSS
,
23537 ARM_BUILTIN_WPACKWSS
,
23538 ARM_BUILTIN_WPACKDSS
,
23539 ARM_BUILTIN_WPACKHUS
,
23540 ARM_BUILTIN_WPACKWUS
,
23541 ARM_BUILTIN_WPACKDUS
,
23546 ARM_BUILTIN_WADDSSB
,
23547 ARM_BUILTIN_WADDSSH
,
23548 ARM_BUILTIN_WADDSSW
,
23549 ARM_BUILTIN_WADDUSB
,
23550 ARM_BUILTIN_WADDUSH
,
23551 ARM_BUILTIN_WADDUSW
,
23555 ARM_BUILTIN_WSUBSSB
,
23556 ARM_BUILTIN_WSUBSSH
,
23557 ARM_BUILTIN_WSUBSSW
,
23558 ARM_BUILTIN_WSUBUSB
,
23559 ARM_BUILTIN_WSUBUSH
,
23560 ARM_BUILTIN_WSUBUSW
,
23567 ARM_BUILTIN_WCMPEQB
,
23568 ARM_BUILTIN_WCMPEQH
,
23569 ARM_BUILTIN_WCMPEQW
,
23570 ARM_BUILTIN_WCMPGTUB
,
23571 ARM_BUILTIN_WCMPGTUH
,
23572 ARM_BUILTIN_WCMPGTUW
,
23573 ARM_BUILTIN_WCMPGTSB
,
23574 ARM_BUILTIN_WCMPGTSH
,
23575 ARM_BUILTIN_WCMPGTSW
,
23577 ARM_BUILTIN_TEXTRMSB
,
23578 ARM_BUILTIN_TEXTRMSH
,
23579 ARM_BUILTIN_TEXTRMSW
,
23580 ARM_BUILTIN_TEXTRMUB
,
23581 ARM_BUILTIN_TEXTRMUH
,
23582 ARM_BUILTIN_TEXTRMUW
,
23583 ARM_BUILTIN_TINSRB
,
23584 ARM_BUILTIN_TINSRH
,
23585 ARM_BUILTIN_TINSRW
,
23587 ARM_BUILTIN_WMAXSW
,
23588 ARM_BUILTIN_WMAXSH
,
23589 ARM_BUILTIN_WMAXSB
,
23590 ARM_BUILTIN_WMAXUW
,
23591 ARM_BUILTIN_WMAXUH
,
23592 ARM_BUILTIN_WMAXUB
,
23593 ARM_BUILTIN_WMINSW
,
23594 ARM_BUILTIN_WMINSH
,
23595 ARM_BUILTIN_WMINSB
,
23596 ARM_BUILTIN_WMINUW
,
23597 ARM_BUILTIN_WMINUH
,
23598 ARM_BUILTIN_WMINUB
,
23600 ARM_BUILTIN_WMULUM
,
23601 ARM_BUILTIN_WMULSM
,
23602 ARM_BUILTIN_WMULUL
,
23604 ARM_BUILTIN_PSADBH
,
23605 ARM_BUILTIN_WSHUFH
,
23619 ARM_BUILTIN_WSLLHI
,
23620 ARM_BUILTIN_WSLLWI
,
23621 ARM_BUILTIN_WSLLDI
,
23622 ARM_BUILTIN_WSRAHI
,
23623 ARM_BUILTIN_WSRAWI
,
23624 ARM_BUILTIN_WSRADI
,
23625 ARM_BUILTIN_WSRLHI
,
23626 ARM_BUILTIN_WSRLWI
,
23627 ARM_BUILTIN_WSRLDI
,
23628 ARM_BUILTIN_WRORHI
,
23629 ARM_BUILTIN_WRORWI
,
23630 ARM_BUILTIN_WRORDI
,
23632 ARM_BUILTIN_WUNPCKIHB
,
23633 ARM_BUILTIN_WUNPCKIHH
,
23634 ARM_BUILTIN_WUNPCKIHW
,
23635 ARM_BUILTIN_WUNPCKILB
,
23636 ARM_BUILTIN_WUNPCKILH
,
23637 ARM_BUILTIN_WUNPCKILW
,
23639 ARM_BUILTIN_WUNPCKEHSB
,
23640 ARM_BUILTIN_WUNPCKEHSH
,
23641 ARM_BUILTIN_WUNPCKEHSW
,
23642 ARM_BUILTIN_WUNPCKEHUB
,
23643 ARM_BUILTIN_WUNPCKEHUH
,
23644 ARM_BUILTIN_WUNPCKEHUW
,
23645 ARM_BUILTIN_WUNPCKELSB
,
23646 ARM_BUILTIN_WUNPCKELSH
,
23647 ARM_BUILTIN_WUNPCKELSW
,
23648 ARM_BUILTIN_WUNPCKELUB
,
23649 ARM_BUILTIN_WUNPCKELUH
,
23650 ARM_BUILTIN_WUNPCKELUW
,
23656 ARM_BUILTIN_WADDSUBHX
,
23657 ARM_BUILTIN_WSUBADDHX
,
23659 ARM_BUILTIN_WABSDIFFB
,
23660 ARM_BUILTIN_WABSDIFFH
,
23661 ARM_BUILTIN_WABSDIFFW
,
23663 ARM_BUILTIN_WADDCH
,
23664 ARM_BUILTIN_WADDCW
,
23667 ARM_BUILTIN_WAVG4R
,
23669 ARM_BUILTIN_WMADDSX
,
23670 ARM_BUILTIN_WMADDUX
,
23672 ARM_BUILTIN_WMADDSN
,
23673 ARM_BUILTIN_WMADDUN
,
23675 ARM_BUILTIN_WMULWSM
,
23676 ARM_BUILTIN_WMULWUM
,
23678 ARM_BUILTIN_WMULWSMR
,
23679 ARM_BUILTIN_WMULWUMR
,
23681 ARM_BUILTIN_WMULWL
,
23683 ARM_BUILTIN_WMULSMR
,
23684 ARM_BUILTIN_WMULUMR
,
23686 ARM_BUILTIN_WQMULM
,
23687 ARM_BUILTIN_WQMULMR
,
23689 ARM_BUILTIN_WQMULWM
,
23690 ARM_BUILTIN_WQMULWMR
,
23692 ARM_BUILTIN_WADDBHUSM
,
23693 ARM_BUILTIN_WADDBHUSL
,
23695 ARM_BUILTIN_WQMIABB
,
23696 ARM_BUILTIN_WQMIABT
,
23697 ARM_BUILTIN_WQMIATB
,
23698 ARM_BUILTIN_WQMIATT
,
23700 ARM_BUILTIN_WQMIABBN
,
23701 ARM_BUILTIN_WQMIABTN
,
23702 ARM_BUILTIN_WQMIATBN
,
23703 ARM_BUILTIN_WQMIATTN
,
23705 ARM_BUILTIN_WMIABB
,
23706 ARM_BUILTIN_WMIABT
,
23707 ARM_BUILTIN_WMIATB
,
23708 ARM_BUILTIN_WMIATT
,
23710 ARM_BUILTIN_WMIABBN
,
23711 ARM_BUILTIN_WMIABTN
,
23712 ARM_BUILTIN_WMIATBN
,
23713 ARM_BUILTIN_WMIATTN
,
23715 ARM_BUILTIN_WMIAWBB
,
23716 ARM_BUILTIN_WMIAWBT
,
23717 ARM_BUILTIN_WMIAWTB
,
23718 ARM_BUILTIN_WMIAWTT
,
23720 ARM_BUILTIN_WMIAWBBN
,
23721 ARM_BUILTIN_WMIAWBTN
,
23722 ARM_BUILTIN_WMIAWTBN
,
23723 ARM_BUILTIN_WMIAWTTN
,
23725 ARM_BUILTIN_WMERGE
,
23727 ARM_BUILTIN_CRC32B
,
23728 ARM_BUILTIN_CRC32H
,
23729 ARM_BUILTIN_CRC32W
,
23730 ARM_BUILTIN_CRC32CB
,
23731 ARM_BUILTIN_CRC32CH
,
23732 ARM_BUILTIN_CRC32CW
,
23734 ARM_BUILTIN_GET_FPSCR
,
23735 ARM_BUILTIN_SET_FPSCR
,
23741 #define CRYPTO1(L, U, M1, M2) \
23742 ARM_BUILTIN_CRYPTO_##U,
23743 #define CRYPTO2(L, U, M1, M2, M3) \
23744 ARM_BUILTIN_CRYPTO_##U,
23745 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23746 ARM_BUILTIN_CRYPTO_##U,
23748 #include "crypto.def"
23754 #include "arm_neon_builtins.def"
23759 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23773 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23775 #define NUM_DREG_TYPES 5
23776 #define NUM_QREG_TYPES 6
23779 arm_init_neon_builtins (void)
23781 unsigned int i
, fcode
;
23784 tree neon_intQI_type_node
;
23785 tree neon_intHI_type_node
;
23786 tree neon_floatHF_type_node
;
23787 tree neon_polyQI_type_node
;
23788 tree neon_polyHI_type_node
;
23789 tree neon_intSI_type_node
;
23790 tree neon_intDI_type_node
;
23791 tree neon_intUTI_type_node
;
23792 tree neon_float_type_node
;
23794 tree intQI_pointer_node
;
23795 tree intHI_pointer_node
;
23796 tree intSI_pointer_node
;
23797 tree intDI_pointer_node
;
23798 tree float_pointer_node
;
23800 tree const_intQI_node
;
23801 tree const_intHI_node
;
23802 tree const_intSI_node
;
23803 tree const_intDI_node
;
23804 tree const_float_node
;
23806 tree const_intQI_pointer_node
;
23807 tree const_intHI_pointer_node
;
23808 tree const_intSI_pointer_node
;
23809 tree const_intDI_pointer_node
;
23810 tree const_float_pointer_node
;
23812 tree V8QI_type_node
;
23813 tree V4HI_type_node
;
23814 tree V4UHI_type_node
;
23815 tree V4HF_type_node
;
23816 tree V2SI_type_node
;
23817 tree V2USI_type_node
;
23818 tree V2SF_type_node
;
23819 tree V16QI_type_node
;
23820 tree V8HI_type_node
;
23821 tree V8UHI_type_node
;
23822 tree V4SI_type_node
;
23823 tree V4USI_type_node
;
23824 tree V4SF_type_node
;
23825 tree V2DI_type_node
;
23826 tree V2UDI_type_node
;
23828 tree intUQI_type_node
;
23829 tree intUHI_type_node
;
23830 tree intUSI_type_node
;
23831 tree intUDI_type_node
;
23833 tree intEI_type_node
;
23834 tree intOI_type_node
;
23835 tree intCI_type_node
;
23836 tree intXI_type_node
;
23838 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23839 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23840 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23842 /* Create distinguished type nodes for NEON vector element types,
23843 and pointers to values of such types, so we can detect them later. */
23844 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23845 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23846 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23847 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23848 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23849 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23850 neon_float_type_node
= make_node (REAL_TYPE
);
23851 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23852 layout_type (neon_float_type_node
);
23853 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23854 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23855 layout_type (neon_floatHF_type_node
);
23857 /* Define typedefs which exactly correspond to the modes we are basing vector
23858 types on. If you change these names you'll need to change
23859 the table used by arm_mangle_type too. */
23860 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23861 "__builtin_neon_qi");
23862 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23863 "__builtin_neon_hi");
23864 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23865 "__builtin_neon_hf");
23866 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23867 "__builtin_neon_si");
23868 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23869 "__builtin_neon_sf");
23870 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23871 "__builtin_neon_di");
23872 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23873 "__builtin_neon_poly8");
23874 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23875 "__builtin_neon_poly16");
23877 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23878 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23879 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23880 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23881 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23883 /* Next create constant-qualified versions of the above types. */
23884 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23886 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23888 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23890 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23892 const_float_node
= build_qualified_type (neon_float_type_node
,
23895 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23896 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23897 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23898 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23899 const_float_pointer_node
= build_pointer_type (const_float_node
);
23901 /* Unsigned integer types for various mode sizes. */
23902 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23903 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23904 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23905 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23906 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23907 /* Now create vector types based on our NEON element types. */
23908 /* 64-bit vectors. */
23910 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23912 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23914 build_vector_type_for_mode (intUHI_type_node
, V4HImode
);
23916 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23918 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23920 build_vector_type_for_mode (intUSI_type_node
, V2SImode
);
23922 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23923 /* 128-bit vectors. */
23925 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23927 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23929 build_vector_type_for_mode (intUHI_type_node
, V8HImode
);
23931 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23933 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23935 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23937 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23939 build_vector_type_for_mode (intUDI_type_node
, V2DImode
);
23942 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23943 "__builtin_neon_uqi");
23944 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23945 "__builtin_neon_uhi");
23946 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23947 "__builtin_neon_usi");
23948 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23949 "__builtin_neon_udi");
23950 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23951 "__builtin_neon_poly64");
23952 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23953 "__builtin_neon_poly128");
23955 /* Opaque integer types for structures of vectors. */
23956 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23957 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23958 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23959 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23961 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23962 "__builtin_neon_ti");
23963 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23964 "__builtin_neon_ei");
23965 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23966 "__builtin_neon_oi");
23967 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23968 "__builtin_neon_ci");
23969 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23970 "__builtin_neon_xi");
23972 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23975 tree V16UQI_type_node
=
23976 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23978 tree v16uqi_ftype_v16uqi
23979 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23981 tree v16uqi_ftype_v16uqi_v16uqi
23982 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23983 V16UQI_type_node
, NULL_TREE
);
23985 tree v4usi_ftype_v4usi
23986 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23988 tree v4usi_ftype_v4usi_v4usi
23989 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23990 V4USI_type_node
, NULL_TREE
);
23992 tree v4usi_ftype_v4usi_v4usi_v4usi
23993 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23994 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23996 tree uti_ftype_udi_udi
23997 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23998 intUDI_type_node
, NULL_TREE
);
24011 ARM_BUILTIN_CRYPTO_##U
24013 "__builtin_arm_crypto_"#L
24014 #define FT1(R, A) \
24016 #define FT2(R, A1, A2) \
24017 R##_ftype_##A1##_##A2
24018 #define FT3(R, A1, A2, A3) \
24019 R##_ftype_##A1##_##A2##_##A3
24020 #define CRYPTO1(L, U, R, A) \
24021 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
24022 C (U), BUILT_IN_MD, \
24024 #define CRYPTO2(L, U, R, A1, A2) \
24025 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
24026 C (U), BUILT_IN_MD, \
24029 #define CRYPTO3(L, U, R, A1, A2, A3) \
24030 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
24031 C (U), BUILT_IN_MD, \
24033 #include "crypto.def"
24044 dreg_types
[0] = V8QI_type_node
;
24045 dreg_types
[1] = V4HI_type_node
;
24046 dreg_types
[2] = V2SI_type_node
;
24047 dreg_types
[3] = V2SF_type_node
;
24048 dreg_types
[4] = neon_intDI_type_node
;
24050 qreg_types
[0] = V16QI_type_node
;
24051 qreg_types
[1] = V8HI_type_node
;
24052 qreg_types
[2] = V4SI_type_node
;
24053 qreg_types
[3] = V4SF_type_node
;
24054 qreg_types
[4] = V2DI_type_node
;
24055 qreg_types
[5] = neon_intUTI_type_node
;
24057 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
24060 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
24062 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
24063 reinterp_ftype_dreg
[i
][j
]
24064 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
24066 reinterp_ftype_qreg
[i
][j
]
24067 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
24071 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
24072 i
< ARRAY_SIZE (neon_builtin_data
);
24075 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
24077 const char* const modenames
[] = {
24078 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24079 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24084 int is_load
= 0, is_store
= 0;
24086 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
24093 case NEON_LOAD1LANE
:
24094 case NEON_LOADSTRUCT
:
24095 case NEON_LOADSTRUCTLANE
:
24097 /* Fall through. */
24099 case NEON_STORE1LANE
:
24100 case NEON_STORESTRUCT
:
24101 case NEON_STORESTRUCTLANE
:
24104 /* Fall through. */
24108 case NEON_LOGICBINOP
:
24109 case NEON_SHIFTINSERT
:
24116 case NEON_SHIFTIMM
:
24117 case NEON_SHIFTACC
:
24123 case NEON_LANEMULL
:
24124 case NEON_LANEMULH
:
24126 case NEON_SCALARMUL
:
24127 case NEON_SCALARMULL
:
24128 case NEON_SCALARMULH
:
24129 case NEON_SCALARMAC
:
24135 tree return_type
= void_type_node
, args
= void_list_node
;
24137 /* Build a function type directly from the insn_data for
24138 this builtin. The build_function_type() function takes
24139 care of removing duplicates for us. */
24140 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
24144 if (is_load
&& k
== 1)
24146 /* Neon load patterns always have the memory
24147 operand in the operand 1 position. */
24148 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24149 == neon_struct_operand
);
24155 eltype
= const_intQI_pointer_node
;
24160 eltype
= const_intHI_pointer_node
;
24165 eltype
= const_intSI_pointer_node
;
24170 eltype
= const_float_pointer_node
;
24175 eltype
= const_intDI_pointer_node
;
24178 default: gcc_unreachable ();
24181 else if (is_store
&& k
== 0)
24183 /* Similarly, Neon store patterns use operand 0 as
24184 the memory location to store to. */
24185 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24186 == neon_struct_operand
);
24192 eltype
= intQI_pointer_node
;
24197 eltype
= intHI_pointer_node
;
24202 eltype
= intSI_pointer_node
;
24207 eltype
= float_pointer_node
;
24212 eltype
= intDI_pointer_node
;
24215 default: gcc_unreachable ();
24220 switch (insn_data
[d
->code
].operand
[k
].mode
)
24222 case VOIDmode
: eltype
= void_type_node
; break;
24224 case QImode
: eltype
= neon_intQI_type_node
; break;
24225 case HImode
: eltype
= neon_intHI_type_node
; break;
24226 case SImode
: eltype
= neon_intSI_type_node
; break;
24227 case SFmode
: eltype
= neon_float_type_node
; break;
24228 case DImode
: eltype
= neon_intDI_type_node
; break;
24229 case TImode
: eltype
= intTI_type_node
; break;
24230 case EImode
: eltype
= intEI_type_node
; break;
24231 case OImode
: eltype
= intOI_type_node
; break;
24232 case CImode
: eltype
= intCI_type_node
; break;
24233 case XImode
: eltype
= intXI_type_node
; break;
24234 /* 64-bit vectors. */
24235 case V8QImode
: eltype
= V8QI_type_node
; break;
24236 case V4HImode
: eltype
= V4HI_type_node
; break;
24237 case V2SImode
: eltype
= V2SI_type_node
; break;
24238 case V2SFmode
: eltype
= V2SF_type_node
; break;
24239 /* 128-bit vectors. */
24240 case V16QImode
: eltype
= V16QI_type_node
; break;
24241 case V8HImode
: eltype
= V8HI_type_node
; break;
24242 case V4SImode
: eltype
= V4SI_type_node
; break;
24243 case V4SFmode
: eltype
= V4SF_type_node
; break;
24244 case V2DImode
: eltype
= V2DI_type_node
; break;
24245 default: gcc_unreachable ();
24249 if (k
== 0 && !is_store
)
24250 return_type
= eltype
;
24252 args
= tree_cons (NULL_TREE
, eltype
, args
);
24255 ftype
= build_function_type (return_type
, args
);
24259 case NEON_REINTERP
:
24261 /* We iterate over NUM_DREG_TYPES doubleword types,
24262 then NUM_QREG_TYPES quadword types.
24263 V4HF is not a type used in reinterpret, so we translate
24264 d->mode to the correct index in reinterp_ftype_dreg. */
24266 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
24267 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
24269 switch (insn_data
[d
->code
].operand
[0].mode
)
24271 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
24272 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
24273 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
24274 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
24275 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
24276 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
24277 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
24278 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
24279 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
24280 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
24281 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
24282 default: gcc_unreachable ();
24286 case NEON_FLOAT_WIDEN
:
24288 tree eltype
= NULL_TREE
;
24289 tree return_type
= NULL_TREE
;
24291 switch (insn_data
[d
->code
].operand
[1].mode
)
24294 eltype
= V4HF_type_node
;
24295 return_type
= V4SF_type_node
;
24297 default: gcc_unreachable ();
24299 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24302 case NEON_FLOAT_NARROW
:
24304 tree eltype
= NULL_TREE
;
24305 tree return_type
= NULL_TREE
;
24307 switch (insn_data
[d
->code
].operand
[1].mode
)
24310 eltype
= V4SF_type_node
;
24311 return_type
= V4HF_type_node
;
24313 default: gcc_unreachable ();
24315 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24320 tree eltype
= NULL_TREE
;
24321 switch (insn_data
[d
->code
].operand
[1].mode
)
24324 eltype
= V4UHI_type_node
;
24327 eltype
= V8UHI_type_node
;
24330 eltype
= V2USI_type_node
;
24333 eltype
= V4USI_type_node
;
24336 eltype
= V2UDI_type_node
;
24338 default: gcc_unreachable ();
24340 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24343 case NEON_COPYSIGNF
:
24345 tree eltype
= NULL_TREE
;
24346 switch (insn_data
[d
->code
].operand
[1].mode
)
24349 eltype
= V2SF_type_node
;
24352 eltype
= V4SF_type_node
;
24354 default: gcc_unreachable ();
24356 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24360 gcc_unreachable ();
24363 gcc_assert (ftype
!= NULL
);
24365 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
24367 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
24369 arm_builtin_decls
[fcode
] = decl
;
24373 #undef NUM_DREG_TYPES
24374 #undef NUM_QREG_TYPES
24376 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24379 if ((MASK) & insn_flags) \
24382 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24383 BUILT_IN_MD, NULL, NULL_TREE); \
24384 arm_builtin_decls[CODE] = bdecl; \
24389 struct builtin_description
24391 const unsigned int mask
;
24392 const enum insn_code icode
;
24393 const char * const name
;
24394 const enum arm_builtins code
;
24395 const enum rtx_code comparison
;
24396 const unsigned int flag
;
24399 static const struct builtin_description bdesc_2arg
[] =
24401 #define IWMMXT_BUILTIN(code, string, builtin) \
24402 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24403 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24405 #define IWMMXT2_BUILTIN(code, string, builtin) \
24406 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24407 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24409 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
24410 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
24411 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
24412 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
24413 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
24414 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
24415 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
24416 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
24417 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
24418 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
24419 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
24420 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
24421 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
24422 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
24423 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
24424 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
24425 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
24426 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
24427 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
24428 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
24429 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
24430 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
24431 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
24432 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
24433 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
24434 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
24435 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
24436 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
24437 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
24438 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
24439 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
24440 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
24441 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
24442 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
24443 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
24444 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
24445 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
24446 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
24447 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
24448 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
24449 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
24450 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
24451 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
24452 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
24453 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
24454 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
24455 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
24456 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
24457 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
24458 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
24459 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
24460 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
24461 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
24462 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
24463 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
24464 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
24465 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
24466 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
24467 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
24468 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
24469 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
24470 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
24471 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
24472 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
24473 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
24474 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
24475 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
24476 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
24477 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
24478 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
24479 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
24480 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
24481 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
24482 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
24483 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
24484 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
24485 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
24486 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
24488 #define IWMMXT_BUILTIN2(code, builtin) \
24489 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24491 #define IWMMXT2_BUILTIN2(code, builtin) \
24492 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24494 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24495 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24496 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24497 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24498 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24499 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24500 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24501 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24502 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24503 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24506 #define FP_BUILTIN(L, U) \
24507 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24510 FP_BUILTIN (get_fpscr
, GET_FPSCR
)
24511 FP_BUILTIN (set_fpscr
, SET_FPSCR
)
24514 #define CRC32_BUILTIN(L, U) \
24515 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24517 CRC32_BUILTIN (crc32b
, CRC32B
)
24518 CRC32_BUILTIN (crc32h
, CRC32H
)
24519 CRC32_BUILTIN (crc32w
, CRC32W
)
24520 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24521 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24522 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24523 #undef CRC32_BUILTIN
24526 #define CRYPTO_BUILTIN(L, U) \
24527 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24532 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO1(L, U, R, A)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24542 static const struct builtin_description bdesc_1arg
[] =
24544 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24545 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24546 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24547 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24548 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24549 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24550 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24551 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24552 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24553 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24554 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24555 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24556 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24557 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24558 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24559 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24560 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24561 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24562 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24563 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24564 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24565 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24566 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24567 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24569 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24570 #define CRYPTO2(L, U, R, A1, A2)
24571 #define CRYPTO3(L, U, R, A1, A2, A3)
24572 #include "crypto.def"
24578 static const struct builtin_description bdesc_3arg
[] =
24580 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24581 #define CRYPTO1(L, U, R, A)
24582 #define CRYPTO2(L, U, R, A1, A2)
24583 #include "crypto.def"
24588 #undef CRYPTO_BUILTIN
24590 /* Set up all the iWMMXt builtins. This is not called if
24591 TARGET_IWMMXT is zero. */
24594 arm_init_iwmmxt_builtins (void)
24596 const struct builtin_description
* d
;
24599 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24600 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24601 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24603 tree v8qi_ftype_v8qi_v8qi_int
24604 = build_function_type_list (V8QI_type_node
,
24605 V8QI_type_node
, V8QI_type_node
,
24606 integer_type_node
, NULL_TREE
);
24607 tree v4hi_ftype_v4hi_int
24608 = build_function_type_list (V4HI_type_node
,
24609 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24610 tree v2si_ftype_v2si_int
24611 = build_function_type_list (V2SI_type_node
,
24612 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24613 tree v2si_ftype_di_di
24614 = build_function_type_list (V2SI_type_node
,
24615 long_long_integer_type_node
,
24616 long_long_integer_type_node
,
24618 tree di_ftype_di_int
24619 = build_function_type_list (long_long_integer_type_node
,
24620 long_long_integer_type_node
,
24621 integer_type_node
, NULL_TREE
);
24622 tree di_ftype_di_int_int
24623 = build_function_type_list (long_long_integer_type_node
,
24624 long_long_integer_type_node
,
24626 integer_type_node
, NULL_TREE
);
24627 tree int_ftype_v8qi
24628 = build_function_type_list (integer_type_node
,
24629 V8QI_type_node
, NULL_TREE
);
24630 tree int_ftype_v4hi
24631 = build_function_type_list (integer_type_node
,
24632 V4HI_type_node
, NULL_TREE
);
24633 tree int_ftype_v2si
24634 = build_function_type_list (integer_type_node
,
24635 V2SI_type_node
, NULL_TREE
);
24636 tree int_ftype_v8qi_int
24637 = build_function_type_list (integer_type_node
,
24638 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24639 tree int_ftype_v4hi_int
24640 = build_function_type_list (integer_type_node
,
24641 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24642 tree int_ftype_v2si_int
24643 = build_function_type_list (integer_type_node
,
24644 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24645 tree v8qi_ftype_v8qi_int_int
24646 = build_function_type_list (V8QI_type_node
,
24647 V8QI_type_node
, integer_type_node
,
24648 integer_type_node
, NULL_TREE
);
24649 tree v4hi_ftype_v4hi_int_int
24650 = build_function_type_list (V4HI_type_node
,
24651 V4HI_type_node
, integer_type_node
,
24652 integer_type_node
, NULL_TREE
);
24653 tree v2si_ftype_v2si_int_int
24654 = build_function_type_list (V2SI_type_node
,
24655 V2SI_type_node
, integer_type_node
,
24656 integer_type_node
, NULL_TREE
);
24657 /* Miscellaneous. */
24658 tree v8qi_ftype_v4hi_v4hi
24659 = build_function_type_list (V8QI_type_node
,
24660 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24661 tree v4hi_ftype_v2si_v2si
24662 = build_function_type_list (V4HI_type_node
,
24663 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24664 tree v8qi_ftype_v4hi_v8qi
24665 = build_function_type_list (V8QI_type_node
,
24666 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24667 tree v2si_ftype_v4hi_v4hi
24668 = build_function_type_list (V2SI_type_node
,
24669 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24670 tree v2si_ftype_v8qi_v8qi
24671 = build_function_type_list (V2SI_type_node
,
24672 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24673 tree v4hi_ftype_v4hi_di
24674 = build_function_type_list (V4HI_type_node
,
24675 V4HI_type_node
, long_long_integer_type_node
,
24677 tree v2si_ftype_v2si_di
24678 = build_function_type_list (V2SI_type_node
,
24679 V2SI_type_node
, long_long_integer_type_node
,
24682 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24683 tree int_ftype_void
24684 = build_function_type_list (integer_type_node
, NULL_TREE
);
24686 = build_function_type_list (long_long_integer_type_node
,
24687 V8QI_type_node
, NULL_TREE
);
24689 = build_function_type_list (long_long_integer_type_node
,
24690 V4HI_type_node
, NULL_TREE
);
24692 = build_function_type_list (long_long_integer_type_node
,
24693 V2SI_type_node
, NULL_TREE
);
24694 tree v2si_ftype_v4hi
24695 = build_function_type_list (V2SI_type_node
,
24696 V4HI_type_node
, NULL_TREE
);
24697 tree v4hi_ftype_v8qi
24698 = build_function_type_list (V4HI_type_node
,
24699 V8QI_type_node
, NULL_TREE
);
24700 tree v8qi_ftype_v8qi
24701 = build_function_type_list (V8QI_type_node
,
24702 V8QI_type_node
, NULL_TREE
);
24703 tree v4hi_ftype_v4hi
24704 = build_function_type_list (V4HI_type_node
,
24705 V4HI_type_node
, NULL_TREE
);
24706 tree v2si_ftype_v2si
24707 = build_function_type_list (V2SI_type_node
,
24708 V2SI_type_node
, NULL_TREE
);
24710 tree di_ftype_di_v4hi_v4hi
24711 = build_function_type_list (long_long_unsigned_type_node
,
24712 long_long_unsigned_type_node
,
24713 V4HI_type_node
, V4HI_type_node
,
24716 tree di_ftype_v4hi_v4hi
24717 = build_function_type_list (long_long_unsigned_type_node
,
24718 V4HI_type_node
,V4HI_type_node
,
24721 tree v2si_ftype_v2si_v4hi_v4hi
24722 = build_function_type_list (V2SI_type_node
,
24723 V2SI_type_node
, V4HI_type_node
,
24724 V4HI_type_node
, NULL_TREE
);
24726 tree v2si_ftype_v2si_v8qi_v8qi
24727 = build_function_type_list (V2SI_type_node
,
24728 V2SI_type_node
, V8QI_type_node
,
24729 V8QI_type_node
, NULL_TREE
);
24731 tree di_ftype_di_v2si_v2si
24732 = build_function_type_list (long_long_unsigned_type_node
,
24733 long_long_unsigned_type_node
,
24734 V2SI_type_node
, V2SI_type_node
,
24737 tree di_ftype_di_di_int
24738 = build_function_type_list (long_long_unsigned_type_node
,
24739 long_long_unsigned_type_node
,
24740 long_long_unsigned_type_node
,
24741 integer_type_node
, NULL_TREE
);
24743 tree void_ftype_int
24744 = build_function_type_list (void_type_node
,
24745 integer_type_node
, NULL_TREE
);
24747 tree v8qi_ftype_char
24748 = build_function_type_list (V8QI_type_node
,
24749 signed_char_type_node
, NULL_TREE
);
24751 tree v4hi_ftype_short
24752 = build_function_type_list (V4HI_type_node
,
24753 short_integer_type_node
, NULL_TREE
);
24755 tree v2si_ftype_int
24756 = build_function_type_list (V2SI_type_node
,
24757 integer_type_node
, NULL_TREE
);
24759 /* Normal vector binops. */
24760 tree v8qi_ftype_v8qi_v8qi
24761 = build_function_type_list (V8QI_type_node
,
24762 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24763 tree v4hi_ftype_v4hi_v4hi
24764 = build_function_type_list (V4HI_type_node
,
24765 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24766 tree v2si_ftype_v2si_v2si
24767 = build_function_type_list (V2SI_type_node
,
24768 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24769 tree di_ftype_di_di
24770 = build_function_type_list (long_long_unsigned_type_node
,
24771 long_long_unsigned_type_node
,
24772 long_long_unsigned_type_node
,
24775 /* Add all builtins that are more or less simple operations on two
24777 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24779 /* Use one of the operands; the target can have a different mode for
24780 mask-generating compares. */
24784 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24787 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24792 type
= v8qi_ftype_v8qi_v8qi
;
24795 type
= v4hi_ftype_v4hi_v4hi
;
24798 type
= v2si_ftype_v2si_v2si
;
24801 type
= di_ftype_di_di
;
24805 gcc_unreachable ();
24808 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24811 /* Add the remaining MMX insns with somewhat more complicated types. */
24812 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24814 ARM_BUILTIN_ ## CODE)
24816 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24817 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24818 ARM_BUILTIN_ ## CODE)
24820 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24821 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24822 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24823 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24824 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24825 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24826 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24827 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24828 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24830 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24831 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24832 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24833 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24834 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24835 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24837 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24838 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24839 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24840 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24841 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24842 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24844 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24845 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24846 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24847 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24848 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24849 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24851 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24852 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24853 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24854 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24855 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24856 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24858 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24860 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24861 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24862 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24863 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24864 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24865 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24866 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24867 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24868 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24869 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24871 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24872 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24873 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24874 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24875 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24876 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24877 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24878 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24879 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24881 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24882 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24883 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24885 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24886 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24887 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24889 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24890 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24892 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24893 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24894 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24895 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24896 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24897 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24899 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24900 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24901 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24902 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24903 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24904 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24905 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24906 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24907 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24908 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24909 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24910 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24912 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24913 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24914 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24915 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24917 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24918 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24919 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24920 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24921 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24922 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24923 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24925 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24926 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24927 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24929 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24930 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24931 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24932 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24934 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24935 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24936 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24937 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24939 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24940 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24941 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24942 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24944 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24945 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24946 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24947 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24949 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24950 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24951 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24952 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24954 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24955 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24956 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24957 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24959 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24961 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24962 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24963 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24965 #undef iwmmx_mbuiltin
24966 #undef iwmmx2_mbuiltin
24970 arm_init_fp16_builtins (void)
24972 tree fp16_type
= make_node (REAL_TYPE
);
24973 TYPE_PRECISION (fp16_type
) = 16;
24974 layout_type (fp16_type
);
24975 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24979 arm_init_crc32_builtins ()
24981 tree si_ftype_si_qi
24982 = build_function_type_list (unsigned_intSI_type_node
,
24983 unsigned_intSI_type_node
,
24984 unsigned_intQI_type_node
, NULL_TREE
);
24985 tree si_ftype_si_hi
24986 = build_function_type_list (unsigned_intSI_type_node
,
24987 unsigned_intSI_type_node
,
24988 unsigned_intHI_type_node
, NULL_TREE
);
24989 tree si_ftype_si_si
24990 = build_function_type_list (unsigned_intSI_type_node
,
24991 unsigned_intSI_type_node
,
24992 unsigned_intSI_type_node
, NULL_TREE
);
24994 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24995 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24996 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24997 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24998 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24999 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25000 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
25001 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
25002 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25003 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
25004 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
25005 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25006 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
25007 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
25008 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25009 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
25010 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
25011 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25015 arm_init_builtins (void)
25017 if (TARGET_REALLY_IWMMXT
)
25018 arm_init_iwmmxt_builtins ();
25021 arm_init_neon_builtins ();
25023 if (arm_fp16_format
)
25024 arm_init_fp16_builtins ();
25027 arm_init_crc32_builtins ();
25029 if (TARGET_VFP
&& TARGET_HARD_FLOAT
)
25031 tree ftype_set_fpscr
25032 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL
);
25033 tree ftype_get_fpscr
25034 = build_function_type_list (unsigned_type_node
, NULL
);
25036 arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
]
25037 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr
,
25038 ARM_BUILTIN_GET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25039 arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
]
25040 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr
,
25041 ARM_BUILTIN_SET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
25045 /* Return the ARM builtin for CODE. */
25048 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
25050 if (code
>= ARM_BUILTIN_MAX
)
25051 return error_mark_node
;
25053 return arm_builtin_decls
[code
];
25056 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25058 static const char *
25059 arm_invalid_parameter_type (const_tree t
)
25061 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
25062 return N_("function parameters cannot have __fp16 type");
25066 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25068 static const char *
25069 arm_invalid_return_type (const_tree t
)
25071 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
25072 return N_("functions cannot return __fp16 type");
25076 /* Implement TARGET_PROMOTED_TYPE. */
25079 arm_promoted_type (const_tree t
)
25081 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
25082 return float_type_node
;
25086 /* Implement TARGET_CONVERT_TO_TYPE.
25087 Specifically, this hook implements the peculiarity of the ARM
25088 half-precision floating-point C semantics that requires conversions between
25089 __fp16 to or from double to do an intermediate conversion to float. */
25092 arm_convert_to_type (tree type
, tree expr
)
25094 tree fromtype
= TREE_TYPE (expr
);
25095 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
25097 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
25098 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
25099 return convert (type
, convert (float_type_node
, expr
));
25103 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25104 This simply adds HFmode as a supported mode; even though we don't
25105 implement arithmetic on this type directly, it's supported by
25106 optabs conversions, much the way the double-word arithmetic is
25107 special-cased in the default hook. */
25110 arm_scalar_mode_supported_p (machine_mode mode
)
25112 if (mode
== HFmode
)
25113 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
25114 else if (ALL_FIXED_POINT_MODE_P (mode
))
25117 return default_scalar_mode_supported_p (mode
);
25120 /* Errors in the source file can cause expand_expr to return const0_rtx
25121 where we expect a vector. To avoid crashing, use one of the vector
25122 clear instructions. */
25125 safe_vector_operand (rtx x
, machine_mode mode
)
25127 if (x
!= const0_rtx
)
25129 x
= gen_reg_rtx (mode
);
25131 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
25132 : gen_rtx_SUBREG (DImode
, x
, 0)));
25136 /* Function to expand ternary builtins. */
25138 arm_expand_ternop_builtin (enum insn_code icode
,
25139 tree exp
, rtx target
)
25142 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25143 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25144 tree arg2
= CALL_EXPR_ARG (exp
, 2);
25146 rtx op0
= expand_normal (arg0
);
25147 rtx op1
= expand_normal (arg1
);
25148 rtx op2
= expand_normal (arg2
);
25149 rtx op3
= NULL_RTX
;
25151 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25152 lane operand depending on endianness. */
25153 bool builtin_sha1cpm_p
= false;
25155 if (insn_data
[icode
].n_operands
== 5)
25157 gcc_assert (icode
== CODE_FOR_crypto_sha1c
25158 || icode
== CODE_FOR_crypto_sha1p
25159 || icode
== CODE_FOR_crypto_sha1m
);
25160 builtin_sha1cpm_p
= true;
25162 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25163 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25164 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
25165 machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
25168 if (VECTOR_MODE_P (mode0
))
25169 op0
= safe_vector_operand (op0
, mode0
);
25170 if (VECTOR_MODE_P (mode1
))
25171 op1
= safe_vector_operand (op1
, mode1
);
25172 if (VECTOR_MODE_P (mode2
))
25173 op2
= safe_vector_operand (op2
, mode2
);
25176 || GET_MODE (target
) != tmode
25177 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25178 target
= gen_reg_rtx (tmode
);
25180 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
25181 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
25182 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
25184 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25185 op0
= copy_to_mode_reg (mode0
, op0
);
25186 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25187 op1
= copy_to_mode_reg (mode1
, op1
);
25188 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25189 op2
= copy_to_mode_reg (mode2
, op2
);
25190 if (builtin_sha1cpm_p
)
25191 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25193 if (builtin_sha1cpm_p
)
25194 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
25196 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25203 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25206 arm_expand_binop_builtin (enum insn_code icode
,
25207 tree exp
, rtx target
)
25210 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25211 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25212 rtx op0
= expand_normal (arg0
);
25213 rtx op1
= expand_normal (arg1
);
25214 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25215 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25216 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
25218 if (VECTOR_MODE_P (mode0
))
25219 op0
= safe_vector_operand (op0
, mode0
);
25220 if (VECTOR_MODE_P (mode1
))
25221 op1
= safe_vector_operand (op1
, mode1
);
25224 || GET_MODE (target
) != tmode
25225 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25226 target
= gen_reg_rtx (tmode
);
25228 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
25229 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
25231 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25232 op0
= copy_to_mode_reg (mode0
, op0
);
25233 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25234 op1
= copy_to_mode_reg (mode1
, op1
);
25236 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25243 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25246 arm_expand_unop_builtin (enum insn_code icode
,
25247 tree exp
, rtx target
, int do_load
)
25250 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25251 rtx op0
= expand_normal (arg0
);
25252 rtx op1
= NULL_RTX
;
25253 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25254 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25255 bool builtin_sha1h_p
= false;
25257 if (insn_data
[icode
].n_operands
== 3)
25259 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
25260 builtin_sha1h_p
= true;
25264 || GET_MODE (target
) != tmode
25265 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25266 target
= gen_reg_rtx (tmode
);
25268 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
25271 if (VECTOR_MODE_P (mode0
))
25272 op0
= safe_vector_operand (op0
, mode0
);
25274 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25275 op0
= copy_to_mode_reg (mode0
, op0
);
25277 if (builtin_sha1h_p
)
25278 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25280 if (builtin_sha1h_p
)
25281 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25283 pat
= GEN_FCN (icode
) (target
, op0
);
25291 NEON_ARG_COPY_TO_REG
,
25297 #define NEON_MAX_BUILTIN_ARGS 5
25299 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25300 and return an expression for the accessed memory.
25302 The intrinsic function operates on a block of registers that has
25303 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25304 function references the memory at EXP of type TYPE and in mode
25305 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25309 neon_dereference_pointer (tree exp
, tree type
, machine_mode mem_mode
,
25310 machine_mode reg_mode
,
25311 neon_builtin_type_mode type_mode
)
25313 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
25314 tree elem_type
, upper_bound
, array_type
;
25316 /* Work out the size of the register block in bytes. */
25317 reg_size
= GET_MODE_SIZE (reg_mode
);
25319 /* Work out the size of each vector in bytes. */
25320 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
25321 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
25323 /* Work out how many vectors there are. */
25324 gcc_assert (reg_size
% vector_size
== 0);
25325 nvectors
= reg_size
/ vector_size
;
25327 /* Work out the type of each element. */
25328 gcc_assert (POINTER_TYPE_P (type
));
25329 elem_type
= TREE_TYPE (type
);
25331 /* Work out how many elements are being loaded or stored.
25332 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25333 and memory elements; anything else implies a lane load or store. */
25334 if (mem_mode
== reg_mode
)
25335 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
25339 /* Create a type that describes the full access. */
25340 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
25341 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
25343 /* Dereference EXP using that type. */
25344 return fold_build2 (MEM_REF
, array_type
, exp
,
25345 build_int_cst (build_pointer_type (array_type
), 0));
25348 /* Expand a Neon builtin. */
25350 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
25351 neon_builtin_type_mode type_mode
,
25352 tree exp
, int fcode
, ...)
25356 tree arg
[NEON_MAX_BUILTIN_ARGS
];
25357 rtx op
[NEON_MAX_BUILTIN_ARGS
];
25360 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25361 machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
25362 machine_mode other_mode
;
25368 || GET_MODE (target
) != tmode
25369 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
25370 target
= gen_reg_rtx (tmode
);
25372 va_start (ap
, fcode
);
25374 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
25378 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
25380 if (thisarg
== NEON_ARG_STOP
)
25384 opno
= argc
+ have_retval
;
25385 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
25386 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
25387 arg_type
= TREE_VALUE (formals
);
25388 if (thisarg
== NEON_ARG_MEMORY
)
25390 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
25391 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
25392 mode
[argc
], other_mode
,
25396 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25398 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
25399 (thisarg
== NEON_ARG_MEMORY
25400 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
25404 case NEON_ARG_COPY_TO_REG
:
25405 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25406 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25407 (op
[argc
], mode
[argc
]))
25408 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
25411 case NEON_ARG_CONSTANT
:
25412 /* FIXME: This error message is somewhat unhelpful. */
25413 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25414 (op
[argc
], mode
[argc
]))
25415 error ("argument must be a constant");
25418 case NEON_ARG_MEMORY
:
25419 /* Check if expand failed. */
25420 if (op
[argc
] == const0_rtx
)
25422 gcc_assert (MEM_P (op
[argc
]));
25423 PUT_MODE (op
[argc
], mode
[argc
]);
25424 /* ??? arm_neon.h uses the same built-in functions for signed
25425 and unsigned accesses, casting where necessary. This isn't
25427 set_mem_alias_set (op
[argc
], 0);
25428 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25429 (op
[argc
], mode
[argc
]))
25430 op
[argc
] = (replace_equiv_address
25431 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
25434 case NEON_ARG_STOP
:
25435 gcc_unreachable ();
25439 formals
= TREE_CHAIN (formals
);
25449 pat
= GEN_FCN (icode
) (target
, op
[0]);
25453 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
25457 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
25461 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
25465 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
25469 gcc_unreachable ();
25475 pat
= GEN_FCN (icode
) (op
[0]);
25479 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
25483 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
25487 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
25491 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
25495 gcc_unreachable ();
25506 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25507 constants defined per-instruction or per instruction-variant. Instead, the
25508 required info is looked up in the table neon_builtin_data. */
25510 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
25512 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
25513 neon_itype itype
= d
->itype
;
25514 enum insn_code icode
= d
->code
;
25515 neon_builtin_type_mode type_mode
= d
->mode
;
25522 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25523 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25527 case NEON_SCALARMUL
:
25528 case NEON_SCALARMULL
:
25529 case NEON_SCALARMULH
:
25530 case NEON_SHIFTINSERT
:
25531 case NEON_LOGICBINOP
:
25532 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25533 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25537 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25538 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25539 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25543 case NEON_SHIFTIMM
:
25544 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25545 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25549 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25550 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25555 case NEON_FLOAT_WIDEN
:
25556 case NEON_FLOAT_NARROW
:
25558 case NEON_REINTERP
:
25559 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25560 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25562 case NEON_COPYSIGNF
:
25565 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25566 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25569 case NEON_LANEMULL
:
25570 case NEON_LANEMULH
:
25571 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25572 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25573 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25576 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25577 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25578 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25580 case NEON_SHIFTACC
:
25581 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25582 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25583 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25585 case NEON_SCALARMAC
:
25586 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25587 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25588 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25592 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25593 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25597 case NEON_LOADSTRUCT
:
25598 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25599 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25601 case NEON_LOAD1LANE
:
25602 case NEON_LOADSTRUCTLANE
:
25603 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25604 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25608 case NEON_STORESTRUCT
:
25609 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25610 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25612 case NEON_STORE1LANE
:
25613 case NEON_STORESTRUCTLANE
:
25614 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25615 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25619 gcc_unreachable ();
25622 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25624 neon_reinterpret (rtx dest
, rtx src
)
25626 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25629 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25630 not to early-clobber SRC registers in the process.
25632 We assume that the operands described by SRC and DEST represent a
25633 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25634 number of components into which the copy has been decomposed. */
25636 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25640 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25641 || REGNO (operands
[0]) < REGNO (operands
[1]))
25643 for (i
= 0; i
< count
; i
++)
25645 operands
[2 * i
] = dest
[i
];
25646 operands
[2 * i
+ 1] = src
[i
];
25651 for (i
= 0; i
< count
; i
++)
25653 operands
[2 * i
] = dest
[count
- i
- 1];
25654 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25659 /* Split operands into moves from op[1] + op[2] into op[0]. */
25662 neon_split_vcombine (rtx operands
[3])
25664 unsigned int dest
= REGNO (operands
[0]);
25665 unsigned int src1
= REGNO (operands
[1]);
25666 unsigned int src2
= REGNO (operands
[2]);
25667 machine_mode halfmode
= GET_MODE (operands
[1]);
25668 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25669 rtx destlo
, desthi
;
25671 if (src1
== dest
&& src2
== dest
+ halfregs
)
25673 /* No-op move. Can't split to nothing; emit something. */
25674 emit_note (NOTE_INSN_DELETED
);
25678 /* Preserve register attributes for variable tracking. */
25679 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25680 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25681 GET_MODE_SIZE (halfmode
));
25683 /* Special case of reversed high/low parts. Use VSWP. */
25684 if (src2
== dest
&& src1
== dest
+ halfregs
)
25686 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25687 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25688 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25692 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25694 /* Try to avoid unnecessary moves if part of the result
25695 is in the right place already. */
25697 emit_move_insn (destlo
, operands
[1]);
25698 if (src2
!= dest
+ halfregs
)
25699 emit_move_insn (desthi
, operands
[2]);
25703 if (src2
!= dest
+ halfregs
)
25704 emit_move_insn (desthi
, operands
[2]);
25706 emit_move_insn (destlo
, operands
[1]);
25710 /* Expand an expression EXP that calls a built-in function,
25711 with result going to TARGET if that's convenient
25712 (and in mode MODE if that's convenient).
25713 SUBTARGET may be used as the target for computing one of EXP's operands.
25714 IGNORE is nonzero if the value is to be ignored. */
25717 arm_expand_builtin (tree exp
,
25719 rtx subtarget ATTRIBUTE_UNUSED
,
25720 machine_mode mode ATTRIBUTE_UNUSED
,
25721 int ignore ATTRIBUTE_UNUSED
)
25723 const struct builtin_description
* d
;
25724 enum insn_code icode
;
25725 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25733 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25735 machine_mode tmode
;
25736 machine_mode mode0
;
25737 machine_mode mode1
;
25738 machine_mode mode2
;
25744 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25745 return arm_expand_neon_builtin (fcode
, exp
, target
);
25749 case ARM_BUILTIN_GET_FPSCR
:
25750 case ARM_BUILTIN_SET_FPSCR
:
25751 if (fcode
== ARM_BUILTIN_GET_FPSCR
)
25753 icode
= CODE_FOR_get_fpscr
;
25754 target
= gen_reg_rtx (SImode
);
25755 pat
= GEN_FCN (icode
) (target
);
25760 icode
= CODE_FOR_set_fpscr
;
25761 arg0
= CALL_EXPR_ARG (exp
, 0);
25762 op0
= expand_normal (arg0
);
25763 pat
= GEN_FCN (icode
) (op0
);
25768 case ARM_BUILTIN_TEXTRMSB
:
25769 case ARM_BUILTIN_TEXTRMUB
:
25770 case ARM_BUILTIN_TEXTRMSH
:
25771 case ARM_BUILTIN_TEXTRMUH
:
25772 case ARM_BUILTIN_TEXTRMSW
:
25773 case ARM_BUILTIN_TEXTRMUW
:
25774 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25775 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25776 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25777 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25778 : CODE_FOR_iwmmxt_textrmw
);
25780 arg0
= CALL_EXPR_ARG (exp
, 0);
25781 arg1
= CALL_EXPR_ARG (exp
, 1);
25782 op0
= expand_normal (arg0
);
25783 op1
= expand_normal (arg1
);
25784 tmode
= insn_data
[icode
].operand
[0].mode
;
25785 mode0
= insn_data
[icode
].operand
[1].mode
;
25786 mode1
= insn_data
[icode
].operand
[2].mode
;
25788 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25789 op0
= copy_to_mode_reg (mode0
, op0
);
25790 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25792 /* @@@ better error message */
25793 error ("selector must be an immediate");
25794 return gen_reg_rtx (tmode
);
25797 opint
= INTVAL (op1
);
25798 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25800 if (opint
> 7 || opint
< 0)
25801 error ("the range of selector should be in 0 to 7");
25803 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25805 if (opint
> 3 || opint
< 0)
25806 error ("the range of selector should be in 0 to 3");
25808 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25810 if (opint
> 1 || opint
< 0)
25811 error ("the range of selector should be in 0 to 1");
25815 || GET_MODE (target
) != tmode
25816 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25817 target
= gen_reg_rtx (tmode
);
25818 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25824 case ARM_BUILTIN_WALIGNI
:
25825 /* If op2 is immediate, call walighi, else call walighr. */
25826 arg0
= CALL_EXPR_ARG (exp
, 0);
25827 arg1
= CALL_EXPR_ARG (exp
, 1);
25828 arg2
= CALL_EXPR_ARG (exp
, 2);
25829 op0
= expand_normal (arg0
);
25830 op1
= expand_normal (arg1
);
25831 op2
= expand_normal (arg2
);
25832 if (CONST_INT_P (op2
))
25834 icode
= CODE_FOR_iwmmxt_waligni
;
25835 tmode
= insn_data
[icode
].operand
[0].mode
;
25836 mode0
= insn_data
[icode
].operand
[1].mode
;
25837 mode1
= insn_data
[icode
].operand
[2].mode
;
25838 mode2
= insn_data
[icode
].operand
[3].mode
;
25839 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25840 op0
= copy_to_mode_reg (mode0
, op0
);
25841 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25842 op1
= copy_to_mode_reg (mode1
, op1
);
25843 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25844 selector
= INTVAL (op2
);
25845 if (selector
> 7 || selector
< 0)
25846 error ("the range of selector should be in 0 to 7");
25850 icode
= CODE_FOR_iwmmxt_walignr
;
25851 tmode
= insn_data
[icode
].operand
[0].mode
;
25852 mode0
= insn_data
[icode
].operand
[1].mode
;
25853 mode1
= insn_data
[icode
].operand
[2].mode
;
25854 mode2
= insn_data
[icode
].operand
[3].mode
;
25855 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25856 op0
= copy_to_mode_reg (mode0
, op0
);
25857 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25858 op1
= copy_to_mode_reg (mode1
, op1
);
25859 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25860 op2
= copy_to_mode_reg (mode2
, op2
);
25863 || GET_MODE (target
) != tmode
25864 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25865 target
= gen_reg_rtx (tmode
);
25866 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25872 case ARM_BUILTIN_TINSRB
:
25873 case ARM_BUILTIN_TINSRH
:
25874 case ARM_BUILTIN_TINSRW
:
25875 case ARM_BUILTIN_WMERGE
:
25876 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25877 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25878 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25879 : CODE_FOR_iwmmxt_tinsrw
);
25880 arg0
= CALL_EXPR_ARG (exp
, 0);
25881 arg1
= CALL_EXPR_ARG (exp
, 1);
25882 arg2
= CALL_EXPR_ARG (exp
, 2);
25883 op0
= expand_normal (arg0
);
25884 op1
= expand_normal (arg1
);
25885 op2
= expand_normal (arg2
);
25886 tmode
= insn_data
[icode
].operand
[0].mode
;
25887 mode0
= insn_data
[icode
].operand
[1].mode
;
25888 mode1
= insn_data
[icode
].operand
[2].mode
;
25889 mode2
= insn_data
[icode
].operand
[3].mode
;
25891 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25892 op0
= copy_to_mode_reg (mode0
, op0
);
25893 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25894 op1
= copy_to_mode_reg (mode1
, op1
);
25895 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25897 error ("selector must be an immediate");
25900 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25902 selector
= INTVAL (op2
);
25903 if (selector
> 7 || selector
< 0)
25904 error ("the range of selector should be in 0 to 7");
25906 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25907 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25908 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25911 selector
= INTVAL (op2
);
25912 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25913 error ("the range of selector should be in 0 to 7");
25914 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25915 error ("the range of selector should be in 0 to 3");
25916 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25917 error ("the range of selector should be in 0 to 1");
25919 op2
= GEN_INT (mask
);
25922 || GET_MODE (target
) != tmode
25923 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25924 target
= gen_reg_rtx (tmode
);
25925 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25931 case ARM_BUILTIN_SETWCGR0
:
25932 case ARM_BUILTIN_SETWCGR1
:
25933 case ARM_BUILTIN_SETWCGR2
:
25934 case ARM_BUILTIN_SETWCGR3
:
25935 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25936 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25937 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25938 : CODE_FOR_iwmmxt_setwcgr3
);
25939 arg0
= CALL_EXPR_ARG (exp
, 0);
25940 op0
= expand_normal (arg0
);
25941 mode0
= insn_data
[icode
].operand
[0].mode
;
25942 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25943 op0
= copy_to_mode_reg (mode0
, op0
);
25944 pat
= GEN_FCN (icode
) (op0
);
25950 case ARM_BUILTIN_GETWCGR0
:
25951 case ARM_BUILTIN_GETWCGR1
:
25952 case ARM_BUILTIN_GETWCGR2
:
25953 case ARM_BUILTIN_GETWCGR3
:
25954 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25955 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25956 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25957 : CODE_FOR_iwmmxt_getwcgr3
);
25958 tmode
= insn_data
[icode
].operand
[0].mode
;
25960 || GET_MODE (target
) != tmode
25961 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25962 target
= gen_reg_rtx (tmode
);
25963 pat
= GEN_FCN (icode
) (target
);
25969 case ARM_BUILTIN_WSHUFH
:
25970 icode
= CODE_FOR_iwmmxt_wshufh
;
25971 arg0
= CALL_EXPR_ARG (exp
, 0);
25972 arg1
= CALL_EXPR_ARG (exp
, 1);
25973 op0
= expand_normal (arg0
);
25974 op1
= expand_normal (arg1
);
25975 tmode
= insn_data
[icode
].operand
[0].mode
;
25976 mode1
= insn_data
[icode
].operand
[1].mode
;
25977 mode2
= insn_data
[icode
].operand
[2].mode
;
25979 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25980 op0
= copy_to_mode_reg (mode1
, op0
);
25981 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25983 error ("mask must be an immediate");
25986 selector
= INTVAL (op1
);
25987 if (selector
< 0 || selector
> 255)
25988 error ("the range of mask should be in 0 to 255");
25990 || GET_MODE (target
) != tmode
25991 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25992 target
= gen_reg_rtx (tmode
);
25993 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25999 case ARM_BUILTIN_WMADDS
:
26000 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
26001 case ARM_BUILTIN_WMADDSX
:
26002 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
26003 case ARM_BUILTIN_WMADDSN
:
26004 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
26005 case ARM_BUILTIN_WMADDU
:
26006 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
26007 case ARM_BUILTIN_WMADDUX
:
26008 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
26009 case ARM_BUILTIN_WMADDUN
:
26010 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
26011 case ARM_BUILTIN_WSADBZ
:
26012 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
26013 case ARM_BUILTIN_WSADHZ
:
26014 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
26016 /* Several three-argument builtins. */
26017 case ARM_BUILTIN_WMACS
:
26018 case ARM_BUILTIN_WMACU
:
26019 case ARM_BUILTIN_TMIA
:
26020 case ARM_BUILTIN_TMIAPH
:
26021 case ARM_BUILTIN_TMIATT
:
26022 case ARM_BUILTIN_TMIATB
:
26023 case ARM_BUILTIN_TMIABT
:
26024 case ARM_BUILTIN_TMIABB
:
26025 case ARM_BUILTIN_WQMIABB
:
26026 case ARM_BUILTIN_WQMIABT
:
26027 case ARM_BUILTIN_WQMIATB
:
26028 case ARM_BUILTIN_WQMIATT
:
26029 case ARM_BUILTIN_WQMIABBN
:
26030 case ARM_BUILTIN_WQMIABTN
:
26031 case ARM_BUILTIN_WQMIATBN
:
26032 case ARM_BUILTIN_WQMIATTN
:
26033 case ARM_BUILTIN_WMIABB
:
26034 case ARM_BUILTIN_WMIABT
:
26035 case ARM_BUILTIN_WMIATB
:
26036 case ARM_BUILTIN_WMIATT
:
26037 case ARM_BUILTIN_WMIABBN
:
26038 case ARM_BUILTIN_WMIABTN
:
26039 case ARM_BUILTIN_WMIATBN
:
26040 case ARM_BUILTIN_WMIATTN
:
26041 case ARM_BUILTIN_WMIAWBB
:
26042 case ARM_BUILTIN_WMIAWBT
:
26043 case ARM_BUILTIN_WMIAWTB
:
26044 case ARM_BUILTIN_WMIAWTT
:
26045 case ARM_BUILTIN_WMIAWBBN
:
26046 case ARM_BUILTIN_WMIAWBTN
:
26047 case ARM_BUILTIN_WMIAWTBN
:
26048 case ARM_BUILTIN_WMIAWTTN
:
26049 case ARM_BUILTIN_WSADB
:
26050 case ARM_BUILTIN_WSADH
:
26051 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
26052 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
26053 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
26054 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
26055 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
26056 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
26057 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
26058 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
26059 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
26060 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
26061 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
26062 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
26063 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
26064 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
26065 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
26066 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
26067 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
26068 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
26069 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
26070 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
26071 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
26072 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
26073 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
26074 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
26075 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
26076 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
26077 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
26078 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
26079 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
26080 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
26081 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
26082 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
26083 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
26084 : CODE_FOR_iwmmxt_wsadh
);
26085 arg0
= CALL_EXPR_ARG (exp
, 0);
26086 arg1
= CALL_EXPR_ARG (exp
, 1);
26087 arg2
= CALL_EXPR_ARG (exp
, 2);
26088 op0
= expand_normal (arg0
);
26089 op1
= expand_normal (arg1
);
26090 op2
= expand_normal (arg2
);
26091 tmode
= insn_data
[icode
].operand
[0].mode
;
26092 mode0
= insn_data
[icode
].operand
[1].mode
;
26093 mode1
= insn_data
[icode
].operand
[2].mode
;
26094 mode2
= insn_data
[icode
].operand
[3].mode
;
26096 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
26097 op0
= copy_to_mode_reg (mode0
, op0
);
26098 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
26099 op1
= copy_to_mode_reg (mode1
, op1
);
26100 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
26101 op2
= copy_to_mode_reg (mode2
, op2
);
26103 || GET_MODE (target
) != tmode
26104 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
26105 target
= gen_reg_rtx (tmode
);
26106 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
26112 case ARM_BUILTIN_WZERO
:
26113 target
= gen_reg_rtx (DImode
);
26114 emit_insn (gen_iwmmxt_clrdi (target
));
26117 case ARM_BUILTIN_WSRLHI
:
26118 case ARM_BUILTIN_WSRLWI
:
26119 case ARM_BUILTIN_WSRLDI
:
26120 case ARM_BUILTIN_WSLLHI
:
26121 case ARM_BUILTIN_WSLLWI
:
26122 case ARM_BUILTIN_WSLLDI
:
26123 case ARM_BUILTIN_WSRAHI
:
26124 case ARM_BUILTIN_WSRAWI
:
26125 case ARM_BUILTIN_WSRADI
:
26126 case ARM_BUILTIN_WRORHI
:
26127 case ARM_BUILTIN_WRORWI
:
26128 case ARM_BUILTIN_WRORDI
:
26129 case ARM_BUILTIN_WSRLH
:
26130 case ARM_BUILTIN_WSRLW
:
26131 case ARM_BUILTIN_WSRLD
:
26132 case ARM_BUILTIN_WSLLH
:
26133 case ARM_BUILTIN_WSLLW
:
26134 case ARM_BUILTIN_WSLLD
:
26135 case ARM_BUILTIN_WSRAH
:
26136 case ARM_BUILTIN_WSRAW
:
26137 case ARM_BUILTIN_WSRAD
:
26138 case ARM_BUILTIN_WRORH
:
26139 case ARM_BUILTIN_WRORW
:
26140 case ARM_BUILTIN_WRORD
:
26141 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
26142 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
26143 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
26144 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
26145 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
26146 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
26147 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
26148 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
26149 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
26150 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
26151 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
26152 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
26153 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
26154 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
26155 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
26156 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
26157 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
26158 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
26159 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
26160 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
26161 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
26162 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
26163 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
26164 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
26165 : CODE_FOR_nothing
);
26166 arg1
= CALL_EXPR_ARG (exp
, 1);
26167 op1
= expand_normal (arg1
);
26168 if (GET_MODE (op1
) == VOIDmode
)
26170 imm
= INTVAL (op1
);
26171 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
26172 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
26173 && (imm
< 0 || imm
> 32))
26175 if (fcode
== ARM_BUILTIN_WRORHI
)
26176 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26177 else if (fcode
== ARM_BUILTIN_WRORWI
)
26178 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26179 else if (fcode
== ARM_BUILTIN_WRORH
)
26180 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26182 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26184 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
26185 && (imm
< 0 || imm
> 64))
26187 if (fcode
== ARM_BUILTIN_WRORDI
)
26188 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26190 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26194 if (fcode
== ARM_BUILTIN_WSRLHI
)
26195 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26196 else if (fcode
== ARM_BUILTIN_WSRLWI
)
26197 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26198 else if (fcode
== ARM_BUILTIN_WSRLDI
)
26199 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26200 else if (fcode
== ARM_BUILTIN_WSLLHI
)
26201 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26202 else if (fcode
== ARM_BUILTIN_WSLLWI
)
26203 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26204 else if (fcode
== ARM_BUILTIN_WSLLDI
)
26205 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26206 else if (fcode
== ARM_BUILTIN_WSRAHI
)
26207 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26208 else if (fcode
== ARM_BUILTIN_WSRAWI
)
26209 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26210 else if (fcode
== ARM_BUILTIN_WSRADI
)
26211 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26212 else if (fcode
== ARM_BUILTIN_WSRLH
)
26213 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26214 else if (fcode
== ARM_BUILTIN_WSRLW
)
26215 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26216 else if (fcode
== ARM_BUILTIN_WSRLD
)
26217 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26218 else if (fcode
== ARM_BUILTIN_WSLLH
)
26219 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26220 else if (fcode
== ARM_BUILTIN_WSLLW
)
26221 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26222 else if (fcode
== ARM_BUILTIN_WSLLD
)
26223 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26224 else if (fcode
== ARM_BUILTIN_WSRAH
)
26225 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26226 else if (fcode
== ARM_BUILTIN_WSRAW
)
26227 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26229 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26232 return arm_expand_binop_builtin (icode
, exp
, target
);
26238 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
26239 if (d
->code
== (const enum arm_builtins
) fcode
)
26240 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
26242 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
26243 if (d
->code
== (const enum arm_builtins
) fcode
)
26244 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
26246 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
26247 if (d
->code
== (const enum arm_builtins
) fcode
)
26248 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
26250 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  /* Delegate to the host count-trailing-zeros helper.  */
  return ctz_hwi (mask);
}
26263 /* Like emit_multi_reg_push, but allowing for a different set of
26264 registers to be described as saved. MASK is the set of registers
26265 to be saved; REAL_REGS is the set of registers to be described as
26266 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26269 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26271 unsigned long regno
;
26272 rtx par
[10], tmp
, reg
;
26276 /* Build the parallel of the registers actually being stored. */
26277 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26279 regno
= ctz_hwi (mask
);
26280 reg
= gen_rtx_REG (SImode
, regno
);
26283 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26285 tmp
= gen_rtx_USE (VOIDmode
, reg
);
26290 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26291 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26292 tmp
= gen_frame_mem (BLKmode
, tmp
);
26293 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
26296 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26297 insn
= emit_insn (tmp
);
26299 /* Always build the stack adjustment note for unwind info. */
26300 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26301 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
26304 /* Build the parallel of the registers recorded as saved for unwind. */
26305 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26307 regno
= ctz_hwi (real_regs
);
26308 reg
= gen_rtx_REG (SImode
, regno
);
26310 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26311 tmp
= gen_frame_mem (SImode
, tmp
);
26312 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
26313 RTX_FRAME_RELATED_P (tmp
) = 1;
26321 RTX_FRAME_RELATED_P (par
[0]) = 1;
26322 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
26325 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26330 /* Emit code to push or pop registers to or from the stack. F is the
26331 assembly file. MASK is the registers to pop. */
26333 thumb_pop (FILE *f
, unsigned long mask
)
26336 int lo_mask
= mask
& 0xFF;
26337 int pushed_words
= 0;
26341 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26343 /* Special case. Do not generate a POP PC statement here, do it in
26345 thumb_exit (f
, -1);
26349 fprintf (f
, "\tpop\t{");
26351 /* Look at the low registers first. */
26352 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26356 asm_fprintf (f
, "%r", regno
);
26358 if ((lo_mask
& ~1) != 0)
26365 if (mask
& (1 << PC_REGNUM
))
26367 /* Catch popping the PC. */
26368 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
26369 || crtl
->calls_eh_return
)
26371 /* The PC is never poped directly, instead
26372 it is popped into r3 and then BX is used. */
26373 fprintf (f
, "}\n");
26375 thumb_exit (f
, -1);
26384 asm_fprintf (f
, "%r", PC_REGNUM
);
26388 fprintf (f
, "}\n");
26391 /* Generate code to return from a thumb function.
26392 If 'reg_containing_return_addr' is -1, then the return address is
26393 actually on the stack, at the stack pointer. */
26395 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26397 unsigned regs_available_for_popping
;
26398 unsigned regs_to_pop
;
26400 unsigned available
;
26404 int restore_a4
= FALSE
;
26406 /* Compute the registers we need to pop. */
26410 if (reg_containing_return_addr
== -1)
26412 regs_to_pop
|= 1 << LR_REGNUM
;
26416 if (TARGET_BACKTRACE
)
26418 /* Restore the (ARM) frame pointer and stack pointer. */
26419 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26423 /* If there is nothing to pop then just emit the BX instruction and
26425 if (pops_needed
== 0)
26427 if (crtl
->calls_eh_return
)
26428 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26430 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26433 /* Otherwise if we are not supporting interworking and we have not created
26434 a backtrace structure and the function was not entered in ARM mode then
26435 just pop the return address straight into the PC. */
26436 else if (!TARGET_INTERWORK
26437 && !TARGET_BACKTRACE
26438 && !is_called_in_ARM_mode (current_function_decl
)
26439 && !crtl
->calls_eh_return
)
26441 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26445 /* Find out how many of the (return) argument registers we can corrupt. */
26446 regs_available_for_popping
= 0;
26448 /* If returning via __builtin_eh_return, the bottom three registers
26449 all contain information needed for the return. */
26450 if (crtl
->calls_eh_return
)
26454 /* If we can deduce the registers used from the function's
26455 return value. This is more reliable that examining
26456 df_regs_ever_live_p () because that will be set if the register is
26457 ever used in the function, not just if the register is used
26458 to hold a return value. */
26460 if (crtl
->return_rtx
!= 0)
26461 mode
= GET_MODE (crtl
->return_rtx
);
26463 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26465 size
= GET_MODE_SIZE (mode
);
26469 /* In a void function we can use any argument register.
26470 In a function that returns a structure on the stack
26471 we can use the second and third argument registers. */
26472 if (mode
== VOIDmode
)
26473 regs_available_for_popping
=
26474 (1 << ARG_REGISTER (1))
26475 | (1 << ARG_REGISTER (2))
26476 | (1 << ARG_REGISTER (3));
26478 regs_available_for_popping
=
26479 (1 << ARG_REGISTER (2))
26480 | (1 << ARG_REGISTER (3));
26482 else if (size
<= 4)
26483 regs_available_for_popping
=
26484 (1 << ARG_REGISTER (2))
26485 | (1 << ARG_REGISTER (3));
26486 else if (size
<= 8)
26487 regs_available_for_popping
=
26488 (1 << ARG_REGISTER (3));
26491 /* Match registers to be popped with registers into which we pop them. */
26492 for (available
= regs_available_for_popping
,
26493 required
= regs_to_pop
;
26494 required
!= 0 && available
!= 0;
26495 available
&= ~(available
& - available
),
26496 required
&= ~(required
& - required
))
26499 /* If we have any popping registers left over, remove them. */
26501 regs_available_for_popping
&= ~available
;
26503 /* Otherwise if we need another popping register we can use
26504 the fourth argument register. */
26505 else if (pops_needed
)
26507 /* If we have not found any free argument registers and
26508 reg a4 contains the return address, we must move it. */
26509 if (regs_available_for_popping
== 0
26510 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26512 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26513 reg_containing_return_addr
= LR_REGNUM
;
26515 else if (size
> 12)
26517 /* Register a4 is being used to hold part of the return value,
26518 but we have dire need of a free, low register. */
26521 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26524 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26526 /* The fourth argument register is available. */
26527 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26533 /* Pop as many registers as we can. */
26534 thumb_pop (f
, regs_available_for_popping
);
26536 /* Process the registers we popped. */
26537 if (reg_containing_return_addr
== -1)
26539 /* The return address was popped into the lowest numbered register. */
26540 regs_to_pop
&= ~(1 << LR_REGNUM
);
26542 reg_containing_return_addr
=
26543 number_of_first_bit_set (regs_available_for_popping
);
26545 /* Remove this register for the mask of available registers, so that
26546 the return address will not be corrupted by further pops. */
26547 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26550 /* If we popped other registers then handle them here. */
26551 if (regs_available_for_popping
)
26555 /* Work out which register currently contains the frame pointer. */
26556 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26558 /* Move it into the correct place. */
26559 asm_fprintf (f
, "\tmov\t%r, %r\n",
26560 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26562 /* (Temporarily) remove it from the mask of popped registers. */
26563 regs_available_for_popping
&= ~(1 << frame_pointer
);
26564 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26566 if (regs_available_for_popping
)
26570 /* We popped the stack pointer as well,
26571 find the register that contains it. */
26572 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26574 /* Move it into the stack register. */
26575 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26577 /* At this point we have popped all necessary registers, so
26578 do not worry about restoring regs_available_for_popping
26579 to its correct value:
26581 assert (pops_needed == 0)
26582 assert (regs_available_for_popping == (1 << frame_pointer))
26583 assert (regs_to_pop == (1 << STACK_POINTER)) */
26587 /* Since we have just move the popped value into the frame
26588 pointer, the popping register is available for reuse, and
26589 we know that we still have the stack pointer left to pop. */
26590 regs_available_for_popping
|= (1 << frame_pointer
);
26594 /* If we still have registers left on the stack, but we no longer have
26595 any registers into which we can pop them, then we must move the return
26596 address into the link register and make available the register that
26598 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26600 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26602 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26603 reg_containing_return_addr
);
26605 reg_containing_return_addr
= LR_REGNUM
;
26608 /* If we have registers left on the stack then pop some more.
26609 We know that at most we will want to pop FP and SP. */
26610 if (pops_needed
> 0)
26615 thumb_pop (f
, regs_available_for_popping
);
26617 /* We have popped either FP or SP.
26618 Move whichever one it is into the correct register. */
26619 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26620 move_to
= number_of_first_bit_set (regs_to_pop
);
26622 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26624 regs_to_pop
&= ~(1 << move_to
);
26629 /* If we still have not popped everything then we must have only
26630 had one register available to us and we are now popping the SP. */
26631 if (pops_needed
> 0)
26635 thumb_pop (f
, regs_available_for_popping
);
26637 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26639 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26641 assert (regs_to_pop == (1 << STACK_POINTER))
26642 assert (pops_needed == 1)
26646 /* If necessary restore the a4 register. */
26649 if (reg_containing_return_addr
!= LR_REGNUM
)
26651 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26652 reg_containing_return_addr
= LR_REGNUM
;
26655 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26658 if (crtl
->calls_eh_return
)
26659 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26661 /* Return to caller. */
26662 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26665 /* Scan INSN just before assembler is output for it.
26666 For Thumb-1, we track the status of the condition codes; this
26667 information is used in the cbranchsi4_insn pattern. */
26669 thumb1_final_prescan_insn (rtx_insn
*insn
)
26671 if (flag_print_asm_name
)
26672 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26673 INSN_ADDRESSES (INSN_UID (insn
)));
26674 /* Don't overwrite the previous setter when we get to a cbranch. */
26675 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26677 enum attr_conds conds
;
26679 if (cfun
->machine
->thumb1_cc_insn
)
26681 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26682 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26685 conds
= get_attr_conds (insn
);
26686 if (conds
== CONDS_SET
)
26688 rtx set
= single_set (insn
);
26689 cfun
->machine
->thumb1_cc_insn
= insn
;
26690 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26691 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26692 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26693 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26695 rtx src1
= XEXP (SET_SRC (set
), 1);
26696 if (src1
== const0_rtx
)
26697 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26699 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26701 /* Record the src register operand instead of dest because
26702 cprop_hardreg pass propagates src. */
26703 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26706 else if (conds
!= CONDS_NOCOND
)
26707 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26710 /* Check if unexpected far jump is used. */
26711 if (cfun
->machine
->lr_save_eliminated
26712 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26713 internal_error("Unexpected thumb1 far jump");
26717 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26719 unsigned HOST_WIDE_INT mask
= 0xff;
26722 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26723 if (val
== 0) /* XXX */
26726 for (i
= 0; i
< 25; i
++)
26727 if ((val
& (mask
<< i
)) == val
)
26733 /* Returns nonzero if the current function contains,
26734 or might contain a far jump. */
26736 thumb_far_jump_used_p (void)
26739 bool far_jump
= false;
26740 unsigned int func_size
= 0;
26742 /* This test is only important for leaf functions. */
26743 /* assert (!leaf_function_p ()); */
26745 /* If we have already decided that far jumps may be used,
26746 do not bother checking again, and always return true even if
26747 it turns out that they are not being used. Once we have made
26748 the decision that far jumps are present (and that hence the link
26749 register will be pushed onto the stack) we cannot go back on it. */
26750 if (cfun
->machine
->far_jump_used
)
26753 /* If this function is not being called from the prologue/epilogue
26754 generation code then it must be being called from the
26755 INITIAL_ELIMINATION_OFFSET macro. */
26756 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26758 /* In this case we know that we are being asked about the elimination
26759 of the arg pointer register. If that register is not being used,
26760 then there are no arguments on the stack, and we do not have to
26761 worry that a far jump might force the prologue to push the link
26762 register, changing the stack offsets. In this case we can just
26763 return false, since the presence of far jumps in the function will
26764 not affect stack offsets.
26766 If the arg pointer is live (or if it was live, but has now been
26767 eliminated and so set to dead) then we do have to test to see if
26768 the function might contain a far jump. This test can lead to some
26769 false negatives, since before reload is completed, then length of
26770 branch instructions is not known, so gcc defaults to returning their
26771 longest length, which in turn sets the far jump attribute to true.
26773 A false negative will not result in bad code being generated, but it
26774 will result in a needless push and pop of the link register. We
26775 hope that this does not occur too often.
26777 If we need doubleword stack alignment this could affect the other
26778 elimination offsets so we can't risk getting it wrong. */
26779 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26780 cfun
->machine
->arg_pointer_live
= 1;
26781 else if (!cfun
->machine
->arg_pointer_live
)
26785 /* We should not change far_jump_used during or after reload, as there is
26786 no chance to change stack frame layout. */
26787 if (reload_in_progress
|| reload_completed
)
26790 /* Check to see if the function contains a branch
26791 insn with the far jump attribute set. */
26792 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26794 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26798 func_size
+= get_attr_length (insn
);
26801 /* Attribute far_jump will always be true for thumb1 before
26802 shorten_branch pass. So checking far_jump attribute before
26803 shorten_branch isn't much useful.
26805 Following heuristic tries to estimate more accurately if a far jump
26806 may finally be used. The heuristic is very conservative as there is
26807 no chance to roll-back the decision of not to use far jump.
26809 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26810 2-byte insn is associated with a 4 byte constant pool. Using
26811 function size 2048/3 as the threshold is conservative enough. */
26814 if ((func_size
* 3) >= 2048)
26816 /* Record the fact that we have decided that
26817 the function does use far jumps. */
26818 cfun
->machine
->far_jump_used
= 1;
26826 /* Return nonzero if FUNC must be entered in ARM mode. */
26828 is_called_in_ARM_mode (tree func
)
26830 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26832 /* Ignore the problem about functions whose address is taken. */
26833 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26837 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26843 /* Given the stack offsets and register mask in OFFSETS, decide how
26844 many additional registers to push instead of subtracting a constant
26845 from SP. For epilogues the principle is the same except we use pop.
26846 FOR_PROLOGUE indicates which we're generating. */
26848 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26850 HOST_WIDE_INT amount
;
26851 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26852 /* Extract a mask of the ones we can give to the Thumb's push/pop
26854 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26855 /* Then count how many other high registers will need to be pushed. */
26856 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26857 int n_free
, reg_base
, size
;
26859 if (!for_prologue
&& frame_pointer_needed
)
26860 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26862 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26864 /* If the stack frame size is 512 exactly, we can save one load
26865 instruction, which should make this a win even when optimizing
26867 if (!optimize_size
&& amount
!= 512)
26870 /* Can't do this if there are high registers to push. */
26871 if (high_regs_pushed
!= 0)
26874 /* Shouldn't do it in the prologue if no registers would normally
26875 be pushed at all. In the epilogue, also allow it if we'll have
26876 a pop insn for the PC. */
26879 || TARGET_BACKTRACE
26880 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26881 || TARGET_INTERWORK
26882 || crtl
->args
.pretend_args_size
!= 0))
26885 /* Don't do this if thumb_expand_prologue wants to emit instructions
26886 between the push and the stack frame allocation. */
26888 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26889 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26896 size
= arm_size_return_regs ();
26897 reg_base
= ARM_NUM_INTS (size
);
26898 live_regs_mask
>>= reg_base
;
26901 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26902 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26904 live_regs_mask
>>= 1;
26910 gcc_assert (amount
/ 4 * 4 == amount
);
26912 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26913 return (amount
- 508) / 4;
26914 if (amount
<= n_free
* 4)
26919 /* The bits which aren't usefully expanded as rtl. */
26921 thumb1_unexpanded_epilogue (void)
26923 arm_stack_offsets
*offsets
;
26925 unsigned long live_regs_mask
= 0;
26926 int high_regs_pushed
= 0;
26928 int had_to_push_lr
;
26931 if (cfun
->machine
->return_used_this_function
!= 0)
26934 if (IS_NAKED (arm_current_func_type ()))
26937 offsets
= arm_get_frame_offsets ();
26938 live_regs_mask
= offsets
->saved_regs_mask
;
26939 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26941 /* If we can deduce the registers used from the function's return value.
26942 This is more reliable that examining df_regs_ever_live_p () because that
26943 will be set if the register is ever used in the function, not just if
26944 the register is used to hold a return value. */
26945 size
= arm_size_return_regs ();
26947 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26950 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26951 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26954 /* The prolog may have pushed some high registers to use as
26955 work registers. e.g. the testsuite file:
26956 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26957 compiles to produce:
26958 push {r4, r5, r6, r7, lr}
26962 as part of the prolog. We have to undo that pushing here. */
26964 if (high_regs_pushed
)
26966 unsigned long mask
= live_regs_mask
& 0xff;
26969 /* The available low registers depend on the size of the value we are
26977 /* Oh dear! We have no low registers into which we can pop
26980 ("no low registers available for popping high registers");
26982 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26983 if (live_regs_mask
& (1 << next_hi_reg
))
26986 while (high_regs_pushed
)
26988 /* Find lo register(s) into which the high register(s) can
26990 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26992 if (mask
& (1 << regno
))
26993 high_regs_pushed
--;
26994 if (high_regs_pushed
== 0)
26998 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
27000 /* Pop the values into the low register(s). */
27001 thumb_pop (asm_out_file
, mask
);
27003 /* Move the value(s) into the high registers. */
27004 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
27006 if (mask
& (1 << regno
))
27008 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
27011 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
27012 if (live_regs_mask
& (1 << next_hi_reg
))
27017 live_regs_mask
&= ~0x0f00;
27020 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
27021 live_regs_mask
&= 0xff;
27023 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
27025 /* Pop the return address into the PC. */
27026 if (had_to_push_lr
)
27027 live_regs_mask
|= 1 << PC_REGNUM
;
27029 /* Either no argument registers were pushed or a backtrace
27030 structure was created which includes an adjusted stack
27031 pointer, so just pop everything. */
27032 if (live_regs_mask
)
27033 thumb_pop (asm_out_file
, live_regs_mask
);
27035 /* We have either just popped the return address into the
27036 PC or it is was kept in LR for the entire function.
27037 Note that thumb_pop has already called thumb_exit if the
27038 PC was in the list. */
27039 if (!had_to_push_lr
)
27040 thumb_exit (asm_out_file
, LR_REGNUM
);
27044 /* Pop everything but the return address. */
27045 if (live_regs_mask
)
27046 thumb_pop (asm_out_file
, live_regs_mask
);
27048 if (had_to_push_lr
)
27052 /* We have no free low regs, so save one. */
27053 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
27057 /* Get the return address into a temporary register. */
27058 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
27062 /* Move the return address to lr. */
27063 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
27065 /* Restore the low register. */
27066 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
27071 regno
= LAST_ARG_REGNUM
;
27076 /* Remove the argument registers that were pushed onto the stack. */
27077 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
27078 SP_REGNUM
, SP_REGNUM
,
27079 crtl
->args
.pretend_args_size
);
27081 thumb_exit (asm_out_file
, regno
);
27087 /* Functions to save and restore machine-specific function data. */
27088 static struct machine_function
*
27089 arm_init_machine_status (void)
27091 struct machine_function
*machine
;
27092 machine
= ggc_cleared_alloc
<machine_function
> ();
27094 #if ARM_FT_UNKNOWN != 0
27095 machine
->func_type
= ARM_FT_UNKNOWN
;
27100 /* Return an RTX indicating where the return address to the
27101 calling function can be found. */
27103 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
27108 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
27111 /* Do anything needed before RTL is emitted for each function. */
27113 arm_init_expanders (void)
27115 /* Arrange to initialize and mark the machine per-function status. */
27116 init_machine_status
= arm_init_machine_status
;
27118 /* This is to stop the combine pass optimizing away the alignment
27119 adjustment of va_arg. */
27120 /* ??? It is claimed that this should not be necessary. */
27122 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
27126 /* Like arm_compute_initial_elimination offset. Simpler because there
27127 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27128 to point at the base of the local variables after static stack
27129 space for a function has been allocated. */
27132 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
27134 arm_stack_offsets
*offsets
;
27136 offsets
= arm_get_frame_offsets ();
27140 case ARG_POINTER_REGNUM
:
27143 case STACK_POINTER_REGNUM
:
27144 return offsets
->outgoing_args
- offsets
->saved_args
;
27146 case FRAME_POINTER_REGNUM
:
27147 return offsets
->soft_frame
- offsets
->saved_args
;
27149 case ARM_HARD_FRAME_POINTER_REGNUM
:
27150 return offsets
->saved_regs
- offsets
->saved_args
;
27152 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27153 return offsets
->locals_base
- offsets
->saved_args
;
27156 gcc_unreachable ();
27160 case FRAME_POINTER_REGNUM
:
27163 case STACK_POINTER_REGNUM
:
27164 return offsets
->outgoing_args
- offsets
->soft_frame
;
27166 case ARM_HARD_FRAME_POINTER_REGNUM
:
27167 return offsets
->saved_regs
- offsets
->soft_frame
;
27169 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27170 return offsets
->locals_base
- offsets
->soft_frame
;
27173 gcc_unreachable ();
27178 gcc_unreachable ();
27182 /* Generate the function's prologue. */
27185 thumb1_expand_prologue (void)
27189 HOST_WIDE_INT amount
;
27190 arm_stack_offsets
*offsets
;
27191 unsigned long func_type
;
27193 unsigned long live_regs_mask
;
27194 unsigned long l_mask
;
27195 unsigned high_regs_pushed
= 0;
27197 func_type
= arm_current_func_type ();
27199 /* Naked functions don't have prologues. */
27200 if (IS_NAKED (func_type
))
27203 if (IS_INTERRUPT (func_type
))
27205 error ("interrupt Service Routines cannot be coded in Thumb mode");
27209 if (is_called_in_ARM_mode (current_function_decl
))
27210 emit_insn (gen_prologue_thumb1_interwork ());
27212 offsets
= arm_get_frame_offsets ();
27213 live_regs_mask
= offsets
->saved_regs_mask
;
27215 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27216 l_mask
= live_regs_mask
& 0x40ff;
27217 /* Then count how many other high registers will need to be pushed. */
27218 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27220 if (crtl
->args
.pretend_args_size
)
27222 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27224 if (cfun
->machine
->uses_anonymous_args
)
27226 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27227 unsigned long mask
;
27229 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27230 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27232 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27236 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27237 stack_pointer_rtx
, x
));
27239 RTX_FRAME_RELATED_P (insn
) = 1;
27242 if (TARGET_BACKTRACE
)
27244 HOST_WIDE_INT offset
= 0;
27245 unsigned work_register
;
27246 rtx work_reg
, x
, arm_hfp_rtx
;
27248 /* We have been asked to create a stack backtrace structure.
27249 The code looks like this:
27253 0 sub SP, #16 Reserve space for 4 registers.
27254 2 push {R7} Push low registers.
27255 4 add R7, SP, #20 Get the stack pointer before the push.
27256 6 str R7, [SP, #8] Store the stack pointer
27257 (before reserving the space).
27258 8 mov R7, PC Get hold of the start of this code + 12.
27259 10 str R7, [SP, #16] Store it.
27260 12 mov R7, FP Get hold of the current frame pointer.
27261 14 str R7, [SP, #4] Store it.
27262 16 mov R7, LR Get hold of the current return address.
27263 18 str R7, [SP, #12] Store it.
27264 20 add R7, SP, #16 Point at the start of the
27265 backtrace structure.
27266 22 mov FP, R7 Put this value into the frame pointer. */
27268 work_register
= thumb_find_work_register (live_regs_mask
);
27269 work_reg
= gen_rtx_REG (SImode
, work_register
);
27270 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27272 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27273 stack_pointer_rtx
, GEN_INT (-16)));
27274 RTX_FRAME_RELATED_P (insn
) = 1;
27278 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27279 RTX_FRAME_RELATED_P (insn
) = 1;
27281 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27284 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27285 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27287 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27288 x
= gen_frame_mem (SImode
, x
);
27289 emit_move_insn (x
, work_reg
);
27291 /* Make sure that the instruction fetching the PC is in the right place
27292 to calculate "start of backtrace creation code + 12". */
27293 /* ??? The stores using the common WORK_REG ought to be enough to
27294 prevent the scheduler from doing anything weird. Failing that
27295 we could always move all of the following into an UNSPEC_VOLATILE. */
27298 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27299 emit_move_insn (work_reg
, x
);
27301 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27302 x
= gen_frame_mem (SImode
, x
);
27303 emit_move_insn (x
, work_reg
);
27305 emit_move_insn (work_reg
, arm_hfp_rtx
);
27307 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27308 x
= gen_frame_mem (SImode
, x
);
27309 emit_move_insn (x
, work_reg
);
27313 emit_move_insn (work_reg
, arm_hfp_rtx
);
27315 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27316 x
= gen_frame_mem (SImode
, x
);
27317 emit_move_insn (x
, work_reg
);
27319 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27320 emit_move_insn (work_reg
, x
);
27322 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27323 x
= gen_frame_mem (SImode
, x
);
27324 emit_move_insn (x
, work_reg
);
27327 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27328 emit_move_insn (work_reg
, x
);
27330 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27331 x
= gen_frame_mem (SImode
, x
);
27332 emit_move_insn (x
, work_reg
);
27334 x
= GEN_INT (offset
+ 12);
27335 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27337 emit_move_insn (arm_hfp_rtx
, work_reg
);
27339 /* Optimization: If we are not pushing any low registers but we are going
27340 to push some high registers then delay our first push. This will just
27341 be a push of LR and we can combine it with the push of the first high
27343 else if ((l_mask
& 0xff) != 0
27344 || (high_regs_pushed
== 0 && l_mask
))
27346 unsigned long mask
= l_mask
;
27347 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27348 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27349 RTX_FRAME_RELATED_P (insn
) = 1;
27352 if (high_regs_pushed
)
27354 unsigned pushable_regs
;
27355 unsigned next_hi_reg
;
27356 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27357 : crtl
->args
.info
.nregs
;
27358 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27360 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27361 if (live_regs_mask
& (1 << next_hi_reg
))
27364 /* Here we need to mask out registers used for passing arguments
27365 even if they can be pushed. This is to avoid using them to stash the high
27366 registers. Such kind of stash may clobber the use of arguments. */
27367 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
27369 if (pushable_regs
== 0)
27370 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27372 while (high_regs_pushed
> 0)
27374 unsigned long real_regs_mask
= 0;
27376 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
27378 if (pushable_regs
& (1 << regno
))
27380 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27381 gen_rtx_REG (SImode
, next_hi_reg
));
27383 high_regs_pushed
--;
27384 real_regs_mask
|= (1 << next_hi_reg
);
27386 if (high_regs_pushed
)
27388 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27390 if (live_regs_mask
& (1 << next_hi_reg
))
27395 pushable_regs
&= ~((1 << regno
) - 1);
27401 /* If we had to find a work register and we have not yet
27402 saved the LR then add it to the list of regs to push. */
27403 if (l_mask
== (1 << LR_REGNUM
))
27405 pushable_regs
|= l_mask
;
27406 real_regs_mask
|= l_mask
;
27410 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
27411 RTX_FRAME_RELATED_P (insn
) = 1;
27415 /* Load the pic register before setting the frame pointer,
27416 so we can use r7 as a temporary work register. */
27417 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27418 arm_load_pic_register (live_regs_mask
);
27420 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27421 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27422 stack_pointer_rtx
);
27424 if (flag_stack_usage_info
)
27425 current_function_static_stack_size
27426 = offsets
->outgoing_args
- offsets
->saved_args
;
27428 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27429 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27434 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27435 GEN_INT (- amount
)));
27436 RTX_FRAME_RELATED_P (insn
) = 1;
27442 /* The stack decrement is too big for an immediate value in a single
27443 insn. In theory we could issue multiple subtracts, but after
27444 three of them it becomes more space efficient to place the full
27445 value in the constant pool and load into a register. (Also the
27446 ARM debugger really likes to see only one stack decrement per
27447 function). So instead we look for a scratch register into which
27448 we can load the decrement, and then we subtract this from the
27449 stack pointer. Unfortunately on the thumb the only available
27450 scratch registers are the argument registers, and we cannot use
27451 these as they may hold arguments to the function. Instead we
27452 attempt to locate a call preserved register which is used by this
27453 function. If we can find one, then we know that it will have
27454 been pushed at the start of the prologue and so we can corrupt
27456 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27457 if (live_regs_mask
& (1 << regno
))
27460 gcc_assert(regno
<= LAST_LO_REGNUM
);
27462 reg
= gen_rtx_REG (SImode
, regno
);
27464 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27466 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27467 stack_pointer_rtx
, reg
));
27469 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27470 plus_constant (Pmode
, stack_pointer_rtx
,
27472 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27473 RTX_FRAME_RELATED_P (insn
) = 1;
27477 if (frame_pointer_needed
)
27478 thumb_set_frame_pointer (offsets
);
27480 /* If we are profiling, make sure no instructions are scheduled before
27481 the call to mcount. Similarly if the user has requested no
27482 scheduling in the prolog. Similarly if we want non-call exceptions
27483 using the EABI unwinder, to prevent faulting instructions from being
27484 swapped with a stack adjustment. */
27485 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27486 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27487 && cfun
->can_throw_non_call_exceptions
))
27488 emit_insn (gen_blockage ());
27490 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27491 if (live_regs_mask
& 0xff)
27492 cfun
->machine
->lr_save_eliminated
= 0;
27495 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27496 POP instruction can be generated. LR should be replaced by PC. All
27497 the checks required are already done by USE_RETURN_INSN (). Hence,
27498 all we really need to check here is if single register is to be
27499 returned, or multiple register return. */
27501 thumb2_expand_return (bool simple_return
)
27504 unsigned long saved_regs_mask
;
27505 arm_stack_offsets
*offsets
;
27507 offsets
= arm_get_frame_offsets ();
27508 saved_regs_mask
= offsets
->saved_regs_mask
;
27510 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27511 if (saved_regs_mask
& (1 << i
))
27514 if (!simple_return
&& saved_regs_mask
)
27518 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27519 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27520 rtx addr
= gen_rtx_MEM (SImode
,
27521 gen_rtx_POST_INC (SImode
,
27522 stack_pointer_rtx
));
27523 set_mem_alias_set (addr
, get_frame_alias_set ());
27524 XVECEXP (par
, 0, 0) = ret_rtx
;
27525 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27526 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27527 emit_jump_insn (par
);
27531 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27532 saved_regs_mask
|= (1 << PC_REGNUM
);
27533 arm_emit_multi_reg_pop (saved_regs_mask
);
27538 emit_jump_insn (simple_return_rtx
);
27543 thumb1_expand_epilogue (void)
27545 HOST_WIDE_INT amount
;
27546 arm_stack_offsets
*offsets
;
27549 /* Naked functions don't have prologues. */
27550 if (IS_NAKED (arm_current_func_type ()))
27553 offsets
= arm_get_frame_offsets ();
27554 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27556 if (frame_pointer_needed
)
27558 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27559 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27561 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27563 gcc_assert (amount
>= 0);
27566 emit_insn (gen_blockage ());
27569 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27570 GEN_INT (amount
)));
27573 /* r3 is always free in the epilogue. */
27574 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27576 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27577 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27581 /* Emit a USE (stack_pointer_rtx), so that
27582 the stack adjustment will not be deleted. */
27583 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27585 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27586 emit_insn (gen_blockage ());
27588 /* Emit a clobber for each insn that will be restored in the epilogue,
27589 so that flow2 will get register lifetimes correct. */
27590 for (regno
= 0; regno
< 13; regno
++)
27591 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27592 emit_clobber (gen_rtx_REG (SImode
, regno
));
27594 if (! df_regs_ever_live_p (LR_REGNUM
))
27595 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27598 /* Epilogue code for APCS frame. */
27600 arm_expand_epilogue_apcs_frame (bool really_return
)
27602 unsigned long func_type
;
27603 unsigned long saved_regs_mask
;
27606 int floats_from_frame
= 0;
27607 arm_stack_offsets
*offsets
;
27609 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27610 func_type
= arm_current_func_type ();
27612 /* Get frame offsets for ARM. */
27613 offsets
= arm_get_frame_offsets ();
27614 saved_regs_mask
= offsets
->saved_regs_mask
;
27616 /* Find the offset of the floating-point save area in the frame. */
27618 = (offsets
->saved_args
27619 + arm_compute_static_chain_stack_bytes ()
27622 /* Compute how many core registers saved and how far away the floats are. */
27623 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27624 if (saved_regs_mask
& (1 << i
))
27627 floats_from_frame
+= 4;
27630 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27633 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27635 /* The offset is from IP_REGNUM. */
27636 int saved_size
= arm_get_vfp_saved_size ();
27637 if (saved_size
> 0)
27640 floats_from_frame
+= saved_size
;
27641 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27642 hard_frame_pointer_rtx
,
27643 GEN_INT (-floats_from_frame
)));
27644 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27645 ip_rtx
, hard_frame_pointer_rtx
);
27648 /* Generate VFP register multi-pop. */
27649 start_reg
= FIRST_VFP_REGNUM
;
27651 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27652 /* Look for a case where a reg does not need restoring. */
27653 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27654 && (!df_regs_ever_live_p (i
+ 1)
27655 || call_used_regs
[i
+ 1]))
27657 if (start_reg
!= i
)
27658 arm_emit_vfp_multi_reg_pop (start_reg
,
27659 (i
- start_reg
) / 2,
27660 gen_rtx_REG (SImode
,
27665 /* Restore the remaining regs that we have discovered (or possibly
27666 even all of them, if the conditional in the for loop never
27668 if (start_reg
!= i
)
27669 arm_emit_vfp_multi_reg_pop (start_reg
,
27670 (i
- start_reg
) / 2,
27671 gen_rtx_REG (SImode
, IP_REGNUM
));
27676 /* The frame pointer is guaranteed to be non-double-word aligned, as
27677 it is set to double-word-aligned old_stack_pointer - 4. */
27679 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27681 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27682 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27684 rtx addr
= gen_frame_mem (V2SImode
,
27685 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27687 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27688 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27689 gen_rtx_REG (V2SImode
, i
),
27695 /* saved_regs_mask should contain IP which contains old stack pointer
27696 at the time of activation creation. Since SP and IP are adjacent registers,
27697 we can restore the value directly into SP. */
27698 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27699 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27700 saved_regs_mask
|= (1 << SP_REGNUM
);
27702 /* There are two registers left in saved_regs_mask - LR and PC. We
27703 only need to restore LR (the return address), but to
27704 save time we can load it directly into PC, unless we need a
27705 special function exit sequence, or we are not really returning. */
27707 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27708 && !crtl
->calls_eh_return
)
27709 /* Delete LR from the register mask, so that LR on
27710 the stack is loaded into the PC in the register mask. */
27711 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27713 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27715 num_regs
= bit_count (saved_regs_mask
);
27716 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27719 emit_insn (gen_blockage ());
27720 /* Unwind the stack to just below the saved registers. */
27721 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27722 hard_frame_pointer_rtx
,
27723 GEN_INT (- 4 * num_regs
)));
27725 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27726 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27729 arm_emit_multi_reg_pop (saved_regs_mask
);
27731 if (IS_INTERRUPT (func_type
))
27733 /* Interrupt handlers will have pushed the
27734 IP onto the stack, so restore it now. */
27736 rtx addr
= gen_rtx_MEM (SImode
,
27737 gen_rtx_POST_INC (SImode
,
27738 stack_pointer_rtx
));
27739 set_mem_alias_set (addr
, get_frame_alias_set ());
27740 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27741 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27742 gen_rtx_REG (SImode
, IP_REGNUM
),
27746 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27749 if (crtl
->calls_eh_return
)
27750 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27752 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27754 if (IS_STACKALIGN (func_type
))
27755 /* Restore the original stack pointer. Before prologue, the stack was
27756 realigned and the original stack pointer saved in r0. For details,
27757 see comment in arm_expand_prologue. */
27758 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27760 emit_jump_insn (simple_return_rtx
);
27763 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27764 function is not a sibcall. */
27766 arm_expand_epilogue (bool really_return
)
27768 unsigned long func_type
;
27769 unsigned long saved_regs_mask
;
27773 arm_stack_offsets
*offsets
;
27775 func_type
= arm_current_func_type ();
27777 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27778 let output_return_instruction take care of instruction emission if any. */
27779 if (IS_NAKED (func_type
)
27780 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27783 emit_jump_insn (simple_return_rtx
);
27787 /* If we are throwing an exception, then we really must be doing a
27788 return, so we can't tail-call. */
27789 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27791 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27793 arm_expand_epilogue_apcs_frame (really_return
);
27797 /* Get frame offsets for ARM. */
27798 offsets
= arm_get_frame_offsets ();
27799 saved_regs_mask
= offsets
->saved_regs_mask
;
27800 num_regs
= bit_count (saved_regs_mask
);
27802 if (frame_pointer_needed
)
27805 /* Restore stack pointer if necessary. */
27808 /* In ARM mode, frame pointer points to first saved register.
27809 Restore stack pointer to last saved register. */
27810 amount
= offsets
->frame
- offsets
->saved_regs
;
27812 /* Force out any pending memory operations that reference stacked data
27813 before stack de-allocation occurs. */
27814 emit_insn (gen_blockage ());
27815 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27816 hard_frame_pointer_rtx
,
27817 GEN_INT (amount
)));
27818 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27820 hard_frame_pointer_rtx
);
27822 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27824 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27828 /* In Thumb-2 mode, the frame pointer points to the last saved
27830 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27833 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27834 hard_frame_pointer_rtx
,
27835 GEN_INT (amount
)));
27836 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27837 hard_frame_pointer_rtx
,
27838 hard_frame_pointer_rtx
);
27841 /* Force out any pending memory operations that reference stacked data
27842 before stack de-allocation occurs. */
27843 emit_insn (gen_blockage ());
27844 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27845 hard_frame_pointer_rtx
));
27846 arm_add_cfa_adjust_cfa_note (insn
, 0,
27848 hard_frame_pointer_rtx
);
27849 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27851 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27856 /* Pop off outgoing args and local frame to adjust stack pointer to
27857 last saved register. */
27858 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27862 /* Force out any pending memory operations that reference stacked data
27863 before stack de-allocation occurs. */
27864 emit_insn (gen_blockage ());
27865 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27867 GEN_INT (amount
)));
27868 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27869 stack_pointer_rtx
, stack_pointer_rtx
);
27870 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27872 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27876 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27878 /* Generate VFP register multi-pop. */
27879 int end_reg
= LAST_VFP_REGNUM
+ 1;
27881 /* Scan the registers in reverse order. We need to match
27882 any groupings made in the prologue and generate matching
27883 vldm operations. The need to match groups is because,
27884 unlike pop, vldm can only do consecutive regs. */
27885 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27886 /* Look for a case where a reg does not need restoring. */
27887 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27888 && (!df_regs_ever_live_p (i
+ 1)
27889 || call_used_regs
[i
+ 1]))
27891 /* Restore the regs discovered so far (from reg+2 to
27893 if (end_reg
> i
+ 2)
27894 arm_emit_vfp_multi_reg_pop (i
+ 2,
27895 (end_reg
- (i
+ 2)) / 2,
27896 stack_pointer_rtx
);
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27903 if (end_reg
> i
+ 2)
27904 arm_emit_vfp_multi_reg_pop (i
+ 2,
27905 (end_reg
- (i
+ 2)) / 2,
27906 stack_pointer_rtx
);
27910 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27911 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27914 rtx addr
= gen_rtx_MEM (V2SImode
,
27915 gen_rtx_POST_INC (SImode
,
27916 stack_pointer_rtx
));
27917 set_mem_alias_set (addr
, get_frame_alias_set ());
27918 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27919 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27920 gen_rtx_REG (V2SImode
, i
),
27922 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27923 stack_pointer_rtx
, stack_pointer_rtx
);
27926 if (saved_regs_mask
)
27929 bool return_in_pc
= false;
27931 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27932 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27933 && !IS_STACKALIGN (func_type
)
27935 && crtl
->args
.pretend_args_size
== 0
27936 && saved_regs_mask
& (1 << LR_REGNUM
)
27937 && !crtl
->calls_eh_return
)
27939 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27940 saved_regs_mask
|= (1 << PC_REGNUM
);
27941 return_in_pc
= true;
27944 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27946 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27947 if (saved_regs_mask
& (1 << i
))
27949 rtx addr
= gen_rtx_MEM (SImode
,
27950 gen_rtx_POST_INC (SImode
,
27951 stack_pointer_rtx
));
27952 set_mem_alias_set (addr
, get_frame_alias_set ());
27954 if (i
== PC_REGNUM
)
27956 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27957 XVECEXP (insn
, 0, 0) = ret_rtx
;
27958 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27959 gen_rtx_REG (SImode
, i
),
27961 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27962 insn
= emit_jump_insn (insn
);
27966 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27968 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27969 gen_rtx_REG (SImode
, i
),
27971 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27973 stack_pointer_rtx
);
27980 && current_tune
->prefer_ldrd_strd
27981 && !optimize_function_for_size_p (cfun
))
27984 thumb2_emit_ldrd_pop (saved_regs_mask
);
27985 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27986 arm_emit_ldrd_pop (saved_regs_mask
);
27988 arm_emit_multi_reg_pop (saved_regs_mask
);
27991 arm_emit_multi_reg_pop (saved_regs_mask
);
27994 if (return_in_pc
== true)
27998 if (crtl
->args
.pretend_args_size
)
28001 rtx dwarf
= NULL_RTX
;
28003 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28005 GEN_INT (crtl
->args
.pretend_args_size
)));
28007 RTX_FRAME_RELATED_P (tmp
) = 1;
28009 if (cfun
->machine
->uses_anonymous_args
)
28011 /* Restore pretend args. Refer arm_expand_prologue on how to save
28012 pretend_args in stack. */
28013 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
28014 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
28015 for (j
= 0, i
= 0; j
< num_regs
; i
++)
28016 if (saved_regs_mask
& (1 << i
))
28018 rtx reg
= gen_rtx_REG (SImode
, i
);
28019 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
28022 REG_NOTES (tmp
) = dwarf
;
28024 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
28025 stack_pointer_rtx
, stack_pointer_rtx
);
28028 if (!really_return
)
28031 if (crtl
->calls_eh_return
)
28032 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28034 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
28036 if (IS_STACKALIGN (func_type
))
28037 /* Restore the original stack pointer. Before prologue, the stack was
28038 realigned and the original stack pointer saved in r0. For details,
28039 see comment in arm_expand_prologue. */
28040 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
28042 emit_jump_insn (simple_return_rtx
);
28045 /* Implementation of insn prologue_thumb1_interwork. This is the first
28046 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28049 thumb1_output_interwork (void)
28052 FILE *f
= asm_out_file
;
28054 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
28055 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
28057 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
28059 /* Generate code sequence to switch us into Thumb mode. */
28060 /* The .code 32 directive has already been emitted by
28061 ASM_DECLARE_FUNCTION_NAME. */
28062 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
28063 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
28065 /* Generate a label, so that the debugger will notice the
28066 change in instruction sets. This label is also used by
28067 the assembler to bypass the ARM code when this function
28068 is called from a Thumb encoded function elsewhere in the
28069 same file. Hence the definition of STUB_NAME here must
28070 agree with the definition in gas/config/tc-arm.c. */
28072 #define STUB_NAME ".real_start_of"
28074 fprintf (f
, "\t.code\t16\n");
28076 if (arm_dllexport_name_p (name
))
28077 name
= arm_strip_name_encoding (name
);
28079 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
28080 fprintf (f
, "\t.thumb_func\n");
28081 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
28086 /* Handle the case of a double word load into a low register from
28087 a computed memory address. The computed address may involve a
28088 register which is overwritten by the load. */
28090 thumb_load_double_from_address (rtx
*operands
)
28098 gcc_assert (REG_P (operands
[0]));
28099 gcc_assert (MEM_P (operands
[1]));
28101 /* Get the memory address. */
28102 addr
= XEXP (operands
[1], 0);
28104 /* Work out how the memory address is computed. */
28105 switch (GET_CODE (addr
))
28108 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28110 if (REGNO (operands
[0]) == REGNO (addr
))
28112 output_asm_insn ("ldr\t%H0, %2", operands
);
28113 output_asm_insn ("ldr\t%0, %1", operands
);
28117 output_asm_insn ("ldr\t%0, %1", operands
);
28118 output_asm_insn ("ldr\t%H0, %2", operands
);
28123 /* Compute <address> + 4 for the high order load. */
28124 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28126 output_asm_insn ("ldr\t%0, %1", operands
);
28127 output_asm_insn ("ldr\t%H0, %2", operands
);
28131 arg1
= XEXP (addr
, 0);
28132 arg2
= XEXP (addr
, 1);
28134 if (CONSTANT_P (arg1
))
28135 base
= arg2
, offset
= arg1
;
28137 base
= arg1
, offset
= arg2
;
28139 gcc_assert (REG_P (base
));
28141 /* Catch the case of <address> = <reg> + <reg> */
28142 if (REG_P (offset
))
28144 int reg_offset
= REGNO (offset
);
28145 int reg_base
= REGNO (base
);
28146 int reg_dest
= REGNO (operands
[0]);
28148 /* Add the base and offset registers together into the
28149 higher destination register. */
28150 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28151 reg_dest
+ 1, reg_base
, reg_offset
);
28153 /* Load the lower destination register from the address in
28154 the higher destination register. */
28155 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28156 reg_dest
, reg_dest
+ 1);
28158 /* Load the higher destination register from its own address
28160 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28161 reg_dest
+ 1, reg_dest
+ 1);
28165 /* Compute <address> + 4 for the high order load. */
28166 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28168 /* If the computed address is held in the low order register
28169 then load the high order register first, otherwise always
28170 load the low order register first. */
28171 if (REGNO (operands
[0]) == REGNO (base
))
28173 output_asm_insn ("ldr\t%H0, %2", operands
);
28174 output_asm_insn ("ldr\t%0, %1", operands
);
28178 output_asm_insn ("ldr\t%0, %1", operands
);
28179 output_asm_insn ("ldr\t%H0, %2", operands
);
28185 /* With no registers to worry about we can just load the value
28187 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28189 output_asm_insn ("ldr\t%H0, %2", operands
);
28190 output_asm_insn ("ldr\t%0, %1", operands
);
28194 gcc_unreachable ();
28201 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28208 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28211 operands
[4] = operands
[5];
28214 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28215 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28219 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28222 operands
[4] = operands
[5];
28225 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28228 operands
[5] = operands
[6];
28231 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28234 operands
[4] = operands
[5];
28238 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28239 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28243 gcc_unreachable ();
28249 /* Output a call-via instruction for thumb state. */
28251 thumb_call_via_reg (rtx reg
)
28253 int regno
= REGNO (reg
);
28256 gcc_assert (regno
< LR_REGNUM
);
28258 /* If we are in the normal text section we can use a single instance
28259 per compilation unit. If we are doing function sections, then we need
28260 an entry per section, since we can't rely on reachability. */
28261 if (in_section
== text_section
)
28263 thumb_call_reg_needed
= 1;
28265 if (thumb_call_via_label
[regno
] == NULL
)
28266 thumb_call_via_label
[regno
] = gen_label_rtx ();
28267 labelp
= thumb_call_via_label
+ regno
;
28271 if (cfun
->machine
->call_via
[regno
] == NULL
)
28272 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28273 labelp
= cfun
->machine
->call_via
+ regno
;
28276 output_asm_insn ("bl\t%a0", labelp
);
28280 /* Routines for generating rtl. */
28282 thumb_expand_movmemqi (rtx
*operands
)
28284 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28285 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28286 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28287 HOST_WIDE_INT offset
= 0;
28291 emit_insn (gen_movmem12b (out
, in
, out
, in
));
28297 emit_insn (gen_movmem8b (out
, in
, out
, in
));
28303 rtx reg
= gen_reg_rtx (SImode
);
28304 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28305 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28312 rtx reg
= gen_reg_rtx (HImode
);
28313 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28314 plus_constant (Pmode
, in
,
28316 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28325 rtx reg
= gen_reg_rtx (QImode
);
28326 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28327 plus_constant (Pmode
, in
,
28329 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28336 thumb_reload_out_hi (rtx
*operands
)
28338 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28341 /* Handle reading a half-word from memory during reload. */
28343 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
28345 gcc_unreachable ();
28348 /* Return the length of a function name prefix
28349 that starts with the character 'c'. */
28351 arm_get_strip_length (int c
)
28355 ARM_NAME_ENCODING_LENGTHS
28360 /* Return a pointer to a function's name with any
28361 and all prefix encodings stripped from it. */
28363 arm_strip_name_encoding (const char *name
)
28367 while ((skip
= arm_get_strip_length (* name
)))
28373 /* If there is a '*' anywhere in the name's prefix, then
28374 emit the stripped name verbatim, otherwise prepend an
28375 underscore if leading underscores are being used. */
28377 arm_asm_output_labelref (FILE *stream
, const char *name
)
28382 while ((skip
= arm_get_strip_length (* name
)))
28384 verbatim
|= (*name
== '*');
28389 fputs (name
, stream
);
28391 asm_fprintf (stream
, "%U%s", name
);
28394 /* This function is used to emit an EABI tag and its associated value.
28395 We emit the numerical value of the tag in case the assembler does not
28396 support textual tags. (Eg gas prior to 2.20). If requested we include
28397 the tag name in a comment so that anyone reading the assembler output
28398 will know which tag is being set.
28400 This function is not static because arm-c.c needs it too. */
28403 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28405 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28406 if (flag_verbose_asm
|| flag_debug_asm
)
28407 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28408 asm_fprintf (asm_out_file
, "\n");
28412 arm_file_start (void)
28416 if (TARGET_UNIFIED_ASM
)
28417 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
28421 const char *fpu_name
;
28422 if (arm_selected_arch
)
28424 /* armv7ve doesn't support any extensions. */
28425 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
28427 /* Keep backward compatability for assemblers
28428 which don't support armv7ve. */
28429 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
28430 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
28431 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
28432 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
28433 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
28437 const char* pos
= strchr (arm_selected_arch
->name
, '+');
28441 gcc_assert (strlen (arm_selected_arch
->name
)
28442 <= sizeof (buf
) / sizeof (*pos
));
28443 strncpy (buf
, arm_selected_arch
->name
,
28444 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
28445 buf
[pos
- arm_selected_arch
->name
] = '\0';
28446 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
28447 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
28450 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
28453 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
28454 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
28457 const char* truncated_name
28458 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
28459 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
28462 if (TARGET_SOFT_FLOAT
)
28464 fpu_name
= "softvfp";
28468 fpu_name
= arm_fpu_desc
->name
;
28469 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
28471 if (TARGET_HARD_FLOAT
)
28472 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28473 if (TARGET_HARD_FLOAT_ABI
)
28474 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28477 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
28479 /* Some of these attributes only apply when the corresponding features
28480 are used. However we don't have any easy way of figuring this out.
28481 Conservatively record the setting that would have been used. */
28483 if (flag_rounding_math
)
28484 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28486 if (!flag_unsafe_math_optimizations
)
28488 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28489 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28491 if (flag_signaling_nans
)
28492 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28494 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28495 flag_finite_math_only
? 1 : 3);
28497 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28498 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28499 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28500 flag_short_enums
? 1 : 2);
28502 /* Tag_ABI_optimization_goals. */
28505 else if (optimize
>= 2)
28511 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28513 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28516 if (arm_fp16_format
)
28517 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28518 (int) arm_fp16_format
);
28520 if (arm_lang_output_object_attributes_hook
)
28521 arm_lang_output_object_attributes_hook();
28524 default_file_start ();
28528 arm_file_end (void)
28532 if (NEED_INDICATE_EXEC_STACK
)
28533 /* Add .note.GNU-stack. */
28534 file_end_indicate_exec_stack ();
28536 if (! thumb_call_reg_needed
)
28539 switch_to_section (text_section
);
28540 asm_fprintf (asm_out_file
, "\t.code 16\n");
28541 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28543 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28545 rtx label
= thumb_call_via_label
[regno
];
28549 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28550 CODE_LABEL_NUMBER (label
));
28551 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28557 /* Symbols in the text segment can be accessed without indirecting via the
28558 constant pool; it may take an extra binary operation, but this is still
28559 faster than indirecting via memory. Don't do this when not optimizing,
28560 since we won't be calculating al of the offsets necessary to do this
28564 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28566 if (optimize
> 0 && TREE_CONSTANT (decl
))
28567 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28569 default_encode_section_info (decl
, rtl
, first
);
28571 #endif /* !ARM_PE */
28574 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28576 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28577 && !strcmp (prefix
, "L"))
28579 arm_ccfsm_state
= 0;
28580 arm_target_insn
= NULL
;
28582 default_internal_label (stream
, prefix
, labelno
);
28585 /* Output code to add DELTA to the first argument, and then jump
28586 to FUNCTION. Used for C++ multiple inheritance. */
28588 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28589 HOST_WIDE_INT delta
,
28590 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28593 static int thunk_label
= 0;
28596 int mi_delta
= delta
;
28597 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28599 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28602 mi_delta
= - mi_delta
;
28604 final_start_function (emit_barrier (), file
, 1);
28608 int labelno
= thunk_label
++;
28609 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28610 /* Thunks are entered in arm mode when avaiable. */
28611 if (TARGET_THUMB1_ONLY
)
28613 /* push r3 so we can use it as a temporary. */
28614 /* TODO: Omit this save if r3 is not used. */
28615 fputs ("\tpush {r3}\n", file
);
28616 fputs ("\tldr\tr3, ", file
);
28620 fputs ("\tldr\tr12, ", file
);
28622 assemble_name (file
, label
);
28623 fputc ('\n', file
);
28626 /* If we are generating PIC, the ldr instruction below loads
28627 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28628 the address of the add + 8, so we have:
28630 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28633 Note that we have "+ 1" because some versions of GNU ld
28634 don't set the low bit of the result for R_ARM_REL32
28635 relocations against thumb function symbols.
28636 On ARMv6M this is +4, not +8. */
28637 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28638 assemble_name (file
, labelpc
);
28639 fputs (":\n", file
);
28640 if (TARGET_THUMB1_ONLY
)
28642 /* This is 2 insns after the start of the thunk, so we know it
28643 is 4-byte aligned. */
28644 fputs ("\tadd\tr3, pc, r3\n", file
);
28645 fputs ("\tmov r12, r3\n", file
);
28648 fputs ("\tadd\tr12, pc, r12\n", file
);
28650 else if (TARGET_THUMB1_ONLY
)
28651 fputs ("\tmov r12, r3\n", file
);
28653 if (TARGET_THUMB1_ONLY
)
28655 if (mi_delta
> 255)
28657 fputs ("\tldr\tr3, ", file
);
28658 assemble_name (file
, label
);
28659 fputs ("+4\n", file
);
28660 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
28661 mi_op
, this_regno
, this_regno
);
28663 else if (mi_delta
!= 0)
28665 /* Thumb1 unified syntax requires s suffix in instruction name when
28666 one of the operands is immediate. */
28667 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
28668 mi_op
, this_regno
, this_regno
,
28674 /* TODO: Use movw/movt for large constants when available. */
28675 while (mi_delta
!= 0)
28677 if ((mi_delta
& (3 << shift
)) == 0)
28681 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28682 mi_op
, this_regno
, this_regno
,
28683 mi_delta
& (0xff << shift
));
28684 mi_delta
&= ~(0xff << shift
);
28691 if (TARGET_THUMB1_ONLY
)
28692 fputs ("\tpop\t{r3}\n", file
);
28694 fprintf (file
, "\tbx\tr12\n");
28695 ASM_OUTPUT_ALIGN (file
, 2);
28696 assemble_name (file
, label
);
28697 fputs (":\n", file
);
28700 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28701 rtx tem
= XEXP (DECL_RTL (function
), 0);
28702 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28703 pipeline offset is four rather than eight. Adjust the offset
28705 tem
= plus_constant (GET_MODE (tem
), tem
,
28706 TARGET_THUMB1_ONLY
? -3 : -7);
28707 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28709 gen_rtx_SYMBOL_REF (Pmode
,
28710 ggc_strdup (labelpc
)));
28711 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28714 /* Output ".word .LTHUNKn". */
28715 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28717 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28718 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28722 fputs ("\tb\t", file
);
28723 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28724 if (NEED_PLT_RELOC
)
28725 fputs ("(PLT)", file
);
28726 fputc ('\n', file
);
28729 final_end_function ();
28733 arm_emit_vector_const (FILE *file
, rtx x
)
28736 const char * pattern
;
28738 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28740 switch (GET_MODE (x
))
28742 case V2SImode
: pattern
= "%08x"; break;
28743 case V4HImode
: pattern
= "%04x"; break;
28744 case V8QImode
: pattern
= "%02x"; break;
28745 default: gcc_unreachable ();
28748 fprintf (file
, "0x");
28749 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28753 element
= CONST_VECTOR_ELT (x
, i
);
28754 fprintf (file
, pattern
, INTVAL (element
));
28760 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28761 HFmode constant pool entries are actually loaded with ldr. */
28763 arm_emit_fp16_const (rtx c
)
28768 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28769 bits
= real_to_target (NULL
, &r
, HFmode
);
28770 if (WORDS_BIG_ENDIAN
)
28771 assemble_zeros (2);
28772 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28773 if (!WORDS_BIG_ENDIAN
)
28774 assemble_zeros (2);
28778 arm_output_load_gr (rtx
*operands
)
28785 if (!MEM_P (operands
[1])
28786 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28787 || !REG_P (reg
= XEXP (sum
, 0))
28788 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28789 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28790 return "wldrw%?\t%0, %1";
28792 /* Fix up an out-of-range load of a GR register. */
28793 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28794 wcgr
= operands
[0];
28796 output_asm_insn ("ldr%?\t%0, %1", operands
);
28798 operands
[0] = wcgr
;
28800 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28801 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28806 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28808 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28809 named arg and all anonymous args onto the stack.
28810 XXX I know the prologue shouldn't be pushing registers, but it is faster
28814 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28818 int second_time ATTRIBUTE_UNUSED
)
28820 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28823 cfun
->machine
->uses_anonymous_args
= 1;
28824 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28826 nregs
= pcum
->aapcs_ncrn
;
28827 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28831 nregs
= pcum
->nregs
;
28833 if (nregs
< NUM_ARG_REGS
)
28834 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28837 /* We can't rely on the caller doing the proper promotion when
28838 using APCS or ATPCS. */
28841 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28843 return !TARGET_AAPCS_BASED
;
28846 static machine_mode
28847 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28849 int *punsignedp ATTRIBUTE_UNUSED
,
28850 const_tree fntype ATTRIBUTE_UNUSED
,
28851 int for_return ATTRIBUTE_UNUSED
)
28853 if (GET_MODE_CLASS (mode
) == MODE_INT
28854 && GET_MODE_SIZE (mode
) < 4)
28860 /* AAPCS based ABIs use short enums by default. */
28863 arm_default_short_enums (void)
28865 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28869 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28872 arm_align_anon_bitfield (void)
28874 return TARGET_AAPCS_BASED
;
28878 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28881 arm_cxx_guard_type (void)
28883 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28887 /* The EABI says test the least significant bit of a guard variable. */
28890 arm_cxx_guard_mask_bit (void)
28892 return TARGET_AAPCS_BASED
;
28896 /* The EABI specifies that all array cookies are 8 bytes long. */
28899 arm_get_cookie_size (tree type
)
28903 if (!TARGET_AAPCS_BASED
)
28904 return default_cxx_get_cookie_size (type
);
28906 size
= build_int_cst (sizetype
, 8);
28911 /* The EABI says that array cookies should also contain the element size. */
28914 arm_cookie_has_size (void)
28916 return TARGET_AAPCS_BASED
;
28920 /* The EABI says constructors and destructors should return a pointer to
28921 the object constructed/destroyed. */
28924 arm_cxx_cdtor_returns_this (void)
28926 return TARGET_AAPCS_BASED
;
28929 /* The EABI says that an inline function may never be the key
28933 arm_cxx_key_method_may_be_inline (void)
28935 return !TARGET_AAPCS_BASED
;
28939 arm_cxx_determine_class_data_visibility (tree decl
)
28941 if (!TARGET_AAPCS_BASED
28942 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28945 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28946 is exported. However, on systems without dynamic vague linkage,
28947 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28948 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28949 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28951 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28952 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28956 arm_cxx_class_data_always_comdat (void)
28958 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28959 vague linkage if the class has no key function. */
28960 return !TARGET_AAPCS_BASED
;
28964 /* The EABI says __aeabi_atexit should be used to register static
28968 arm_cxx_use_aeabi_atexit (void)
28970 return TARGET_AAPCS_BASED
;
28975 arm_set_return_address (rtx source
, rtx scratch
)
28977 arm_stack_offsets
*offsets
;
28978 HOST_WIDE_INT delta
;
28980 unsigned long saved_regs
;
28982 offsets
= arm_get_frame_offsets ();
28983 saved_regs
= offsets
->saved_regs_mask
;
28985 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28986 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28989 if (frame_pointer_needed
)
28990 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28993 /* LR will be the first saved register. */
28994 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28999 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
29000 GEN_INT (delta
& ~4095)));
29005 addr
= stack_pointer_rtx
;
29007 addr
= plus_constant (Pmode
, addr
, delta
);
29009 /* The store needs to be marked as frame related in order to prevent
29010 DSE from deleting it as dead if it is based on fp. */
29011 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
29012 RTX_FRAME_RELATED_P (insn
) = 1;
29013 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
29019 thumb_set_return_address (rtx source
, rtx scratch
)
29021 arm_stack_offsets
*offsets
;
29022 HOST_WIDE_INT delta
;
29023 HOST_WIDE_INT limit
;
29026 unsigned long mask
;
29030 offsets
= arm_get_frame_offsets ();
29031 mask
= offsets
->saved_regs_mask
;
29032 if (mask
& (1 << LR_REGNUM
))
29035 /* Find the saved regs. */
29036 if (frame_pointer_needed
)
29038 delta
= offsets
->soft_frame
- offsets
->saved_args
;
29039 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29045 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29048 /* Allow for the stack frame. */
29049 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29051 /* The link register is always the first saved register. */
29054 /* Construct the address. */
29055 addr
= gen_rtx_REG (SImode
, reg
);
29058 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29059 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29063 addr
= plus_constant (Pmode
, addr
, delta
);
29065 /* The store needs to be marked as frame related in order to prevent
29066 DSE from deleting it as dead if it is based on fp. */
29067 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
29068 RTX_FRAME_RELATED_P (insn
) = 1;
29069 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
29072 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29075 /* Implements target hook vector_mode_supported_p. */
29077 arm_vector_mode_supported_p (machine_mode mode
)
29079 /* Neon also supports V2SImode, etc. listed in the clause below. */
29080 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29081 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
29084 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29085 && ((mode
== V2SImode
)
29086 || (mode
== V4HImode
)
29087 || (mode
== V8QImode
)))
29090 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29091 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29092 || mode
== V2HAmode
))
29098 /* Implements target hook array_mode_supported_p. */
29101 arm_array_mode_supported_p (machine_mode mode
,
29102 unsigned HOST_WIDE_INT nelems
)
29105 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29106 && (nelems
>= 2 && nelems
<= 4))
29112 /* Use the option -mvectorize-with-neon-double to override the use of quardword
29113 registers when autovectorizing for Neon, at least until multiple vector
29114 widths are supported properly by the middle-end. */
29116 static machine_mode
29117 arm_preferred_simd_mode (machine_mode mode
)
29123 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29125 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29127 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29129 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29131 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29138 if (TARGET_REALLY_IWMMXT
)
29154 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29156 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29157 using r0-r4 for function arguments, r7 for the stack frame and don't have
29158 enough left over to do doubleword arithmetic. For Thumb-2 all the
29159 potentially problematic instructions accept high registers so this is not
29160 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29161 that require many low registers. */
29163 arm_class_likely_spilled_p (reg_class_t rclass
)
29165 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29166 || rclass
== CC_REG
)
29172 /* Implements target hook small_register_classes_for_mode_p. */
29174 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29176 return TARGET_THUMB1
;
29179 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29180 ARM insns and therefore guarantee that the shift count is modulo 256.
29181 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29182 guarantee no particular behavior for out-of-range counts. */
29184 static unsigned HOST_WIDE_INT
29185 arm_shift_truncation_mask (machine_mode mode
)
29187 return mode
== SImode
? 255 : 0;
29191 /* Map internal gcc register numbers to DWARF2 register numbers. */
29194 arm_dbx_register_number (unsigned int regno
)
29199 if (IS_VFP_REGNUM (regno
))
29201 /* See comment in arm_dwarf_register_span. */
29202 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29203 return 64 + regno
- FIRST_VFP_REGNUM
;
29205 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29208 if (IS_IWMMXT_GR_REGNUM (regno
))
29209 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29211 if (IS_IWMMXT_REGNUM (regno
))
29212 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29214 gcc_unreachable ();
29217 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29218 GCC models tham as 64 32-bit registers, so we need to describe this to
29219 the DWARF generation code. Other registers can use the default. */
29221 arm_dwarf_register_span (rtx rtl
)
29229 regno
= REGNO (rtl
);
29230 if (!IS_VFP_REGNUM (regno
))
29233 /* XXX FIXME: The EABI defines two VFP register ranges:
29234 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29236 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29237 corresponding D register. Until GDB supports this, we shall use the
29238 legacy encodings. We also use these encodings for D0-D15 for
29239 compatibility with older debuggers. */
29240 mode
= GET_MODE (rtl
);
29241 if (GET_MODE_SIZE (mode
) < 8)
29244 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29246 nregs
= GET_MODE_SIZE (mode
) / 4;
29247 for (i
= 0; i
< nregs
; i
+= 2)
29248 if (TARGET_BIG_END
)
29250 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29251 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29255 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29256 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29261 nregs
= GET_MODE_SIZE (mode
) / 8;
29262 for (i
= 0; i
< nregs
; i
++)
29263 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29266 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29269 #if ARM_UNWIND_INFO
29270 /* Emit unwind directives for a store-multiple instruction or stack pointer
29271 push during alignment.
29272 These should only ever be generated by the function prologue code, so
29273 expect them to have a particular form.
29274 The store-multiple instruction sometimes pushes pc as the last register,
29275 although it should not be tracked into unwind information, or for -Os
29276 sometimes pushes some dummy registers before first register that needs
29277 to be tracked in unwind information; such dummy registers are there just
29278 to avoid separate stack adjustment, and will not be restored in the
29282 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
29285 HOST_WIDE_INT offset
;
29286 HOST_WIDE_INT nregs
;
29290 unsigned padfirst
= 0, padlast
= 0;
29293 e
= XVECEXP (p
, 0, 0);
29294 gcc_assert (GET_CODE (e
) == SET
);
29296 /* First insn will adjust the stack pointer. */
29297 gcc_assert (GET_CODE (e
) == SET
29298 && REG_P (SET_DEST (e
))
29299 && REGNO (SET_DEST (e
)) == SP_REGNUM
29300 && GET_CODE (SET_SRC (e
)) == PLUS
);
29302 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29303 nregs
= XVECLEN (p
, 0) - 1;
29304 gcc_assert (nregs
);
29306 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29309 /* For -Os dummy registers can be pushed at the beginning to
29310 avoid separate stack pointer adjustment. */
29311 e
= XVECEXP (p
, 0, 1);
29312 e
= XEXP (SET_DEST (e
), 0);
29313 if (GET_CODE (e
) == PLUS
)
29314 padfirst
= INTVAL (XEXP (e
, 1));
29315 gcc_assert (padfirst
== 0 || optimize_size
);
29316 /* The function prologue may also push pc, but not annotate it as it is
29317 never restored. We turn this into a stack pointer adjustment. */
29318 e
= XVECEXP (p
, 0, nregs
);
29319 e
= XEXP (SET_DEST (e
), 0);
29320 if (GET_CODE (e
) == PLUS
)
29321 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29323 padlast
= offset
- 4;
29324 gcc_assert (padlast
== 0 || padlast
== 4);
29326 fprintf (asm_out_file
, "\t.pad #4\n");
29328 fprintf (asm_out_file
, "\t.save {");
29330 else if (IS_VFP_REGNUM (reg
))
29333 fprintf (asm_out_file
, "\t.vsave {");
29336 /* Unknown register type. */
29337 gcc_unreachable ();
29339 /* If the stack increment doesn't match the size of the saved registers,
29340 something has gone horribly wrong. */
29341 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29345 /* The remaining insns will describe the stores. */
29346 for (i
= 1; i
<= nregs
; i
++)
29348 /* Expect (set (mem <addr>) (reg)).
29349 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29350 e
= XVECEXP (p
, 0, i
);
29351 gcc_assert (GET_CODE (e
) == SET
29352 && MEM_P (SET_DEST (e
))
29353 && REG_P (SET_SRC (e
)));
29355 reg
= REGNO (SET_SRC (e
));
29356 gcc_assert (reg
>= lastreg
);
29359 fprintf (asm_out_file
, ", ");
29360 /* We can't use %r for vfp because we need to use the
29361 double precision register names. */
29362 if (IS_VFP_REGNUM (reg
))
29363 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29365 asm_fprintf (asm_out_file
, "%r", reg
);
29367 #ifdef ENABLE_CHECKING
29368 /* Check that the addresses are consecutive. */
29369 e
= XEXP (SET_DEST (e
), 0);
29370 if (GET_CODE (e
) == PLUS
)
29371 gcc_assert (REG_P (XEXP (e
, 0))
29372 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29373 && CONST_INT_P (XEXP (e
, 1))
29374 && offset
== INTVAL (XEXP (e
, 1)));
29378 && REGNO (e
) == SP_REGNUM
);
29379 offset
+= reg_size
;
29382 fprintf (asm_out_file
, "}\n");
29384 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
29387 /* Emit unwind directives for a SET. */
29390 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
29398 switch (GET_CODE (e0
))
29401 /* Pushing a single register. */
29402 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29403 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29404 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29407 asm_fprintf (asm_out_file
, "\t.save ");
29408 if (IS_VFP_REGNUM (REGNO (e1
)))
29409 asm_fprintf(asm_out_file
, "{d%d}\n",
29410 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29412 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
29416 if (REGNO (e0
) == SP_REGNUM
)
29418 /* A stack increment. */
29419 if (GET_CODE (e1
) != PLUS
29420 || !REG_P (XEXP (e1
, 0))
29421 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29422 || !CONST_INT_P (XEXP (e1
, 1)))
29425 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
29426 -INTVAL (XEXP (e1
, 1)));
29428 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29430 HOST_WIDE_INT offset
;
29432 if (GET_CODE (e1
) == PLUS
)
29434 if (!REG_P (XEXP (e1
, 0))
29435 || !CONST_INT_P (XEXP (e1
, 1)))
29437 reg
= REGNO (XEXP (e1
, 0));
29438 offset
= INTVAL (XEXP (e1
, 1));
29439 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
29440 HARD_FRAME_POINTER_REGNUM
, reg
,
29443 else if (REG_P (e1
))
29446 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
29447 HARD_FRAME_POINTER_REGNUM
, reg
);
29452 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29454 /* Move from sp to reg. */
29455 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
29457 else if (GET_CODE (e1
) == PLUS
29458 && REG_P (XEXP (e1
, 0))
29459 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29460 && CONST_INT_P (XEXP (e1
, 1)))
29462 /* Set reg to offset from sp. */
29463 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
29464 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29476 /* Emit unwind directives for the given insn. */
29479 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
29482 bool handled_one
= false;
29484 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29487 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29488 && (TREE_NOTHROW (current_function_decl
)
29489 || crtl
->all_throwers_are_sibcalls
))
29492 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29495 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29497 switch (REG_NOTE_KIND (note
))
29499 case REG_FRAME_RELATED_EXPR
:
29500 pat
= XEXP (note
, 0);
29503 case REG_CFA_REGISTER
:
29504 pat
= XEXP (note
, 0);
29507 pat
= PATTERN (insn
);
29508 if (GET_CODE (pat
) == PARALLEL
)
29509 pat
= XVECEXP (pat
, 0, 0);
29512 /* Only emitted for IS_STACKALIGN re-alignment. */
29517 src
= SET_SRC (pat
);
29518 dest
= SET_DEST (pat
);
29520 gcc_assert (src
== stack_pointer_rtx
);
29521 reg
= REGNO (dest
);
29522 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29525 handled_one
= true;
29528 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29529 to get correct dwarf information for shrink-wrap. We should not
29530 emit unwind information for it because these are used either for
29531 pretend arguments or notes to adjust sp and restore registers from
29533 case REG_CFA_DEF_CFA
:
29534 case REG_CFA_ADJUST_CFA
:
29535 case REG_CFA_RESTORE
:
29538 case REG_CFA_EXPRESSION
:
29539 case REG_CFA_OFFSET
:
29540 /* ??? Only handling here what we actually emit. */
29541 gcc_unreachable ();
29549 pat
= PATTERN (insn
);
29552 switch (GET_CODE (pat
))
29555 arm_unwind_emit_set (asm_out_file
, pat
);
29559 /* Store multiple. */
29560 arm_unwind_emit_sequence (asm_out_file
, pat
);
29569 /* Output a reference from a function exception table to the type_info
29570 object X. The EABI specifies that the symbol should be relocated by
29571 an R_ARM_TARGET2 relocation. */
29574 arm_output_ttype (rtx x
)
29576 fputs ("\t.word\t", asm_out_file
);
29577 output_addr_const (asm_out_file
, x
);
29578 /* Use special relocations for symbol references. */
29579 if (!CONST_INT_P (x
))
29580 fputs ("(TARGET2)", asm_out_file
);
29581 fputc ('\n', asm_out_file
);
29586 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29589 arm_asm_emit_except_personality (rtx personality
)
29591 fputs ("\t.personality\t", asm_out_file
);
29592 output_addr_const (asm_out_file
, personality
);
29593 fputc ('\n', asm_out_file
);
29596 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29599 arm_asm_init_sections (void)
29601 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29604 #endif /* ARM_UNWIND_INFO */
29606 /* Output unwind directives for the start/end of a function. */
29609 arm_output_fn_unwind (FILE * f
, bool prologue
)
29611 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29615 fputs ("\t.fnstart\n", f
);
29618 /* If this function will never be unwound, then mark it as such.
29619 The came condition is used in arm_unwind_emit to suppress
29620 the frame annotations. */
29621 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29622 && (TREE_NOTHROW (current_function_decl
)
29623 || crtl
->all_throwers_are_sibcalls
))
29624 fputs("\t.cantunwind\n", f
);
29626 fputs ("\t.fnend\n", f
);
29631 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29633 enum tls_reloc reloc
;
29636 val
= XVECEXP (x
, 0, 0);
29637 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29639 output_addr_const (fp
, val
);
29644 fputs ("(tlsgd)", fp
);
29647 fputs ("(tlsldm)", fp
);
29650 fputs ("(tlsldo)", fp
);
29653 fputs ("(gottpoff)", fp
);
29656 fputs ("(tpoff)", fp
);
29659 fputs ("(tlsdesc)", fp
);
29662 gcc_unreachable ();
29671 fputs (" + (. - ", fp
);
29672 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29673 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29674 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29675 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29685 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29688 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29690 gcc_assert (size
== 4);
29691 fputs ("\t.word\t", file
);
29692 output_addr_const (file
, x
);
29693 fputs ("(tlsldo)", file
);
29696 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29699 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29701 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29702 return arm_emit_tls_decoration (fp
, x
);
29703 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29706 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29708 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29709 assemble_name_raw (fp
, label
);
29713 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29715 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29719 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29723 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29725 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29729 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29733 else if (GET_CODE (x
) == CONST_VECTOR
)
29734 return arm_emit_vector_const (fp
, x
);
29739 /* Output assembly for a shift instruction.
29740 SET_FLAGS determines how the instruction modifies the condition codes.
29741 0 - Do not set condition codes.
29742 1 - Set condition codes.
29743 2 - Use smallest instruction. */
29745 arm_output_shift(rtx
* operands
, int set_flags
)
29748 static const char flag_chars
[3] = {'?', '.', '!'};
29753 c
= flag_chars
[set_flags
];
29754 if (TARGET_UNIFIED_ASM
)
29756 shift
= shift_op(operands
[3], &val
);
29760 operands
[2] = GEN_INT(val
);
29761 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29764 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29767 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29768 output_asm_insn (pattern
, operands
);
29772 /* Output assembly for a WMMX immediate shift instruction. */
29774 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29776 int shift
= INTVAL (operands
[2]);
29778 machine_mode opmode
= GET_MODE (operands
[0]);
29780 gcc_assert (shift
>= 0);
29782 /* If the shift value in the register versions is > 63 (for D qualifier),
29783 31 (for W qualifier) or 15 (for H qualifier). */
29784 if (((opmode
== V4HImode
) && (shift
> 15))
29785 || ((opmode
== V2SImode
) && (shift
> 31))
29786 || ((opmode
== DImode
) && (shift
> 63)))
29790 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29791 output_asm_insn (templ
, operands
);
29792 if (opmode
== DImode
)
29794 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29795 output_asm_insn (templ
, operands
);
29800 /* The destination register will contain all zeros. */
29801 sprintf (templ
, "wzero\t%%0");
29802 output_asm_insn (templ
, operands
);
29807 if ((opmode
== DImode
) && (shift
> 32))
29809 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29810 output_asm_insn (templ
, operands
);
29811 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29812 output_asm_insn (templ
, operands
);
29816 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29817 output_asm_insn (templ
, operands
);
29822 /* Output assembly for a WMMX tinsr instruction. */
29824 arm_output_iwmmxt_tinsr (rtx
*operands
)
29826 int mask
= INTVAL (operands
[3]);
29829 int units
= mode_nunits
[GET_MODE (operands
[0])];
29830 gcc_assert ((mask
& (mask
- 1)) == 0);
29831 for (i
= 0; i
< units
; ++i
)
29833 if ((mask
& 0x01) == 1)
29839 gcc_assert (i
< units
);
29841 switch (GET_MODE (operands
[0]))
29844 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29847 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29850 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29853 gcc_unreachable ();
29856 output_asm_insn (templ
, operands
);
29861 /* Output a Thumb-1 casesi dispatch sequence. */
29863 thumb1_output_casesi (rtx
*operands
)
29865 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
29867 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29869 switch (GET_MODE(diff_vec
))
29872 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29873 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29875 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29876 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29878 return "bl\t%___gnu_thumb1_case_si";
29880 gcc_unreachable ();
29884 /* Output a Thumb-2 casesi instruction. */
29886 thumb2_output_casesi (rtx
*operands
)
29888 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
29890 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29892 output_asm_insn ("cmp\t%0, %1", operands
);
29893 output_asm_insn ("bhi\t%l3", operands
);
29894 switch (GET_MODE(diff_vec
))
29897 return "tbb\t[%|pc, %0]";
29899 return "tbh\t[%|pc, %0, lsl #1]";
29903 output_asm_insn ("adr\t%4, %l2", operands
);
29904 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29905 output_asm_insn ("add\t%4, %4, %5", operands
);
29910 output_asm_insn ("adr\t%4, %l2", operands
);
29911 return "ldr\t%|pc, [%4, %0, lsl #2]";
29914 gcc_unreachable ();
29918 /* Most ARM cores are single issue, but some newer ones can dual issue.
29919 The scheduler descriptions rely on this being correct. */
29921 arm_issue_rate (void)
29948 /* A table and a function to perform ARM-specific name mangling for
29949 NEON vector types in order to conform to the AAPCS (see "Procedure
29950 Call Standard for the ARM Architecture", Appendix A). To qualify
29951 for emission with the mangled names defined in that document, a
29952 vector type must not only be of the correct mode but also be
29953 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29957 const char *element_type_name
;
29958 const char *aapcs_name
;
29959 } arm_mangle_map_entry
;
29961 static arm_mangle_map_entry arm_mangle_map
[] = {
29962 /* 64-bit containerized types. */
29963 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29964 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29965 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29966 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29967 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29968 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29969 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29970 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29971 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29972 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29974 /* 128-bit containerized types. */
29975 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29976 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29977 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29978 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29979 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29980 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29981 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29982 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29983 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29984 { VOIDmode
, NULL
, NULL
}
29988 arm_mangle_type (const_tree type
)
29990 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29992 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29993 has to be managled as if it is in the "std" namespace. */
29994 if (TARGET_AAPCS_BASED
29995 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29996 return "St9__va_list";
29998 /* Half-precision float. */
29999 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
30002 if (TREE_CODE (type
) != VECTOR_TYPE
)
30005 /* Check the mode of the vector type, and the name of the vector
30006 element type, against the table. */
30007 while (pos
->mode
!= VOIDmode
)
30009 tree elt_type
= TREE_TYPE (type
);
30011 if (pos
->mode
== TYPE_MODE (type
)
30012 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
30013 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
30014 pos
->element_type_name
))
30015 return pos
->aapcs_name
;
30020 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
30037 /* Adjust register allocation order when compiling for Thumb. */
30040 arm_order_regs_for_local_alloc (void)
30042 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
30043 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
30045 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
30046 sizeof (thumb_core_reg_alloc_order
));
30049 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30052 arm_frame_pointer_required (void)
30054 return (cfun
->has_nonlocal_label
30055 || SUBTARGET_FRAME_POINTER_REQUIRED
30056 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
30059 /* Only thumb1 can't support conditional execution, so return true if
30060 the target is not thumb1. */
30062 arm_have_conditional_execution (void)
30064 return !TARGET_THUMB1
;
30068 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
30070 machine_mode in_mode
, out_mode
;
30072 bool out_unsigned_p
= TYPE_UNSIGNED (type_out
);
30074 if (TREE_CODE (type_out
) != VECTOR_TYPE
30075 || TREE_CODE (type_in
) != VECTOR_TYPE
)
30078 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30079 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
30080 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30081 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30083 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30084 decl of the vectorized builtin for the appropriate vector mode.
30085 NULL_TREE is returned if no such builtin is available. */
30086 #undef ARM_CHECK_BUILTIN_MODE
30087 #define ARM_CHECK_BUILTIN_MODE(C) \
30088 (TARGET_NEON && TARGET_FPU_ARMV8 \
30089 && flag_unsafe_math_optimizations \
30090 && ARM_CHECK_BUILTIN_MODE_1 (C))
30092 #undef ARM_CHECK_BUILTIN_MODE_1
30093 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30094 (out_mode == SFmode && out_n == C \
30095 && in_mode == SFmode && in_n == C)
30097 #undef ARM_FIND_VRINT_VARIANT
30098 #define ARM_FIND_VRINT_VARIANT(N) \
30099 (ARM_CHECK_BUILTIN_MODE (2) \
30100 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30101 : (ARM_CHECK_BUILTIN_MODE (4) \
30102 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30105 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
30107 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
30110 case BUILT_IN_FLOORF
:
30111 return ARM_FIND_VRINT_VARIANT (vrintm
);
30112 case BUILT_IN_CEILF
:
30113 return ARM_FIND_VRINT_VARIANT (vrintp
);
30114 case BUILT_IN_TRUNCF
:
30115 return ARM_FIND_VRINT_VARIANT (vrintz
);
30116 case BUILT_IN_ROUNDF
:
30117 return ARM_FIND_VRINT_VARIANT (vrinta
);
30118 #undef ARM_CHECK_BUILTIN_MODE_1
30119 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30120 (out_mode == SImode && out_n == C \
30121 && in_mode == SFmode && in_n == C)
30123 #define ARM_FIND_VCVT_VARIANT(N) \
30124 (ARM_CHECK_BUILTIN_MODE (2) \
30125 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30126 : (ARM_CHECK_BUILTIN_MODE (4) \
30127 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30130 #define ARM_FIND_VCVTU_VARIANT(N) \
30131 (ARM_CHECK_BUILTIN_MODE (2) \
30132 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30133 : (ARM_CHECK_BUILTIN_MODE (4) \
30134 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30136 case BUILT_IN_LROUNDF
:
30137 return out_unsigned_p
30138 ? ARM_FIND_VCVTU_VARIANT (vcvta
)
30139 : ARM_FIND_VCVT_VARIANT (vcvta
);
30140 case BUILT_IN_LCEILF
:
30141 return out_unsigned_p
30142 ? ARM_FIND_VCVTU_VARIANT (vcvtp
)
30143 : ARM_FIND_VCVT_VARIANT (vcvtp
);
30144 case BUILT_IN_LFLOORF
:
30145 return out_unsigned_p
30146 ? ARM_FIND_VCVTU_VARIANT (vcvtm
)
30147 : ARM_FIND_VCVT_VARIANT (vcvtm
);
30148 #undef ARM_CHECK_BUILTIN_MODE
30149 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30150 (out_mode == N##mode && out_n == C \
30151 && in_mode == N##mode && in_n == C)
30152 case BUILT_IN_BSWAP16
:
30153 if (ARM_CHECK_BUILTIN_MODE (4, HI
))
30154 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi
, false);
30155 else if (ARM_CHECK_BUILTIN_MODE (8, HI
))
30156 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi
, false);
30159 case BUILT_IN_BSWAP32
:
30160 if (ARM_CHECK_BUILTIN_MODE (2, SI
))
30161 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si
, false);
30162 else if (ARM_CHECK_BUILTIN_MODE (4, SI
))
30163 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si
, false);
30166 case BUILT_IN_BSWAP64
:
30167 if (ARM_CHECK_BUILTIN_MODE (2, DI
))
30168 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di
, false);
30171 case BUILT_IN_COPYSIGNF
:
30172 if (ARM_CHECK_BUILTIN_MODE (2, SF
))
30173 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf
, false);
30174 else if (ARM_CHECK_BUILTIN_MODE (4, SF
))
30175 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf
, false);
30185 #undef ARM_FIND_VCVT_VARIANT
30186 #undef ARM_FIND_VCVTU_VARIANT
30187 #undef ARM_CHECK_BUILTIN_MODE
30188 #undef ARM_FIND_VRINT_VARIANT
30191 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30192 static HOST_WIDE_INT
30193 arm_vector_alignment (const_tree type
)
30195 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30197 if (TARGET_AAPCS_BASED
)
30198 align
= MIN (align
, 64);
30203 static unsigned int
30204 arm_autovectorize_vector_sizes (void)
30206 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
30210 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30212 /* Vectors which aren't in packed structures will not be less aligned than
30213 the natural alignment of their element type, so this is safe. */
30214 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30217 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30221 arm_builtin_support_vector_misalignment (machine_mode mode
,
30222 const_tree type
, int misalignment
,
30225 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30227 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30232 /* If the misalignment is unknown, we should be able to handle the access
30233 so long as it is not to a member of a packed data structure. */
30234 if (misalignment
== -1)
30237 /* Return true if the misalignment is a multiple of the natural alignment
30238 of the vector's element type. This is probably always going to be
30239 true in practice, since we've already established that this isn't a
30241 return ((misalignment
% align
) == 0);
30244 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30249 arm_conditional_register_usage (void)
30253 if (TARGET_THUMB1
&& optimize_size
)
30255 /* When optimizing for size on Thumb-1, it's better not
30256 to use the HI regs, because of the overhead of
30258 for (regno
= FIRST_HI_REGNUM
;
30259 regno
<= LAST_HI_REGNUM
; ++regno
)
30260 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30263 /* The link register can be clobbered by any branch insn,
30264 but we have no way to track that at present, so mark
30265 it as unavailable. */
30267 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30269 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
30271 /* VFPv3 registers are disabled when earlier VFP
30272 versions are selected due to the definition of
30273 LAST_VFP_REGNUM. */
30274 for (regno
= FIRST_VFP_REGNUM
;
30275 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30277 fixed_regs
[regno
] = 0;
30278 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30279 || regno
>= FIRST_VFP_REGNUM
+ 32;
30283 if (TARGET_REALLY_IWMMXT
)
30285 regno
= FIRST_IWMMXT_GR_REGNUM
;
30286 /* The 2002/10/09 revision of the XScale ABI has wCG0
30287 and wCG1 as call-preserved registers. The 2002/11/21
30288 revision changed this so that all wCG registers are
30289 scratch registers. */
30290 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30291 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30292 fixed_regs
[regno
] = 0;
30293 /* The XScale ABI has wR0 - wR9 as scratch registers,
30294 the rest as call-preserved registers. */
30295 for (regno
= FIRST_IWMMXT_REGNUM
;
30296 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30298 fixed_regs
[regno
] = 0;
30299 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30303 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30305 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30306 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30308 else if (TARGET_APCS_STACK
)
30310 fixed_regs
[10] = 1;
30311 call_used_regs
[10] = 1;
30313 /* -mcaller-super-interworking reserves r11 for calls to
30314 _interwork_r11_call_via_rN(). Making the register global
30315 is an easy way of ensuring that it remains valid for all
30317 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30318 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30320 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30321 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30322 if (TARGET_CALLER_INTERWORKING
)
30323 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30325 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30329 arm_preferred_rename_class (reg_class_t rclass
)
30331 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30332 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30333 and code size can be reduced. */
30334 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30340 /* Compute the atrribute "length" of insn "*push_multi".
30341 So this function MUST be kept in sync with that insn pattern. */
30343 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30345 int i
, regno
, hi_reg
;
30346 int num_saves
= XVECLEN (parallel_op
, 0);
30356 regno
= REGNO (first_op
);
30357 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30358 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30360 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30361 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30369 /* Compute the number of instructions emitted by output_move_double. */
30371 arm_count_output_move_double_insns (rtx
*operands
)
30375 /* output_move_double may modify the operands array, so call it
30376 here on a copy of the array. */
30377 ops
[0] = operands
[0];
30378 ops
[1] = operands
[1];
30379 output_move_double (ops
, false, &count
);
30384 vfp3_const_double_for_fract_bits (rtx operand
)
30386 REAL_VALUE_TYPE r0
;
30388 if (!CONST_DOUBLE_P (operand
))
30391 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30392 if (exact_real_inverse (DFmode
, &r0
))
30394 if (exact_real_truncate (DFmode
, &r0
))
30396 HOST_WIDE_INT value
= real_to_integer (&r0
);
30397 value
= value
& 0xffffffff;
30398 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30399 return int_log2 (value
);
30406 vfp3_const_double_for_bits (rtx operand
)
30408 REAL_VALUE_TYPE r0
;
30410 if (!CONST_DOUBLE_P (operand
))
30413 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30414 if (exact_real_truncate (DFmode
, &r0
))
30416 HOST_WIDE_INT value
= real_to_integer (&r0
);
30417 value
= value
& 0xffffffff;
30418 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30419 return int_log2 (value
);
30425 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30428 arm_pre_atomic_barrier (enum memmodel model
)
30430 if (need_atomic_barrier_p (model
, true))
30431 emit_insn (gen_memory_barrier ());
30435 arm_post_atomic_barrier (enum memmodel model
)
30437 if (need_atomic_barrier_p (model
, false))
30438 emit_insn (gen_memory_barrier ());
30441 /* Emit the load-exclusive and store-exclusive instructions.
30442 Use acquire and release versions if necessary. */
30445 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30447 rtx (*gen
) (rtx
, rtx
);
30453 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30454 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30455 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30456 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30458 gcc_unreachable ();
30465 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30466 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
30467 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
30468 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
30470 gcc_unreachable ();
30474 emit_insn (gen (rval
, mem
));
30478 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
30481 rtx (*gen
) (rtx
, rtx
, rtx
);
30487 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30488 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30489 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30490 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30492 gcc_unreachable ();
30499 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30500 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
30501 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
30502 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
30504 gcc_unreachable ();
30508 emit_insn (gen (bval
, rval
, mem
));
30511 /* Mark the previous jump instruction as unlikely. */
30514 emit_unlikely_jump (rtx insn
)
30516 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
30518 insn
= emit_jump_insn (insn
);
30519 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
30522 /* Expand a compare and swap pattern. */
30525 arm_expand_compare_and_swap (rtx operands
[])
30527 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30529 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
30531 bval
= operands
[0];
30532 rval
= operands
[1];
30534 oldval
= operands
[3];
30535 newval
= operands
[4];
30536 is_weak
= operands
[5];
30537 mod_s
= operands
[6];
30538 mod_f
= operands
[7];
30539 mode
= GET_MODE (mem
);
30541 /* Normally the succ memory model must be stronger than fail, but in the
30542 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30543 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30545 if (TARGET_HAVE_LDACQ
30546 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
30547 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
30548 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30554 /* For narrow modes, we're going to perform the comparison in SImode,
30555 so do the zero-extension now. */
30556 rval
= gen_reg_rtx (SImode
);
30557 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30561 /* Force the value into a register if needed. We waited until after
30562 the zero-extension above to do this properly. */
30563 if (!arm_add_operand (oldval
, SImode
))
30564 oldval
= force_reg (SImode
, oldval
);
30568 if (!cmpdi_operand (oldval
, mode
))
30569 oldval
= force_reg (mode
, oldval
);
30573 gcc_unreachable ();
30578 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
30579 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
30580 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
30581 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
30583 gcc_unreachable ();
30586 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
30588 if (mode
== QImode
|| mode
== HImode
)
30589 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30591 /* In all cases, we arrange for success to be signaled by Z set.
30592 This arrangement allows for the boolean result to be used directly
30593 in a subsequent branch, post optimization. */
30594 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30595 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
30596 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
30599 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30600 another memory store between the load-exclusive and store-exclusive can
30601 reset the monitor from Exclusive to Open state. This means we must wait
30602 until after reload to split the pattern, lest we get a register spill in
30603 the middle of the atomic sequence. */
30606 arm_split_compare_and_swap (rtx operands
[])
30608 rtx rval
, mem
, oldval
, newval
, scratch
;
30610 enum memmodel mod_s
, mod_f
;
30612 rtx_code_label
*label1
, *label2
;
30615 rval
= operands
[0];
30617 oldval
= operands
[2];
30618 newval
= operands
[3];
30619 is_weak
= (operands
[4] != const0_rtx
);
30620 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30621 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30622 scratch
= operands
[7];
30623 mode
= GET_MODE (mem
);
30625 bool use_acquire
= TARGET_HAVE_LDACQ
30626 && !(mod_s
== MEMMODEL_RELAXED
30627 || mod_s
== MEMMODEL_CONSUME
30628 || mod_s
== MEMMODEL_RELEASE
);
30630 bool use_release
= TARGET_HAVE_LDACQ
30631 && !(mod_s
== MEMMODEL_RELAXED
30632 || mod_s
== MEMMODEL_CONSUME
30633 || mod_s
== MEMMODEL_ACQUIRE
);
30635 /* Checks whether a barrier is needed and emits one accordingly. */
30636 if (!(use_acquire
|| use_release
))
30637 arm_pre_atomic_barrier (mod_s
);
30642 label1
= gen_label_rtx ();
30643 emit_label (label1
);
30645 label2
= gen_label_rtx ();
30647 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30649 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30650 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30651 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30652 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30653 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30655 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30657 /* Weak or strong, we want EQ to be true for success, so that we
30658 match the flags that we got from the compare above. */
30659 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30660 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30661 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30665 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30666 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30667 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30668 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30671 if (mod_f
!= MEMMODEL_RELAXED
)
30672 emit_label (label2
);
30674 /* Checks whether a barrier is needed and emits one accordingly. */
30675 if (!(use_acquire
|| use_release
))
30676 arm_post_atomic_barrier (mod_s
);
30678 if (mod_f
== MEMMODEL_RELAXED
)
30679 emit_label (label2
);
30683 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30684 rtx value
, rtx model_rtx
, rtx cond
)
30686 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30687 machine_mode mode
= GET_MODE (mem
);
30688 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30689 rtx_code_label
*label
;
30692 bool use_acquire
= TARGET_HAVE_LDACQ
30693 && !(model
== MEMMODEL_RELAXED
30694 || model
== MEMMODEL_CONSUME
30695 || model
== MEMMODEL_RELEASE
);
30697 bool use_release
= TARGET_HAVE_LDACQ
30698 && !(model
== MEMMODEL_RELAXED
30699 || model
== MEMMODEL_CONSUME
30700 || model
== MEMMODEL_ACQUIRE
);
30702 /* Checks whether a barrier is needed and emits one accordingly. */
30703 if (!(use_acquire
|| use_release
))
30704 arm_pre_atomic_barrier (model
);
30706 label
= gen_label_rtx ();
30707 emit_label (label
);
30710 new_out
= gen_lowpart (wmode
, new_out
);
30712 old_out
= gen_lowpart (wmode
, old_out
);
30715 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30717 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30726 x
= gen_rtx_AND (wmode
, old_out
, value
);
30727 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30728 x
= gen_rtx_NOT (wmode
, new_out
);
30729 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30733 if (CONST_INT_P (value
))
30735 value
= GEN_INT (-INTVAL (value
));
30741 if (mode
== DImode
)
30743 /* DImode plus/minus need to clobber flags. */
30744 /* The adddi3 and subdi3 patterns are incorrectly written so that
30745 they require matching operands, even when we could easily support
30746 three operands. Thankfully, this can be fixed up post-splitting,
30747 as the individual add+adc patterns do accept three operands and
30748 post-reload cprop can make these moves go away. */
30749 emit_move_insn (new_out
, old_out
);
30751 x
= gen_adddi3 (new_out
, new_out
, value
);
30753 x
= gen_subdi3 (new_out
, new_out
, value
);
30760 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30761 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30765 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30768 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30769 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30771 /* Checks whether a barrier is needed and emits one accordingly. */
30772 if (!(use_acquire
|| use_release
))
30773 arm_post_atomic_barrier (model
);
30776 #define MAX_VECT_LEN 16
30778 struct expand_vec_perm_d
30780 rtx target
, op0
, op1
;
30781 unsigned char perm
[MAX_VECT_LEN
];
30782 machine_mode vmode
;
30783 unsigned char nelt
;
30788 /* Generate a variable permutation. */
30791 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30793 machine_mode vmode
= GET_MODE (target
);
30794 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30796 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30797 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30798 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30799 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30800 gcc_checking_assert (TARGET_NEON
);
30804 if (vmode
== V8QImode
)
30805 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30807 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30813 if (vmode
== V8QImode
)
30815 pair
= gen_reg_rtx (V16QImode
);
30816 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30817 pair
= gen_lowpart (TImode
, pair
);
30818 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30822 pair
= gen_reg_rtx (OImode
);
30823 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30824 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30830 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30832 machine_mode vmode
= GET_MODE (target
);
30833 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30834 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30835 rtx rmask
[MAX_VECT_LEN
], mask
;
30837 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30838 numbering of elements for big-endian, we must reverse the order. */
30839 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30841 /* The VTBL instruction does not use a modulo index, so we must take care
30842 of that ourselves. */
30843 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30844 for (i
= 0; i
< nelt
; ++i
)
30846 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30847 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30849 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30852 /* Generate or test for an insn that supports a constant permutation. */
30854 /* Recognize patterns for the VUZP insns. */
30857 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30859 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30860 rtx out0
, out1
, in0
, in1
, x
;
30861 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30863 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30866 /* Note that these are little-endian tests. Adjust for big-endian later. */
30867 if (d
->perm
[0] == 0)
30869 else if (d
->perm
[0] == 1)
30873 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30875 for (i
= 0; i
< nelt
; i
++)
30877 unsigned elt
= (i
* 2 + odd
) & mask
;
30878 if (d
->perm
[i
] != elt
)
30888 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30889 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30890 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30891 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30892 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30893 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30894 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30895 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30897 gcc_unreachable ();
30902 if (BYTES_BIG_ENDIAN
)
30904 x
= in0
, in0
= in1
, in1
= x
;
30909 out1
= gen_reg_rtx (d
->vmode
);
30911 x
= out0
, out0
= out1
, out1
= x
;
30913 emit_insn (gen (out0
, in0
, in1
, out1
));
30917 /* Recognize patterns for the VZIP insns. */
30920 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30922 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30923 rtx out0
, out1
, in0
, in1
, x
;
30924 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30926 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30929 /* Note that these are little-endian tests. Adjust for big-endian later. */
30931 if (d
->perm
[0] == high
)
30933 else if (d
->perm
[0] == 0)
30937 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30939 for (i
= 0; i
< nelt
/ 2; i
++)
30941 unsigned elt
= (i
+ high
) & mask
;
30942 if (d
->perm
[i
* 2] != elt
)
30944 elt
= (elt
+ nelt
) & mask
;
30945 if (d
->perm
[i
* 2 + 1] != elt
)
30955 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30956 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30957 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30958 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30959 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30960 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30961 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30962 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30964 gcc_unreachable ();
30969 if (BYTES_BIG_ENDIAN
)
30971 x
= in0
, in0
= in1
, in1
= x
;
30976 out1
= gen_reg_rtx (d
->vmode
);
30978 x
= out0
, out0
= out1
, out1
= x
;
30980 emit_insn (gen (out0
, in0
, in1
, out1
));
30984 /* Recognize patterns for the VREV insns. */
30987 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30989 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30990 rtx (*gen
)(rtx
, rtx
, rtx
);
30992 if (!d
->one_vector_p
)
31001 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
31002 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
31010 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
31011 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
31012 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
31013 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
31021 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
31022 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
31023 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
31024 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
31025 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
31026 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
31027 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
31028 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
31037 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
31038 for (j
= 0; j
<= diff
; j
+= 1)
31040 /* This is guaranteed to be true as the value of diff
31041 is 7, 3, 1 and we should have enough elements in the
31042 queue to generate this. Getting a vector mask with a
31043 value of diff other than these values implies that
31044 something is wrong by the time we get here. */
31045 gcc_assert (i
+ j
< nelt
);
31046 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
31054 /* ??? The third operand is an artifact of the builtin infrastructure
31055 and is ignored by the actual instruction. */
31056 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
31060 /* Recognize patterns for the VTRN insns. */
31063 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
31065 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
31066 rtx out0
, out1
, in0
, in1
, x
;
31067 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
31069 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31072 /* Note that these are little-endian tests. Adjust for big-endian later. */
31073 if (d
->perm
[0] == 0)
31075 else if (d
->perm
[0] == 1)
31079 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31081 for (i
= 0; i
< nelt
; i
+= 2)
31083 if (d
->perm
[i
] != i
+ odd
)
31085 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
31095 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
31096 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
31097 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
31098 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
31099 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
31100 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
31101 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
31102 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
31104 gcc_unreachable ();
31109 if (BYTES_BIG_ENDIAN
)
31111 x
= in0
, in0
= in1
, in1
= x
;
31116 out1
= gen_reg_rtx (d
->vmode
);
31118 x
= out0
, out0
= out1
, out1
= x
;
31120 emit_insn (gen (out0
, in0
, in1
, out1
));
31124 /* Recognize patterns for the VEXT insns. */
31127 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
31129 unsigned int i
, nelt
= d
->nelt
;
31130 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
31133 unsigned int location
;
31135 unsigned int next
= d
->perm
[0] + 1;
31137 /* TODO: Handle GCC's numbering of elements for big-endian. */
31138 if (BYTES_BIG_ENDIAN
)
31141 /* Check if the extracted indexes are increasing by one. */
31142 for (i
= 1; i
< nelt
; next
++, i
++)
31144 /* If we hit the most significant element of the 2nd vector in
31145 the previous iteration, no need to test further. */
31146 if (next
== 2 * nelt
)
31149 /* If we are operating on only one vector: it could be a
31150 rotation. If there are only two elements of size < 64, let
31151 arm_evpc_neon_vrev catch it. */
31152 if (d
->one_vector_p
&& (next
== nelt
))
31154 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
31160 if (d
->perm
[i
] != next
)
31164 location
= d
->perm
[0];
31168 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
31169 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
31170 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
31171 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
31172 case V2SImode
: gen
= gen_neon_vextv2si
; break;
31173 case V4SImode
: gen
= gen_neon_vextv4si
; break;
31174 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
31175 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
31176 case V2DImode
: gen
= gen_neon_vextv2di
; break;
31185 offset
= GEN_INT (location
);
31186 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
31190 /* The NEON VTBL instruction is a fully variable permuation that's even
31191 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31192 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31193 can do slightly better by expanding this as a constant where we don't
31194 have to apply a mask. */
31197 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
31199 rtx rperm
[MAX_VECT_LEN
], sel
;
31200 machine_mode vmode
= d
->vmode
;
31201 unsigned int i
, nelt
= d
->nelt
;
31203 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31204 numbering of elements for big-endian, we must reverse the order. */
31205 if (BYTES_BIG_ENDIAN
)
31211 /* Generic code will try constant permutation twice. Once with the
31212 original mode and again with the elements lowered to QImode.
31213 So wait and don't do the selector expansion ourselves. */
31214 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
31217 for (i
= 0; i
< nelt
; ++i
)
31218 rperm
[i
] = GEN_INT (d
->perm
[i
]);
31219 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
31220 sel
= force_reg (vmode
, sel
);
31222 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
31227 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
31229 /* Check if the input mask matches vext before reordering the
31232 if (arm_evpc_neon_vext (d
))
31235 /* The pattern matching functions above are written to look for a small
31236 number to begin the sequence (0, 1, N/2). If we begin with an index
31237 from the second operand, we can swap the operands. */
31238 if (d
->perm
[0] >= d
->nelt
)
31240 unsigned i
, nelt
= d
->nelt
;
31243 for (i
= 0; i
< nelt
; ++i
)
31244 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
31253 if (arm_evpc_neon_vuzp (d
))
31255 if (arm_evpc_neon_vzip (d
))
31257 if (arm_evpc_neon_vrev (d
))
31259 if (arm_evpc_neon_vtrn (d
))
31261 return arm_evpc_neon_vtbl (d
);
31266 /* Expand a vec_perm_const pattern. */
31269 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31271 struct expand_vec_perm_d d
;
31272 int i
, nelt
, which
;
31278 d
.vmode
= GET_MODE (target
);
31279 gcc_assert (VECTOR_MODE_P (d
.vmode
));
31280 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31281 d
.testing_p
= false;
31283 for (i
= which
= 0; i
< nelt
; ++i
)
31285 rtx e
= XVECEXP (sel
, 0, i
);
31286 int ei
= INTVAL (e
) & (2 * nelt
- 1);
31287 which
|= (ei
< nelt
? 1 : 2);
31297 d
.one_vector_p
= false;
31298 if (!rtx_equal_p (op0
, op1
))
31301 /* The elements of PERM do not suggest that only the first operand
31302 is used, but both operands are identical. Allow easier matching
31303 of the permutation by folding the permutation into the single
31307 for (i
= 0; i
< nelt
; ++i
)
31308 d
.perm
[i
] &= nelt
- 1;
31310 d
.one_vector_p
= true;
31315 d
.one_vector_p
= true;
31319 return arm_expand_vec_perm_const_1 (&d
);
31322 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31325 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
31326 const unsigned char *sel
)
31328 struct expand_vec_perm_d d
;
31329 unsigned int i
, nelt
, which
;
31333 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31334 d
.testing_p
= true;
31335 memcpy (d
.perm
, sel
, nelt
);
31337 /* Categorize the set of elements in the selector. */
31338 for (i
= which
= 0; i
< nelt
; ++i
)
31340 unsigned char e
= d
.perm
[i
];
31341 gcc_assert (e
< 2 * nelt
);
31342 which
|= (e
< nelt
? 1 : 2);
31345 /* For all elements from second vector, fold the elements to first. */
31347 for (i
= 0; i
< nelt
; ++i
)
31350 /* Check whether the mask can be applied to the vector type. */
31351 d
.one_vector_p
= (which
!= 3);
31353 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
31354 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31355 if (!d
.one_vector_p
)
31356 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31359 ret
= arm_expand_vec_perm_const_1 (&d
);
31366 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
31368 /* If we are soft float and we do not have ldrd
31369 then all auto increment forms are ok. */
31370 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
31375 /* Post increment and Pre Decrement are supported for all
31376 instruction forms except for vector forms. */
31379 if (VECTOR_MODE_P (mode
))
31381 if (code
!= ARM_PRE_DEC
)
31391 /* Without LDRD and mode size greater than
31392 word size, there is no point in auto-incrementing
31393 because ldm and stm will not have these forms. */
31394 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
31397 /* Vector and floating point modes do not support
31398 these auto increment forms. */
31399 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
31412 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31413 on ARM, since we know that shifts by negative amounts are no-ops.
31414 Additionally, the default expansion code is not available or suitable
31415 for post-reload insn splits (this can occur when the register allocator
31416 chooses not to do a shift in NEON).
31418 This function is used in both initial expand and post-reload splits, and
31419 handles all kinds of 64-bit shifts.
31421 Input requirements:
31422 - It is safe for the input and output to be the same register, but
31423 early-clobber rules apply for the shift amount and scratch registers.
31424 - Shift by register requires both scratch registers. In all other cases
31425 the scratch registers may be NULL.
31426 - Ashiftrt by a register also clobbers the CC register. */
31428 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31429 rtx amount
, rtx scratch1
, rtx scratch2
)
31431 rtx out_high
= gen_highpart (SImode
, out
);
31432 rtx out_low
= gen_lowpart (SImode
, out
);
31433 rtx in_high
= gen_highpart (SImode
, in
);
31434 rtx in_low
= gen_lowpart (SImode
, in
);
31437 in = the register pair containing the input value.
31438 out = the destination register pair.
31439 up = the high- or low-part of each pair.
31440 down = the opposite part to "up".
31441 In a shift, we can consider bits to shift from "up"-stream to
31442 "down"-stream, so in a left-shift "up" is the low-part and "down"
31443 is the high-part of each register pair. */
31445 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31446 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31447 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31448 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31450 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31452 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31453 && GET_MODE (out
) == DImode
);
31455 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31456 && GET_MODE (in
) == DImode
);
31458 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31459 && GET_MODE (amount
) == SImode
)
31460 || CONST_INT_P (amount
)));
31461 gcc_assert (scratch1
== NULL
31462 || (GET_CODE (scratch1
) == SCRATCH
)
31463 || (GET_MODE (scratch1
) == SImode
31464 && REG_P (scratch1
)));
31465 gcc_assert (scratch2
== NULL
31466 || (GET_CODE (scratch2
) == SCRATCH
)
31467 || (GET_MODE (scratch2
) == SImode
31468 && REG_P (scratch2
)));
31469 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31470 || !HARD_REGISTER_P (out
)
31471 || (REGNO (out
) != REGNO (amount
)
31472 && REGNO (out
) + 1 != REGNO (amount
)));
31474 /* Macros to make following code more readable. */
31475 #define SUB_32(DEST,SRC) \
31476 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31477 #define RSB_32(DEST,SRC) \
31478 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31479 #define SUB_S_32(DEST,SRC) \
31480 gen_addsi3_compare0 ((DEST), (SRC), \
31482 #define SET(DEST,SRC) \
31483 gen_rtx_SET (SImode, (DEST), (SRC))
31484 #define SHIFT(CODE,SRC,AMOUNT) \
31485 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31486 #define LSHIFT(CODE,SRC,AMOUNT) \
31487 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31488 SImode, (SRC), (AMOUNT))
31489 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31490 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31491 SImode, (SRC), (AMOUNT))
31493 gen_rtx_IOR (SImode, (A), (B))
31494 #define BRANCH(COND,LABEL) \
31495 gen_arm_cond_branch ((LABEL), \
31496 gen_rtx_ ## COND (CCmode, cc_reg, \
31500 /* Shifts by register and shifts by constant are handled separately. */
31501 if (CONST_INT_P (amount
))
31503 /* We have a shift-by-constant. */
31505 /* First, handle out-of-range shift amounts.
31506 In both cases we try to match the result an ARM instruction in a
31507 shift-by-register would give. This helps reduce execution
31508 differences between optimization levels, but it won't stop other
31509 parts of the compiler doing different things. This is "undefined
31510 behaviour, in any case. */
31511 if (INTVAL (amount
) <= 0)
31512 emit_insn (gen_movdi (out
, in
));
31513 else if (INTVAL (amount
) >= 64)
31515 if (code
== ASHIFTRT
)
31517 rtx const31_rtx
= GEN_INT (31);
31518 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
31519 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
31522 emit_insn (gen_movdi (out
, const0_rtx
));
31525 /* Now handle valid shifts. */
31526 else if (INTVAL (amount
) < 32)
31528 /* Shifts by a constant less than 32. */
31529 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
31531 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31532 emit_insn (SET (out_down
,
31533 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
31535 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31539 /* Shifts by a constant greater than 31. */
31540 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
31542 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
31543 if (code
== ASHIFTRT
)
31544 emit_insn (gen_ashrsi3 (out_up
, in_up
,
31547 emit_insn (SET (out_up
, const0_rtx
));
31552 /* We have a shift-by-register. */
31553 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
31555 /* This alternative requires the scratch registers. */
31556 gcc_assert (scratch1
&& REG_P (scratch1
));
31557 gcc_assert (scratch2
&& REG_P (scratch2
));
31559 /* We will need the values "amount-32" and "32-amount" later.
31560 Swapping them around now allows the later code to be more general. */
31564 emit_insn (SUB_32 (scratch1
, amount
));
31565 emit_insn (RSB_32 (scratch2
, amount
));
31568 emit_insn (RSB_32 (scratch1
, amount
));
31569 /* Also set CC = amount > 32. */
31570 emit_insn (SUB_S_32 (scratch2
, amount
));
31573 emit_insn (RSB_32 (scratch1
, amount
));
31574 emit_insn (SUB_32 (scratch2
, amount
));
31577 gcc_unreachable ();
31580 /* Emit code like this:
31583 out_down = in_down << amount;
31584 out_down = (in_up << (amount - 32)) | out_down;
31585 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31586 out_up = in_up << amount;
31589 out_down = in_down >> amount;
31590 out_down = (in_up << (32 - amount)) | out_down;
31592 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31593 out_up = in_up << amount;
31596 out_down = in_down >> amount;
31597 out_down = (in_up << (32 - amount)) | out_down;
31599 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31600 out_up = in_up << amount;
31602 The ARM and Thumb2 variants are the same but implemented slightly
31603 differently. If this were only called during expand we could just
31604 use the Thumb2 case and let combine do the right thing, but this
31605 can also be called from post-reload splitters. */
31607 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31609 if (!TARGET_THUMB2
)
31611 /* Emit code for ARM mode. */
31612 emit_insn (SET (out_down
,
31613 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31614 if (code
== ASHIFTRT
)
31616 rtx_code_label
*done_label
= gen_label_rtx ();
31617 emit_jump_insn (BRANCH (LT
, done_label
));
31618 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31620 emit_label (done_label
);
31623 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31628 /* Emit code for Thumb2 mode.
31629 Thumb2 can't do shift and or in one insn. */
31630 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31631 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31633 if (code
== ASHIFTRT
)
31635 rtx_code_label
*done_label
= gen_label_rtx ();
31636 emit_jump_insn (BRANCH (LT
, done_label
));
31637 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31638 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31639 emit_label (done_label
);
31643 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31644 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31648 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31663 /* Returns true if a valid comparison operation and makes
31664 the operands in a form that is valid. */
31666 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31668 enum rtx_code code
= GET_CODE (*comparison
);
31670 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31671 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31673 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31675 if (code
== UNEQ
|| code
== LTGT
)
31678 code_int
= (int)code
;
31679 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31680 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31685 if (!arm_add_operand (*op1
, mode
))
31686 *op1
= force_reg (mode
, *op1
);
31687 if (!arm_add_operand (*op2
, mode
))
31688 *op2
= force_reg (mode
, *op2
);
31692 if (!cmpdi_operand (*op1
, mode
))
31693 *op1
= force_reg (mode
, *op1
);
31694 if (!cmpdi_operand (*op2
, mode
))
31695 *op2
= force_reg (mode
, *op2
);
31700 if (!arm_float_compare_operand (*op1
, mode
))
31701 *op1
= force_reg (mode
, *op1
);
31702 if (!arm_float_compare_operand (*op2
, mode
))
31703 *op2
= force_reg (mode
, *op2
);
31713 /* Maximum number of instructions to set block of memory. */
31715 arm_block_set_max_insns (void)
31717 if (optimize_function_for_size_p (cfun
))
31720 return current_tune
->max_insns_inline_memset
;
31723 /* Return TRUE if it's profitable to set block of memory for
31724 non-vectorized case. VAL is the value to set the memory
31725 with. LENGTH is the number of bytes to set. ALIGN is the
31726 alignment of the destination memory in bytes. UNALIGNED_P
31727 is TRUE if we can only set the memory with instructions
31728 meeting alignment requirements. USE_STRD_P is TRUE if we
31729 can use strd to set the memory. */
31731 arm_block_set_non_vect_profit_p (rtx val
,
31732 unsigned HOST_WIDE_INT length
,
31733 unsigned HOST_WIDE_INT align
,
31734 bool unaligned_p
, bool use_strd_p
)
31737 /* For leftovers in bytes of 0-7, we can set the memory block using
31738 strb/strh/str with minimum instruction number. */
31739 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31743 num
= arm_const_inline_cost (SET
, val
);
31744 num
+= length
/ align
+ length
% align
;
31746 else if (use_strd_p
)
31748 num
= arm_const_double_inline_cost (val
);
31749 num
+= (length
>> 3) + leftover
[length
& 7];
31753 num
= arm_const_inline_cost (SET
, val
);
31754 num
+= (length
>> 2) + leftover
[length
& 3];
31757 /* We may be able to combine last pair STRH/STRB into a single STR
31758 by shifting one byte back. */
31759 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
31762 return (num
<= arm_block_set_max_insns ());
31765 /* Return TRUE if it's profitable to set block of memory for
31766 vectorized case. LENGTH is the number of bytes to set.
31767 ALIGN is the alignment of destination memory in bytes.
31768 MODE is the vector mode used to set the memory. */
31770 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
31771 unsigned HOST_WIDE_INT align
,
31775 bool unaligned_p
= ((align
& 3) != 0);
31776 unsigned int nelt
= GET_MODE_NUNITS (mode
);
31778 /* Instruction loading constant value. */
31780 /* Instructions storing the memory. */
31781 num
+= (length
+ nelt
- 1) / nelt
;
31782 /* Instructions adjusting the address expression. Only need to
31783 adjust address expression if it's 4 bytes aligned and bytes
31784 leftover can only be stored by mis-aligned store instruction. */
31785 if (!unaligned_p
&& (length
& 3) != 0)
31788 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31789 if (!unaligned_p
&& mode
== V16QImode
)
31792 return (num
<= arm_block_set_max_insns ());
31795 /* Set a block of memory using vectorization instructions for the
31796 unaligned case. We fill the first LENGTH bytes of the memory
31797 area starting from DSTBASE with byte constant VALUE. ALIGN is
31798 the alignment requirement of memory. Return TRUE if succeeded. */
31800 arm_block_set_unaligned_vect (rtx dstbase
,
31801 unsigned HOST_WIDE_INT length
,
31802 unsigned HOST_WIDE_INT value
,
31803 unsigned HOST_WIDE_INT align
)
31805 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
31807 rtx val_elt
, val_vec
, reg
;
31808 rtx rval
[MAX_VECT_LEN
];
31809 rtx (*gen_func
) (rtx
, rtx
);
31811 unsigned HOST_WIDE_INT v
= value
;
31813 gcc_assert ((align
& 0x3) != 0);
31814 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
31815 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
31816 if (length
>= nelt_v16
)
31819 gen_func
= gen_movmisalignv16qi
;
31824 gen_func
= gen_movmisalignv8qi
;
31826 nelt_mode
= GET_MODE_NUNITS (mode
);
31827 gcc_assert (length
>= nelt_mode
);
31828 /* Skip if it isn't profitable. */
31829 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
31832 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31833 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31835 v
= sext_hwi (v
, BITS_PER_WORD
);
31836 val_elt
= GEN_INT (v
);
31837 for (j
= 0; j
< nelt_mode
; j
++)
31840 reg
= gen_reg_rtx (mode
);
31841 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
31842 /* Emit instruction loading the constant value. */
31843 emit_move_insn (reg
, val_vec
);
31845 /* Handle nelt_mode bytes in a vector. */
31846 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
31848 emit_insn ((*gen_func
) (mem
, reg
));
31849 if (i
+ 2 * nelt_mode
<= length
)
31850 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
31853 /* If there are not less than nelt_v8 bytes leftover, we must be in
31855 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
31857 /* Handle (8, 16) bytes leftover. */
31858 if (i
+ nelt_v8
< length
)
31860 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
31861 /* We are shifting bytes back, set the alignment accordingly. */
31862 if ((length
& 1) != 0 && align
>= 2)
31863 set_mem_align (mem
, BITS_PER_UNIT
);
31865 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31867 /* Handle (0, 8] bytes leftover. */
31868 else if (i
< length
&& i
+ nelt_v8
>= length
)
31870 if (mode
== V16QImode
)
31872 reg
= gen_lowpart (V8QImode
, reg
);
31873 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, 0);
31875 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
31876 + (nelt_mode
- nelt_v8
))));
31877 /* We are shifting bytes back, set the alignment accordingly. */
31878 if ((length
& 1) != 0 && align
>= 2)
31879 set_mem_align (mem
, BITS_PER_UNIT
);
31881 emit_insn (gen_movmisalignv8qi (mem
, reg
));
31887 /* Set a block of memory using vectorization instructions for the
31888 aligned case. We fill the first LENGTH bytes of the memory area
31889 starting from DSTBASE with byte constant VALUE. ALIGN is the
31890 alignment requirement of memory. Return TRUE if succeeded. */
31892 arm_block_set_aligned_vect (rtx dstbase
,
31893 unsigned HOST_WIDE_INT length
,
31894 unsigned HOST_WIDE_INT value
,
31895 unsigned HOST_WIDE_INT align
)
31897 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
31898 rtx dst
, addr
, mem
;
31899 rtx val_elt
, val_vec
, reg
;
31900 rtx rval
[MAX_VECT_LEN
];
31902 unsigned HOST_WIDE_INT v
= value
;
31904 gcc_assert ((align
& 0x3) == 0);
31905 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
31906 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
31907 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
31912 nelt_mode
= GET_MODE_NUNITS (mode
);
31913 gcc_assert (length
>= nelt_mode
);
31914 /* Skip if it isn't profitable. */
31915 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
31918 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31920 v
= sext_hwi (v
, BITS_PER_WORD
);
31921 val_elt
= GEN_INT (v
);
31922 for (j
= 0; j
< nelt_mode
; j
++)
31925 reg
= gen_reg_rtx (mode
);
31926 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
31927 /* Emit instruction loading the constant value. */
31928 emit_move_insn (reg
, val_vec
);
31931 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31932 if (mode
== V16QImode
)
31934 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31935 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31937 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31938 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
31940 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
31941 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31942 /* We are shifting bytes back, set the alignment accordingly. */
31943 if ((length
& 0x3) == 0)
31944 set_mem_align (mem
, BITS_PER_UNIT
* 4);
31945 else if ((length
& 0x1) == 0)
31946 set_mem_align (mem
, BITS_PER_UNIT
* 2);
31948 set_mem_align (mem
, BITS_PER_UNIT
);
31950 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31953 /* Fall through for bytes leftover. */
31955 nelt_mode
= GET_MODE_NUNITS (mode
);
31956 reg
= gen_lowpart (V8QImode
, reg
);
31959 /* Handle 8 bytes in a vector. */
31960 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
31962 addr
= plus_constant (Pmode
, dst
, i
);
31963 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
31964 emit_move_insn (mem
, reg
);
31967 /* Handle single word leftover by shifting 4 bytes back. We can
31968 use aligned access for this case. */
31969 if (i
+ UNITS_PER_WORD
== length
)
31971 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
31972 mem
= adjust_automodify_address (dstbase
, mode
,
31973 addr
, i
- UNITS_PER_WORD
);
31974 /* We are shifting 4 bytes back, set the alignment accordingly. */
31975 if (align
> UNITS_PER_WORD
)
31976 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
31978 emit_move_insn (mem
, reg
);
31980 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31981 We have to use unaligned access for this case. */
31982 else if (i
< length
)
31984 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
31985 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31986 /* We are shifting bytes back, set the alignment accordingly. */
31987 if ((length
& 1) == 0)
31988 set_mem_align (mem
, BITS_PER_UNIT
* 2);
31990 set_mem_align (mem
, BITS_PER_UNIT
);
31992 emit_insn (gen_movmisalignv8qi (mem
, reg
));
31998 /* Set a block of memory using plain strh/strb instructions, only
31999 using instructions allowed by ALIGN on processor. We fill the
32000 first LENGTH bytes of the memory area starting from DSTBASE
32001 with byte constant VALUE. ALIGN is the alignment requirement
32004 arm_block_set_unaligned_non_vect (rtx dstbase
,
32005 unsigned HOST_WIDE_INT length
,
32006 unsigned HOST_WIDE_INT value
,
32007 unsigned HOST_WIDE_INT align
)
32010 rtx dst
, addr
, mem
;
32011 rtx val_exp
, val_reg
, reg
;
32013 HOST_WIDE_INT v
= value
;
32015 gcc_assert (align
== 1 || align
== 2);
32018 v
|= (value
<< BITS_PER_UNIT
);
32020 v
= sext_hwi (v
, BITS_PER_WORD
);
32021 val_exp
= GEN_INT (v
);
32022 /* Skip if it isn't profitable. */
32023 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32024 align
, true, false))
32027 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32028 mode
= (align
== 2 ? HImode
: QImode
);
32029 val_reg
= force_reg (SImode
, val_exp
);
32030 reg
= gen_lowpart (mode
, val_reg
);
32032 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
32034 addr
= plus_constant (Pmode
, dst
, i
);
32035 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
32036 emit_move_insn (mem
, reg
);
32039 /* Handle single byte leftover. */
32040 if (i
+ 1 == length
)
32042 reg
= gen_lowpart (QImode
, val_reg
);
32043 addr
= plus_constant (Pmode
, dst
, i
);
32044 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
32045 emit_move_insn (mem
, reg
);
32049 gcc_assert (i
== length
);
32053 /* Set a block of memory using plain strd/str/strh/strb instructions,
32054 to permit unaligned copies on processors which support unaligned
32055 semantics for those instructions. We fill the first LENGTH bytes
32056 of the memory area starting from DSTBASE with byte constant VALUE.
32057 ALIGN is the alignment requirement of memory. */
32059 arm_block_set_aligned_non_vect (rtx dstbase
,
32060 unsigned HOST_WIDE_INT length
,
32061 unsigned HOST_WIDE_INT value
,
32062 unsigned HOST_WIDE_INT align
)
32065 rtx dst
, addr
, mem
;
32066 rtx val_exp
, val_reg
, reg
;
32067 unsigned HOST_WIDE_INT v
;
32070 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
32071 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
32073 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
32074 if (length
< UNITS_PER_WORD
)
32075 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
32078 v
|= (v
<< BITS_PER_WORD
);
32080 v
= sext_hwi (v
, BITS_PER_WORD
);
32082 val_exp
= GEN_INT (v
);
32083 /* Skip if it isn't profitable. */
32084 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32085 align
, false, use_strd_p
))
32090 /* Try without strd. */
32091 v
= (v
>> BITS_PER_WORD
);
32092 v
= sext_hwi (v
, BITS_PER_WORD
);
32093 val_exp
= GEN_INT (v
);
32094 use_strd_p
= false;
32095 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32096 align
, false, use_strd_p
))
32101 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32102 /* Handle double words using strd if possible. */
32105 val_reg
= force_reg (DImode
, val_exp
);
32107 for (; (i
+ 8 <= length
); i
+= 8)
32109 addr
= plus_constant (Pmode
, dst
, i
);
32110 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
32111 emit_move_insn (mem
, reg
);
32115 val_reg
= force_reg (SImode
, val_exp
);
32117 /* Handle words. */
32118 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
32119 for (; (i
+ 4 <= length
); i
+= 4)
32121 addr
= plus_constant (Pmode
, dst
, i
);
32122 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
32123 if ((align
& 3) == 0)
32124 emit_move_insn (mem
, reg
);
32126 emit_insn (gen_unaligned_storesi (mem
, reg
));
32129 /* Merge last pair of STRH and STRB into a STR if possible. */
32130 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
32132 addr
= plus_constant (Pmode
, dst
, i
- 1);
32133 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
32134 /* We are shifting one byte back, set the alignment accordingly. */
32135 if ((align
& 1) == 0)
32136 set_mem_align (mem
, BITS_PER_UNIT
);
32138 /* Most likely this is an unaligned access, and we can't tell at
32139 compilation time. */
32140 emit_insn (gen_unaligned_storesi (mem
, reg
));
32144 /* Handle half word leftover. */
32145 if (i
+ 2 <= length
)
32147 reg
= gen_lowpart (HImode
, val_reg
);
32148 addr
= plus_constant (Pmode
, dst
, i
);
32149 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
32150 if ((align
& 1) == 0)
32151 emit_move_insn (mem
, reg
);
32153 emit_insn (gen_unaligned_storehi (mem
, reg
));
32158 /* Handle single byte leftover. */
32159 if (i
+ 1 == length
)
32161 reg
= gen_lowpart (QImode
, val_reg
);
32162 addr
= plus_constant (Pmode
, dst
, i
);
32163 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
32164 emit_move_insn (mem
, reg
);
32170 /* Set a block of memory using vectorization instructions for both
32171 aligned and unaligned cases. We fill the first LENGTH bytes of
32172 the memory area starting from DSTBASE with byte constant VALUE.
32173 ALIGN is the alignment requirement of memory. */
32175 arm_block_set_vect (rtx dstbase
,
32176 unsigned HOST_WIDE_INT length
,
32177 unsigned HOST_WIDE_INT value
,
32178 unsigned HOST_WIDE_INT align
)
32180 /* Check whether we need to use unaligned store instruction. */
32181 if (((align
& 3) != 0 || (length
& 3) != 0)
32182 /* Check whether unaligned store instruction is available. */
32183 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
32186 if ((align
& 3) == 0)
32187 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
32189 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
32192 /* Expand string store operation. Firstly we try to do that by using
32193 vectorization instructions, then try with ARM unaligned access and
32194 double-word store if profitable. OPERANDS[0] is the destination,
32195 OPERANDS[1] is the number of bytes, operands[2] is the value to
32196 initialize the memory, OPERANDS[3] is the known alignment of the
32199 arm_gen_setmem (rtx
*operands
)
32201 rtx dstbase
= operands
[0];
32202 unsigned HOST_WIDE_INT length
;
32203 unsigned HOST_WIDE_INT value
;
32204 unsigned HOST_WIDE_INT align
;
32206 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
32209 length
= UINTVAL (operands
[1]);
32213 value
= (UINTVAL (operands
[2]) & 0xFF);
32214 align
= UINTVAL (operands
[3]);
32215 if (TARGET_NEON
&& length
>= 8
32216 && current_tune
->string_ops_prefer_neon
32217 && arm_block_set_vect (dstbase
, length
, value
, align
))
32220 if (!unaligned_access
&& (align
& 3) != 0)
32221 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
32223 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
32226 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32228 static unsigned HOST_WIDE_INT
32229 arm_asan_shadow_offset (void)
32231 return (unsigned HOST_WIDE_INT
) 1 << 29;
32235 /* This is a temporary fix for PR60655. Ideally we need
32236 to handle most of these cases in the generic part but
32237 currently we reject minus (..) (sym_ref). We try to
32238 ameliorate the case with minus (sym_ref1) (sym_ref2)
32239 where they are in the same section. */
32242 arm_const_not_ok_for_debug_p (rtx p
)
32244 tree decl_op0
= NULL
;
32245 tree decl_op1
= NULL
;
32247 if (GET_CODE (p
) == MINUS
)
32249 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
32251 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
32253 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
32254 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
32256 if ((TREE_CODE (decl_op1
) == VAR_DECL
32257 || TREE_CODE (decl_op1
) == CONST_DECL
)
32258 && (TREE_CODE (decl_op0
) == VAR_DECL
32259 || TREE_CODE (decl_op0
) == CONST_DECL
))
32260 return (get_variable_section (decl_op1
, false)
32261 != get_variable_section (decl_op0
, false));
32263 if (TREE_CODE (decl_op1
) == LABEL_DECL
32264 && TREE_CODE (decl_op0
) == LABEL_DECL
)
32265 return (DECL_CONTEXT (decl_op1
)
32266 != DECL_CONTEXT (decl_op0
));
32277 arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
32279 const unsigned ARM_FE_INVALID
= 1;
32280 const unsigned ARM_FE_DIVBYZERO
= 2;
32281 const unsigned ARM_FE_OVERFLOW
= 4;
32282 const unsigned ARM_FE_UNDERFLOW
= 8;
32283 const unsigned ARM_FE_INEXACT
= 16;
32284 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT
= (ARM_FE_INVALID
32289 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT
= 8;
32290 tree fenv_var
, get_fpscr
, set_fpscr
, mask
, ld_fenv
, masked_fenv
;
32291 tree new_fenv_var
, reload_fenv
, restore_fnenv
;
32292 tree update_call
, atomic_feraiseexcept
, hold_fnclex
;
32294 if (!TARGET_VFP
|| !TARGET_HARD_FLOAT
)
32297 /* Generate the equivalent of :
32298 unsigned int fenv_var;
32299 fenv_var = __builtin_arm_get_fpscr ();
32301 unsigned int masked_fenv;
32302 masked_fenv = fenv_var & mask;
32304 __builtin_arm_set_fpscr (masked_fenv); */
32306 fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
32307 get_fpscr
= arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
];
32308 set_fpscr
= arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
];
32309 mask
= build_int_cst (unsigned_type_node
,
32310 ~((ARM_FE_ALL_EXCEPT
<< ARM_FE_EXCEPT_SHIFT
)
32311 | ARM_FE_ALL_EXCEPT
));
32312 ld_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
,
32313 fenv_var
, build_call_expr (get_fpscr
, 0));
32314 masked_fenv
= build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
, mask
);
32315 hold_fnclex
= build_call_expr (set_fpscr
, 1, masked_fenv
);
32316 *hold
= build2 (COMPOUND_EXPR
, void_type_node
,
32317 build2 (COMPOUND_EXPR
, void_type_node
, masked_fenv
, ld_fenv
),
32320 /* Store the value of masked_fenv to clear the exceptions:
32321 __builtin_arm_set_fpscr (masked_fenv); */
32323 *clear
= build_call_expr (set_fpscr
, 1, masked_fenv
);
32325 /* Generate the equivalent of :
32326 unsigned int new_fenv_var;
32327 new_fenv_var = __builtin_arm_get_fpscr ();
32329 __builtin_arm_set_fpscr (fenv_var);
32331 __atomic_feraiseexcept (new_fenv_var); */
32333 new_fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
32334 reload_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
, new_fenv_var
,
32335 build_call_expr (get_fpscr
, 0));
32336 restore_fnenv
= build_call_expr (set_fpscr
, 1, fenv_var
);
32337 atomic_feraiseexcept
= builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
32338 update_call
= build_call_expr (atomic_feraiseexcept
, 1,
32339 fold_convert (integer_type_node
, new_fenv_var
));
32340 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
32341 build2 (COMPOUND_EXPR
, void_type_node
,
32342 reload_fenv
, restore_fnenv
), update_call
);
32345 /* return TRUE if x is a reference to a value in a constant pool */
32347 arm_is_constant_pool_ref (rtx x
)
32350 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
32351 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
32354 #include "gt-arm.h"