/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
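/* Illustration (editorial addition, not part of the original source):
   in user code the attributes above are applied like so:

     void far_handler (void) __attribute__ ((long_call));
     float vfp_fn (float) __attribute__ ((pcs ("aapcs-vfp")));
     void irq_handler (void) __attribute__ ((isr ("IRQ")));
     void boot (void) __attribute__ ((naked));

   The string arguments shown for "pcs" and "isr" are instances of the
   single argument permitted by the min_len/max_len fields above.  */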
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef  TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef  TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef  TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
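/* Editorial note: the anchor range above is [-4088, 4095], i.e.
   4088 + 1 + 4095 = 8184 bytes in total, and 8184 = 8 * 1023, so blocks
   of anchored objects stay naturally eight-byte aligned.  */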
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef  TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef  TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef  TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef  TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef  TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef  MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
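/* Editorial note: ARMv8-A deprecates IT blocks covering more than a
   single instruction, so under -mrestrict-it (arm_restrict_it) each IT
   block is limited to one insn; otherwise up to four conditional insns
   may share one IT block, e.g. "ITTEE EQ".  */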
#undef  TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef  TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef  TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef  TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
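/* Editorial illustration: a guarded ARM-mode sequence such as

       cmp   r2, #0
       bne   1f
       mov   r0, #0
       str   r0, [r1]
     1:

   can instead be emitted as "moveq r0, #0; streq r0, [r1]" when the
   skipped body is at most max_insns_skipped instructions long.  */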
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;
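/* Editorial illustration: for a POST_INC SImode reference printed as
   "ldr r0, [r1], #4", the address printer needs to know the mode (here
   SImode, hence the #4 step) even though the address rtx itself is
   just (post_inc (reg r1)).  */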
/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
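/* Editorial note: THUMB2_WORK_REGS selects the low registers r0-r7 and
   masks out the Thumb hard frame pointer, SP, PC and the PIC register.
   SP and PC lie outside the 0xff mask anyway, so in practice this
   leaves r0-r6, less the PIC register when it is a low register.  */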
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
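/* Usage sketch (editorial, with made-up parameters): a tune that models
   a prefetcher with 4 slots, a 32kB L1 and 64-byte lines would put
   ARM_PREFETCH_BENEFICIAL (4, 32, 64) in its prefetch field; the tables
   below all use ARM_PREFETCH_NOT_BENEFICIAL.  */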
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar_load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      COSTS_N_INSNS (4),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend_arith.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  NULL,					/* Insn extra costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_marvell_pj4_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  arm_9e_rtx_costs,
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a53_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_xgene1_tune =
{
  arm_9e_rtx_costs,
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  arm_9e_rtx_costs,
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SC_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SC_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
    FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
#undef ARM_FPU
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static HOST_WIDE_INT
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
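/* For example, for value = 0b101100 the successive values are 0b101000,
   0b100000 and 0, so the loop body runs three times -- once per set bit.  */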
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
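/* For example, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers "__gnu_addsq3" as the SQmode addition libfunc; with a num_suffix
   of 0 the name would be plain "__gnu_addsq".  */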
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
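/* For example, a conversion between two signed fract modes such as QQmode
   ("qq") to HQmode ("hq") satisfies all three conditions above and is named
   "__gnu_fractqqhq2", while a float-to-fixed conversion such as SFmode to
   QQmode takes no suffix and is named "__gnu_fractsfqq".  */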
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TREE_TARGET_ARM (opts) && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TREE_TARGET_ARM (opts))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TREE_TARGET_ARM (opts) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TREE_TARGET_ARM (opts)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TREE_TARGET_THUMB (opts) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TREE_TARGET_THUMB1 (opts))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TREE_TARGET_THUMB (opts) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TREE_TARGET_THUMB1 (opts) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
}
/* Set params depending on attributes and optimization options.  */
static void
arm_option_params_internal (struct gcc_options *opts)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TREE_TARGET_THUMB1 (opts))
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TREE_TARGET_THUMB2 (opts))
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TREE_TARGET_THUMB2 (opts))
	max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
}
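/* A worked example for the Thumb-2 range above: one anchor covers offsets
   -248 .. 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, which
   is the divisible-by-eight property the comment relies on.  */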
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  if (TREE_TARGET_THUMB (opts) && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TREE_TARGET_THUMB (opts))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TREE_TARGET_THUMB (opts) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  if (!TREE_TARGET_THUMB2 (opts))
    opts->x_arm_restrict_it = 0;

  if (TREE_TARGET_THUMB1 (opts))
    {
      /* Don't warn since it's on by default in -O2.  */
      opts->x_flag_schedule_insns = 0;
    }

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TREE_TARGET_THUMB2 (opts))
    opts->x_flag_shrink_wrap = false;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TREE_TARGET_THUMB1 (opts))
    opts->x_flag_ipa_ra = 0;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TREE_TARGET_THUMB2 (opts);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
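  /* For example, giving both -mcpu=arm7tdmi and -march=armv7-a takes the
     warning path above: armv7-a then drives code generation while arm7tdmi
     still selects the default tuning.  */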
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
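  /* For example, with the legacy 32-bit boundary a struct { char c; } is
     padded out to 32 bits; under AAPCS the boundary is 8, so the same
     struct occupies a single byte.  */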
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
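/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   matches the "IRQ" entry above, which arm_isr_value () maps to ARM_FT_ISR.  */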
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
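/* A note on the Thumb-1 sequence above: r0 and r1 are pushed because 16-bit
   Thumb cannot load the (high) static chain register directly; the function
   address is then stored over the saved r1 slot, so the final pop both
   restores r0 and loads the target address into the PC.  */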
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
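/* For example, a trampoline placed at 0x8000 yields the address 0x8001 on
   Thumb targets; the set bit 0 makes an indirect BX/BLX enter the stub in
   Thumb state.  */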
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
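/* Editorial illustration, not part of the compiler proper (hence kept
   under #if 0): the ARM-mode rules above amount to asking whether some
   even rotation of the value leaves an 8-bit quantity.  A minimal
   standalone sketch of that check, assuming plain 32-bit unsigned
   arithmetic:  */
#if 0
static int
arm_immediate_ok_sketch (unsigned int x)
{
  int rot;

  /* The immediate encoding is an 8-bit value rotated right by an even
     amount, so try every even left-rotation of X.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      unsigned int v = (x << rot) | (rot ? (x >> (32 - rot)) : 0);
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
/* E.g. 0xff000000 and 0x000003fc are encodable; 0x00ffff00 is not.  */
#endif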
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid DImode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
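/* For example, the DImode constant 0xffffffff00000f00 is a valid AND
   operand: the high word is all ones (so that half is left untouched)
   and the low word, 0xf00, is an encodable immediate.  */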
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
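/* For example, SETting the constant 0x12345678 requires the full four
   immediates: 0x12000000 + 0x00340000 + 0x00005600 + 0x00000078, each
   of which is an 8-bit value at a 2-bit aligned position.  */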
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_ASHIFT (mode, source,
								 GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
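/* As an illustration of the inverted-sequence selection above: AND with
   0xfffff0f0 has no positive route at all, but the inverted constant
   0x00000f0f splits into the two immediates 0xf00 and 0x00f, so the
   operation is emitted as two BICs.  */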
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
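/* For example, (GT x 0xffff) would need the unencodable constant
   0xffff; the code above rewrites it as (GE x 0x10000), and 0x10000
   loads in a single instruction.  */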
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef const rtx_def *value_type;
  typedef const rtx_def *compare_type;
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
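/* Under the APCS rules above, struct { int i; } is 'integer like' and
   comes back in r0, while struct { float f; } and any struct larger
   than one word are returned in memory.  */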
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized argument.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;

	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
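/* Worked example of the rules above, for the AAPCS base variant: in a
   call f (int a, double b), a is allocated to r0 (C4); b needs
   doubleword alignment, so C3 rounds the NCRN from 1 up to 2 and b
   occupies r2-r3 (C4 again).  Any further argument then finds the core
   registers exhausted and goes on the stack (C6).  */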
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
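/* For example, under AAPCS a DImode argument has 64-bit alignment,
   which exceeds the 32-bit PARM_BOUNDARY, so it needs doubleword
   alignment; this is what makes arm_function_arg below start such an
   argument in an even register.  */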

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */
static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

      pcum->can_split = false;
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
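
/* A rough sketch of how the middle end drives the hooks above when
   laying out a call; this loop is illustrative pseudo-code, not the
   actual caller in calls.c:

     CUMULATIVE_ARGS cum;
     arm_init_cumulative_args (&cum, fntype, libname, fndecl);
     for each argument (MODE, TYPE, NAMED):
       rtx reg = arm_function_arg (pack_cumulative_args (&cum),
				   MODE, TYPE, NAMED);
       int partial = arm_arg_partial_bytes (pack_cumulative_args (&cum),
					    MODE, TYPE, NAMED);
       ... place the argument in REG and/or on the stack ...
       arm_function_arg_advance (pack_cumulative_args (&cum),
				 MODE, TYPE, NAMED);
*/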

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	  ? DOUBLEWORD_ALIGNMENT
	  : PARM_BOUNDARY);
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}

/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */
static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */
static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
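
/* Example of a type this catches (illustrative only): a variably
   sized object has a non-constant TYPE_SIZE, so it is passed by
   reference even though the ABI would otherwise pass it by value:

     void f (int n)
     {
       struct vla { char body[n]; } s;
       g (s);              <- actually receives a reference
     }
*/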

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
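
/* User-level view of the state tracked above (illustrative):

     #pragma long_calls
     void far_away (void);    <- declarations default to long_call
     #pragma no_long_calls
     void near_by (void);     <- now they default to short_call
     #pragma long_calls_off
     void ordinary (void);    <- back to the command-line default

   The pending state is applied to function types by
   arm_set_default_type_attributes below.  */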

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}

/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}

/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions when inside
     #pragma long_calls, or __attribute__ ((short_call)) when inside
     #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */
static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

   a.  has an __attribute__ ((long_call))
   or b.  is within the scope of a #pragma long_calls
   or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

   d.  has an __attribute__ ((short_call))
   or e.  is inside the scope of a #pragma no_long_calls
   or f.  is defined in the same section as the current function.  */
bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
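
/* Illustrative declarations covering cases a and d above:

     void remote (void) __attribute__ ((long_call));    <- case a
     void nearby (void) __attribute__ ((short_call));   <- case d

   while compiling with -mlong-calls makes case c the default for
   anything not matched by d, e or f.  */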

/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
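
/* For instance (illustrative): a handler declared as

     void timer_handler (void) __attribute__ ((interrupt ("IRQ")));

   is caught by the IS_INTERRUPT check above when it tries to make a
   tail-call, since its exit must restore the interrupted context
   rather than simply branch to the callee.  */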

/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}

/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */
static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1
		  && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}

/* Find a spare register to use during the prolog of a function.  */
static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */
void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
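
/* The non-VxWorks path expands to the classic PC-relative sequence;
   schematically, in ARM state (illustrative assembly, GOT-based PIC):

	ldr	r_pic, .Loff
     .LPIC0:
	add	r_pic, pc, r_pic
	...
     .Loff:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   The '+ 8' (or '+ 4' for Thumb) added above matches the value the
   program counter reads as at the ADD.  */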

/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}

/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return false;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}

/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && !CONST_INT_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of a constant pool.  But there are cases we have to use a
     constant pool, such as:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32 bits and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
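
/* Representative addresses accepted here, in assembly form
   (illustrative only):

	ldr	r0, [r1, #4095]		- SImode, 12-bit immediate
	ldr	r0, [r1, r2]		- register index
	ldr	r0, [r1, r2, lsl #2]	- index scaled by a power of two
	ldrd	r0, r1, [r2, #248]	- DImode via LDRD, +/-255 range

   while VFP SFmode/DFmode accesses are confined to multiples of 4
   within +/-1020.  */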

/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}

/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core
	       register load/stores.  Since we allow SF/DF in core registers,
	       we have to use the intersection between -256~4096 (core) and
	       -1024~1024 (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}

/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
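
/* The resulting legal unsigned offsets per access size are
   (illustrative encodings):

     byte:      0..31          ldrb r0, [r1, #31]
     halfword:  0..62, even    ldrh r0, [r1, #62]
     word:      0..124, by 4   ldr  r0, [r1, #124]
*/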

static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}

static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}

rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x,
					      GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
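
/* The four models map onto familiar source-level situations, e.g.
   (illustrative):

     static __thread int counter;     <- local-exec in an executable
     extern __thread int other_tls;   <- initial-exec or global-dynamic
				         when compiled with -fPIC

   and -mtls-dialect=gnu2 selects the descriptor-based sequences
   generated by arm_tls_descseq_addr above.  */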

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode
	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
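
/* Example of the absolute-address split performed above
   (illustrative): for a SImode access to constant address 0x1234,
   BITS is 12, so the code emits roughly

	mov	r0, #0x1000
	ldr	r1, [r0, #0x234]

   allowing neighbouring accesses to reuse the biased base in r0.  */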

/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}

/* Return TRUE if X contains any TLS symbol references.  */
bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
    case SIGN_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}

static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
	{
	  *total += COSTS_N_INSNS (4);
	  return true;
	}

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (3);
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
	++*total;

      return true;

    case MINUS:
      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	  if (CONST_INT_P (XEXP (x, 0))
	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	    {
	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		{
		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
		  return true;
		}

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
			      0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	/* The cost comes from the cost of the multiply.  */
	return false;

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }

	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8441 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8443 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8444 if (REG_P (XEXP (XEXP (x
, 0), 0))
8445 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8446 *total
+= COSTS_N_INSNS (1);
8452 case AND
: case XOR
: case IOR
:
8454 /* Normally the frame registers will be spilt into reg+const during
8455 reload, so it is a bad idea to combine them with other instructions,
8456 since then they might not be moved outside of loops. As a compromise
8457 we allow integration with ops that have a constant as their second
8459 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8460 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8461 && !CONST_INT_P (XEXP (x
, 1)))
8462 *total
= COSTS_N_INSNS (1);
8466 *total
+= COSTS_N_INSNS (2);
8467 if (CONST_INT_P (XEXP (x
, 1))
8468 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8470 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8477 *total
+= COSTS_N_INSNS (1);
8478 if (CONST_INT_P (XEXP (x
, 1))
8479 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8481 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8484 subcode
= GET_CODE (XEXP (x
, 0));
8485 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8486 || subcode
== LSHIFTRT
8487 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8489 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8490 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8495 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8497 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8498 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8502 if (subcode
== UMIN
|| subcode
== UMAX
8503 || subcode
== SMIN
|| subcode
== SMAX
)
8505 *total
= COSTS_N_INSNS (3);
8512 /* This should have been handled by the CPU specific routines. */
8516 if (arm_arch3m
&& mode
== SImode
8517 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8518 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8519 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8520 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8521 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8522 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8524 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8527 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8531 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8533 if (TARGET_HARD_FLOAT
8535 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8537 *total
= COSTS_N_INSNS (1);
8540 *total
= COSTS_N_INSNS (2);
8546 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8547 if (mode
== SImode
&& code
== NOT
)
8549 subcode
= GET_CODE (XEXP (x
, 0));
8550 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8551 || subcode
== LSHIFTRT
8552 || subcode
== ROTATE
|| subcode
== ROTATERT
8554 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8556 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8557 /* Register shifts cost an extra cycle. */
8558 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8559 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8568 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8570 *total
= COSTS_N_INSNS (4);
8574 operand
= XEXP (x
, 0);
8576 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8577 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8578 && REG_P (XEXP (operand
, 0))
8579 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8580 *total
+= COSTS_N_INSNS (1);
8581 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8582 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8586 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8588 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8594 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8595 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8597 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8603 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8604 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8606 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8626 /* SCC insns. In the case where the comparison has already been
8627 performed, then they cost 2 instructions. Otherwise they need
8628 an additional comparison before them. */
8629 *total
= COSTS_N_INSNS (2);
8630 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8637 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8643 *total
+= COSTS_N_INSNS (1);
8644 if (CONST_INT_P (XEXP (x
, 1))
8645 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8647 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8651 subcode
= GET_CODE (XEXP (x
, 0));
8652 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8653 || subcode
== LSHIFTRT
8654 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8656 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8657 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8662 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8664 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8665 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8675 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8676 if (!CONST_INT_P (XEXP (x
, 1))
8677 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8678 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8682 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8684 if (TARGET_HARD_FLOAT
8686 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8688 *total
= COSTS_N_INSNS (1);
8691 *total
= COSTS_N_INSNS (20);
8694 *total
= COSTS_N_INSNS (1);
8696 *total
+= COSTS_N_INSNS (3);
8702 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8704 rtx op
= XEXP (x
, 0);
8705 machine_mode opmode
= GET_MODE (op
);
8708 *total
+= COSTS_N_INSNS (1);
8710 if (opmode
!= SImode
)
8714 /* If !arm_arch4, we use one of the extendhisi2_mem
8715 or movhi_bytes patterns for HImode. For a QImode
8716 sign extension, we first zero-extend from memory
8717 and then perform a shift sequence. */
8718 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8719 *total
+= COSTS_N_INSNS (2);
8722 *total
+= COSTS_N_INSNS (1);
8724 /* We don't have the necessary insn, so we need to perform some
8726 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8727 /* An and with constant 255. */
8728 *total
+= COSTS_N_INSNS (1);
8730 /* A shift sequence. Increase costs slightly to avoid
8731 combining two shifts into an extend operation. */
8732 *total
+= COSTS_N_INSNS (2) + 1;
8738 switch (GET_MODE (XEXP (x
, 0)))
8745 *total
= COSTS_N_INSNS (1);
8755 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8759 if (const_ok_for_arm (INTVAL (x
))
8760 || const_ok_for_arm (~INTVAL (x
)))
8761 *total
= COSTS_N_INSNS (1);
8763 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8764 INTVAL (x
), NULL_RTX
,
8771 *total
= COSTS_N_INSNS (3);
8775 *total
= COSTS_N_INSNS (1);
8779 *total
= COSTS_N_INSNS (1);
8780 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8784 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8785 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8786 *total
= COSTS_N_INSNS (1);
8788 *total
= COSTS_N_INSNS (4);
8792 /* The vec_extract patterns accept memory operands that require an
8793 address reload. Account for the cost of that reload to give the
8794 auto-inc-dec pass an incentive to try to replace them. */
8795 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8796 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8798 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8799 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8800 *total
+= COSTS_N_INSNS (1);
8803 /* Likewise for the vec_set patterns. */
8804 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8805 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8806 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8808 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8809 *total
= rtx_cost (mem
, code
, 0, speed
);
8810 if (!neon_vector_mem_operand (mem
, 2, true))
8811 *total
+= COSTS_N_INSNS (1);
8817 /* We cost this as high as our memory costs to allow this to
8818 be hoisted from loops. */
8819 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8821 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8827 && TARGET_HARD_FLOAT
8829 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8830 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8831 *total
= COSTS_N_INSNS (1);
8833 *total
= COSTS_N_INSNS (4);
8837 *total
= COSTS_N_INSNS (4);
8842 /* Estimates the size cost of thumb1 instructions.
8843 For now most of the code is copied from thumb1_rtx_costs. We need more
8844 fine grain tuning when we have more related test cases. */
8846 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8848 machine_mode mode
= GET_MODE (x
);
8857 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8861 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8862 defined by RTL expansion, especially for the expansion of
8864 if ((GET_CODE (XEXP (x
, 0)) == MULT
8865 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8866 || (GET_CODE (XEXP (x
, 1)) == MULT
8867 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8868 return COSTS_N_INSNS (2);
8869 /* On purpose fall through for normal RTX. */
8873 return COSTS_N_INSNS (1);
8876 if (CONST_INT_P (XEXP (x
, 1)))
8878 /* Thumb1 mul instruction can't operate on const. We must Load it
8879 into a register first. */
8880 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8881 /* For the targets which have a very small and high-latency multiply
8882 unit, we prefer to synthesize the mult with up to 5 instructions,
8883 giving a good balance between size and performance. */
8884 if (arm_arch6m
&& arm_m_profile_small_mul
)
8885 return COSTS_N_INSNS (5);
8887 return COSTS_N_INSNS (1) + const_size
;
8889 return COSTS_N_INSNS (1);
8892 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8894 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8895 return COSTS_N_INSNS (words
)
8896 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
8897 || satisfies_constraint_K (SET_SRC (x
))
8898 /* thumb1_movdi_insn. */
8899 || ((words
> 1) && MEM_P (SET_SRC (x
))));
8904 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8905 return COSTS_N_INSNS (1);
8906 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8907 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8908 return COSTS_N_INSNS (2);
8909 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8910 if (thumb_shiftable_const (INTVAL (x
)))
8911 return COSTS_N_INSNS (2);
8912 return COSTS_N_INSNS (3);
8914 else if ((outer
== PLUS
|| outer
== COMPARE
)
8915 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8917 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8918 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8919 return COSTS_N_INSNS (1);
8920 else if (outer
== AND
)
8923 /* This duplicates the tests in the andsi3 expander. */
8924 for (i
= 9; i
<= 31; i
++)
8925 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8926 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8927 return COSTS_N_INSNS (2);
8929 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8930 || outer
== LSHIFTRT
)
8932 return COSTS_N_INSNS (2);
8938 return COSTS_N_INSNS (3);
8952 return COSTS_N_INSNS (1);
8955 return (COSTS_N_INSNS (1)
8957 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8958 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8959 ? COSTS_N_INSNS (1) : 0));
8963 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8968 /* XXX still guessing. */
8969 switch (GET_MODE (XEXP (x
, 0)))
8972 return (1 + (mode
== DImode
? 4 : 0)
8973 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8976 return (4 + (mode
== DImode
? 4 : 0)
8977 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8980 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8991 /* RTX costs when optimizing for size. */
8993 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8996 machine_mode mode
= GET_MODE (x
);
8999 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9003 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9007 /* A memory access costs 1 insn if the mode is small, or the address is
9008 a single register, otherwise it costs one insn per word. */
9009 if (REG_P (XEXP (x
, 0)))
9010 *total
= COSTS_N_INSNS (1);
9012 && GET_CODE (XEXP (x
, 0)) == PLUS
9013 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9014 /* This will be split into two instructions.
9015 See arm.md:calculate_pic_address. */
9016 *total
= COSTS_N_INSNS (2);
9018 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9025 /* Needs a libcall, so it costs about this. */
9026 *total
= COSTS_N_INSNS (2);
9030 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9032 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9040 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9042 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9045 else if (mode
== SImode
)
9047 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9048 /* Slightly disparage register shifts, but not by much. */
9049 if (!CONST_INT_P (XEXP (x
, 1)))
9050 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9054 /* Needs a libcall. */
9055 *total
= COSTS_N_INSNS (2);
9059 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9060 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9062 *total
= COSTS_N_INSNS (1);
9068 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9069 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9071 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9072 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9073 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9074 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9075 || subcode1
== ASHIFTRT
)
9077 /* It's just the cost of the two operands. */
9082 *total
= COSTS_N_INSNS (1);
9086 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9090 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9091 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9093 *total
= COSTS_N_INSNS (1);
9097 /* A shift as a part of ADD costs nothing. */
9098 if (GET_CODE (XEXP (x
, 0)) == MULT
9099 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9101 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9102 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9103 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9108 case AND
: case XOR
: case IOR
:
9111 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9113 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9114 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9115 || (code
== AND
&& subcode
== NOT
))
9117 /* It's just the cost of the two operands. */
9123 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9127 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9131 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9132 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9134 *total
= COSTS_N_INSNS (1);
9140 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9149 if (cc_register (XEXP (x
, 0), VOIDmode
))
9152 *total
= COSTS_N_INSNS (1);
9156 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9157 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9158 *total
= COSTS_N_INSNS (1);
9160 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9165 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9168 if (const_ok_for_arm (INTVAL (x
)))
9169 /* A multiplication by a constant requires another instruction
9170 to load the constant to a register. */
9171 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9173 else if (const_ok_for_arm (~INTVAL (x
)))
9174 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9175 else if (const_ok_for_arm (-INTVAL (x
)))
9177 if (outer_code
== COMPARE
|| outer_code
== PLUS
9178 || outer_code
== MINUS
)
9181 *total
= COSTS_N_INSNS (1);
9184 *total
= COSTS_N_INSNS (2);
9190 *total
= COSTS_N_INSNS (2);
9194 *total
= COSTS_N_INSNS (4);
9199 && TARGET_HARD_FLOAT
9200 && outer_code
== SET
9201 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9202 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9203 *total
= COSTS_N_INSNS (1);
9205 *total
= COSTS_N_INSNS (4);
9210 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9211 cost of these slightly. */
9212 *total
= COSTS_N_INSNS (1) + 1;
9219 if (mode
!= VOIDmode
)
9220 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9222 *total
= COSTS_N_INSNS (4); /* How knows? */
9227 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9228 operand, then return the operand that is being shifted. If the shift
9229 is not by a constant, then set SHIFT_REG to point to the operand.
9230 Return NULL if OP is not a shifter operand. */
9232 shifter_op_p (rtx op
, rtx
*shift_reg
)
9234 enum rtx_code code
= GET_CODE (op
);
9236 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9237 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9238 return XEXP (op
, 0);
9239 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9240 return XEXP (op
, 0);
9241 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9242 || code
== ASHIFTRT
)
9244 if (!CONST_INT_P (XEXP (op
, 1)))
9245 *shift_reg
= XEXP (op
, 1);
9246 return XEXP (op
, 0);
9253 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9255 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9256 rtx_code code
= GET_CODE (x
);
9257 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9259 switch (XINT (x
, 1))
9261 case UNSPEC_UNALIGNED_LOAD
:
9262 /* We can only do unaligned loads into the integer unit, and we can't
9264 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9266 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9267 + extra_cost
->ldst
.load_unaligned
);
9270 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9271 ADDR_SPACE_GENERIC
, speed_p
);
9275 case UNSPEC_UNALIGNED_STORE
:
9276 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9278 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9279 + extra_cost
->ldst
.store_unaligned
);
9281 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9283 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9284 ADDR_SPACE_GENERIC
, speed_p
);
9294 *cost
= COSTS_N_INSNS (1);
9296 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9300 *cost
= COSTS_N_INSNS (2);
9306 /* Cost of a libcall. We assume one insn per argument, an amount for the
9307 call (one insn for -Os) and then one for processing the result. */
9308 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9310 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9313 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9314 if (shift_op != NULL \
9315 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9320 *cost += extra_cost->alu.arith_shift_reg; \
9321 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9324 *cost += extra_cost->alu.arith_shift; \
9326 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9327 + rtx_cost (XEXP (x, 1 - IDX), \
9334 /* RTX costs. Make an estimate of the cost of executing the operation
9335 X, which is contained with an operation with code OUTER_CODE.
9336 SPEED_P indicates whether the cost desired is the performance cost,
9337 or the size cost. The estimate is stored in COST and the return
9338 value is TRUE if the cost calculation is final, or FALSE if the
9339 caller should recurse through the operands of X to add additional
9342 We currently make no attempt to model the size savings of Thumb-2
9343 16-bit instructions. At the normal points in compilation where
9344 this code is called we have no measure of whether the condition
9345 flags are live or not, and thus no realistic way to determine what
9346 the size will eventually be. */
9348 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9349 const struct cpu_cost_table
*extra_cost
,
9350 int *cost
, bool speed_p
)
9352 machine_mode mode
= GET_MODE (x
);
9357 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9359 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9367 /* SET RTXs don't have a mode so we get it from the destination. */
9368 mode
= GET_MODE (SET_DEST (x
));
9370 if (REG_P (SET_SRC (x
))
9371 && REG_P (SET_DEST (x
)))
9373 /* Assume that most copies can be done with a single insn,
9374 unless we don't have HW FP, in which case everything
9375 larger than word mode will require two insns. */
9376 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9377 && GET_MODE_SIZE (mode
) > 4)
9380 /* Conditional register moves can be encoded
9381 in 16 bits in Thumb mode. */
9382 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9388 if (CONST_INT_P (SET_SRC (x
)))
9390 /* Handle CONST_INT here, since the value doesn't have a mode
9391 and we would otherwise be unable to work out the true cost. */
9392 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9394 /* Slightly lower the cost of setting a core reg to a constant.
9395 This helps break up chains and allows for better scheduling. */
9396 if (REG_P (SET_DEST (x
))
9397 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9400 /* Immediate moves with an immediate in the range [0, 255] can be
9401 encoded in 16 bits in Thumb mode. */
9402 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9403 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9405 goto const_int_cost
;
9411 /* A memory access costs 1 insn if the mode is small, or the address is
9412 a single register, otherwise it costs one insn per word. */
9413 if (REG_P (XEXP (x
, 0)))
9414 *cost
= COSTS_N_INSNS (1);
9416 && GET_CODE (XEXP (x
, 0)) == PLUS
9417 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9418 /* This will be split into two instructions.
9419 See arm.md:calculate_pic_address. */
9420 *cost
= COSTS_N_INSNS (2);
9422 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9424 /* For speed optimizations, add the costs of the address and
9425 accessing memory. */
9428 *cost
+= (extra_cost
->ldst
.load
9429 + arm_address_cost (XEXP (x
, 0), mode
,
9430 ADDR_SPACE_GENERIC
, speed_p
));
9432 *cost
+= extra_cost
->ldst
.load
;
9438 /* Calculations of LDM costs are complex. We assume an initial cost
9439 (ldm_1st) which will load the number of registers mentioned in
9440 ldm_regs_per_insn_1st registers; then each additional
9441 ldm_regs_per_insn_subsequent registers cost one more insn. The
9442 formula for N regs is thus:
9444 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9445 + ldm_regs_per_insn_subsequent - 1)
9446 / ldm_regs_per_insn_subsequent).
9448 Additional costs may also be added for addressing. A similar
9449 formula is used for STM. */
9451 bool is_ldm
= load_multiple_operation (x
, SImode
);
9452 bool is_stm
= store_multiple_operation (x
, SImode
);
9454 *cost
= COSTS_N_INSNS (1);
9456 if (is_ldm
|| is_stm
)
9460 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9461 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9462 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9463 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9464 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9465 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9466 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9468 *cost
+= regs_per_insn_1st
9469 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9470 + regs_per_insn_sub
- 1)
9471 / regs_per_insn_sub
);
9480 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9481 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9482 *cost
= COSTS_N_INSNS (speed_p
9483 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9484 else if (mode
== SImode
&& TARGET_IDIV
)
9485 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9487 *cost
= LIBCALL_COST (2);
9488 return false; /* All arguments must be in registers. */
9492 *cost
= LIBCALL_COST (2);
9493 return false; /* All arguments must be in registers. */
9496 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9498 *cost
= (COSTS_N_INSNS (2)
9499 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9501 *cost
+= extra_cost
->alu
.shift_reg
;
9509 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9511 *cost
= (COSTS_N_INSNS (3)
9512 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9514 *cost
+= 2 * extra_cost
->alu
.shift
;
9517 else if (mode
== SImode
)
9519 *cost
= (COSTS_N_INSNS (1)
9520 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9521 /* Slightly disparage register shifts at -Os, but not by much. */
9522 if (!CONST_INT_P (XEXP (x
, 1)))
9523 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9524 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9527 else if (GET_MODE_CLASS (mode
) == MODE_INT
9528 && GET_MODE_SIZE (mode
) < 4)
9532 *cost
= (COSTS_N_INSNS (1)
9533 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9534 /* Slightly disparage register shifts at -Os, but not by
9536 if (!CONST_INT_P (XEXP (x
, 1)))
9537 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9538 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9540 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9542 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9544 /* Can use SBFX/UBFX. */
9545 *cost
= COSTS_N_INSNS (1);
9547 *cost
+= extra_cost
->alu
.bfx
;
9548 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9552 *cost
= COSTS_N_INSNS (2);
9553 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9556 if (CONST_INT_P (XEXP (x
, 1)))
9557 *cost
+= 2 * extra_cost
->alu
.shift
;
9559 *cost
+= (extra_cost
->alu
.shift
9560 + extra_cost
->alu
.shift_reg
);
9563 /* Slightly disparage register shifts. */
9564 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9569 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9570 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9573 if (CONST_INT_P (XEXP (x
, 1)))
9574 *cost
+= (2 * extra_cost
->alu
.shift
9575 + extra_cost
->alu
.log_shift
);
9577 *cost
+= (extra_cost
->alu
.shift
9578 + extra_cost
->alu
.shift_reg
9579 + extra_cost
->alu
.log_shift_reg
);
9585 *cost
= LIBCALL_COST (2);
9593 *cost
= COSTS_N_INSNS (1);
9595 *cost
+= extra_cost
->alu
.rev
;
9602 /* No rev instruction available. Look at arm_legacy_rev
9603 and thumb_legacy_rev for the form of RTL used then. */
9606 *cost
= COSTS_N_INSNS (10);
9610 *cost
+= 6 * extra_cost
->alu
.shift
;
9611 *cost
+= 3 * extra_cost
->alu
.logical
;
9616 *cost
= COSTS_N_INSNS (5);
9620 *cost
+= 2 * extra_cost
->alu
.shift
;
9621 *cost
+= extra_cost
->alu
.arith_shift
;
9622 *cost
+= 2 * extra_cost
->alu
.logical
;
9630 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9631 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9633 *cost
= COSTS_N_INSNS (1);
9634 if (GET_CODE (XEXP (x
, 0)) == MULT
9635 || GET_CODE (XEXP (x
, 1)) == MULT
)
9637 rtx mul_op0
, mul_op1
, sub_op
;
9640 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9642 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9644 mul_op0
= XEXP (XEXP (x
, 0), 0);
9645 mul_op1
= XEXP (XEXP (x
, 0), 1);
9646 sub_op
= XEXP (x
, 1);
9650 mul_op0
= XEXP (XEXP (x
, 1), 0);
9651 mul_op1
= XEXP (XEXP (x
, 1), 1);
9652 sub_op
= XEXP (x
, 0);
9655 /* The first operand of the multiply may be optionally
9657 if (GET_CODE (mul_op0
) == NEG
)
9658 mul_op0
= XEXP (mul_op0
, 0);
9660 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9661 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9662 + rtx_cost (sub_op
, code
, 0, speed_p
));
9668 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9674 rtx shift_by_reg
= NULL
;
9678 *cost
= COSTS_N_INSNS (1);
9680 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9681 if (shift_op
== NULL
)
9683 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9684 non_shift_op
= XEXP (x
, 0);
9687 non_shift_op
= XEXP (x
, 1);
9689 if (shift_op
!= NULL
)
9691 if (shift_by_reg
!= NULL
)
9694 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9695 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9698 *cost
+= extra_cost
->alu
.arith_shift
;
9700 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9701 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9706 && GET_CODE (XEXP (x
, 1)) == MULT
)
9710 *cost
+= extra_cost
->mult
[0].add
;
9711 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9712 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9713 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9717 if (CONST_INT_P (XEXP (x
, 0)))
9719 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9720 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9722 *cost
= COSTS_N_INSNS (insns
);
9724 *cost
+= insns
* extra_cost
->alu
.arith
;
9725 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9729 *cost
+= extra_cost
->alu
.arith
;
9734 if (GET_MODE_CLASS (mode
) == MODE_INT
9735 && GET_MODE_SIZE (mode
) < 4)
9737 rtx shift_op
, shift_reg
;
9740 /* We check both sides of the MINUS for shifter operands since,
9741 unlike PLUS, it's not commutative. */
9743 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9744 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9746 /* Slightly disparage, as we might need to widen the result. */
9747 *cost
= 1 + COSTS_N_INSNS (1);
9749 *cost
+= extra_cost
->alu
.arith
;
9751 if (CONST_INT_P (XEXP (x
, 0)))
9753 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9762 *cost
= COSTS_N_INSNS (2);
9764 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9766 rtx op1
= XEXP (x
, 1);
9769 *cost
+= 2 * extra_cost
->alu
.arith
;
9771 if (GET_CODE (op1
) == ZERO_EXTEND
)
9772 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9774 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9775 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9779 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9782 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9783 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9785 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9788 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9789 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9792 *cost
+= (extra_cost
->alu
.arith
9793 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9794 ? extra_cost
->alu
.arith
9795 : extra_cost
->alu
.arith_shift
));
9796 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9797 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9798 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9803 *cost
+= 2 * extra_cost
->alu
.arith
;
9809 *cost
= LIBCALL_COST (2);
9813 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9814 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9816 *cost
= COSTS_N_INSNS (1);
9817 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9819 rtx mul_op0
, mul_op1
, add_op
;
9822 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9824 mul_op0
= XEXP (XEXP (x
, 0), 0);
9825 mul_op1
= XEXP (XEXP (x
, 0), 1);
9826 add_op
= XEXP (x
, 1);
9828 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9829 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9830 + rtx_cost (add_op
, code
, 0, speed_p
));
9836 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9839 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9841 *cost
= LIBCALL_COST (2);
9845 /* Narrow modes can be synthesized in SImode, but the range
9846 of useful sub-operations is limited. Check for shift operations
9847 on one of the operands. Only left shifts can be used in the
9849 if (GET_MODE_CLASS (mode
) == MODE_INT
9850 && GET_MODE_SIZE (mode
) < 4)
9852 rtx shift_op
, shift_reg
;
9855 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9857 if (CONST_INT_P (XEXP (x
, 1)))
9859 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9860 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9862 *cost
= COSTS_N_INSNS (insns
);
9864 *cost
+= insns
* extra_cost
->alu
.arith
;
9865 /* Slightly penalize a narrow operation as the result may
9867 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9871 /* Slightly penalize a narrow operation as the result may
9873 *cost
= 1 + COSTS_N_INSNS (1);
9875 *cost
+= extra_cost
->alu
.arith
;
9882 rtx shift_op
, shift_reg
;
9884 *cost
= COSTS_N_INSNS (1);
9886 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9887 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9889 /* UXTA[BH] or SXTA[BH]. */
9891 *cost
+= extra_cost
->alu
.extend_arith
;
9892 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9894 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9899 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9900 if (shift_op
!= NULL
)
9905 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9906 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9909 *cost
+= extra_cost
->alu
.arith_shift
;
9911 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9912 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9915 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9917 rtx mul_op
= XEXP (x
, 0);
9919 *cost
= COSTS_N_INSNS (1);
9921 if (TARGET_DSP_MULTIPLY
9922 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9923 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9924 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9925 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9926 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9927 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9928 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9929 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9930 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9931 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9932 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9933 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9938 *cost
+= extra_cost
->mult
[0].extend_add
;
9939 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9940 SIGN_EXTEND
, 0, speed_p
)
9941 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9942 SIGN_EXTEND
, 0, speed_p
)
9943 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9948 *cost
+= extra_cost
->mult
[0].add
;
9949 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9950 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9951 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9954 if (CONST_INT_P (XEXP (x
, 1)))
9956 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9957 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9959 *cost
= COSTS_N_INSNS (insns
);
9961 *cost
+= insns
* extra_cost
->alu
.arith
;
9962 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9966 *cost
+= extra_cost
->alu
.arith
;
9974 && GET_CODE (XEXP (x
, 0)) == MULT
9975 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9976 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9977 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9978 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9980 *cost
= COSTS_N_INSNS (1);
9982 *cost
+= extra_cost
->mult
[1].extend_add
;
9983 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9984 ZERO_EXTEND
, 0, speed_p
)
9985 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9986 ZERO_EXTEND
, 0, speed_p
)
9987 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9991 *cost
= COSTS_N_INSNS (2);
9993 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9994 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9997 *cost
+= (extra_cost
->alu
.arith
9998 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9999 ? extra_cost
->alu
.arith
10000 : extra_cost
->alu
.arith_shift
));
10002 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
10004 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10009 *cost
+= 2 * extra_cost
->alu
.arith
;
10014 *cost
= LIBCALL_COST (2);
10017 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10019 *cost
= COSTS_N_INSNS (1);
10021 *cost
+= extra_cost
->alu
.rev
;
10025 /* Fall through. */
10026 case AND
: case XOR
:
10027 if (mode
== SImode
)
10029 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10030 rtx op0
= XEXP (x
, 0);
10031 rtx shift_op
, shift_reg
;
10033 *cost
= COSTS_N_INSNS (1);
10037 || (code
== IOR
&& TARGET_THUMB2
)))
10038 op0
= XEXP (op0
, 0);
10041 shift_op
= shifter_op_p (op0
, &shift_reg
);
10042 if (shift_op
!= NULL
)
10047 *cost
+= extra_cost
->alu
.log_shift_reg
;
10048 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10051 *cost
+= extra_cost
->alu
.log_shift
;
10053 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10054 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10058 if (CONST_INT_P (XEXP (x
, 1)))
10060 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10061 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10064 *cost
= COSTS_N_INSNS (insns
);
10066 *cost
+= insns
* extra_cost
->alu
.logical
;
10067 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10072 *cost
+= extra_cost
->alu
.logical
;
10073 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10074 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10078 if (mode
== DImode
)
10080 rtx op0
= XEXP (x
, 0);
10081 enum rtx_code subcode
= GET_CODE (op0
);
10083 *cost
= COSTS_N_INSNS (2);
10087 || (code
== IOR
&& TARGET_THUMB2
)))
10088 op0
= XEXP (op0
, 0);
10090 if (GET_CODE (op0
) == ZERO_EXTEND
)
10093 *cost
+= 2 * extra_cost
->alu
.logical
;
10095 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10096 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10099 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10102 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10104 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10105 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10110 *cost
+= 2 * extra_cost
->alu
.logical
;
10116 *cost
= LIBCALL_COST (2);
10120 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10121 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10123 rtx op0
= XEXP (x
, 0);
10125 *cost
= COSTS_N_INSNS (1);
10127 if (GET_CODE (op0
) == NEG
)
10128 op0
= XEXP (op0
, 0);
10131 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10133 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10134 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10137 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10139 *cost
= LIBCALL_COST (2);
10143 if (mode
== SImode
)
10145 *cost
= COSTS_N_INSNS (1);
10146 if (TARGET_DSP_MULTIPLY
10147 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10148 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10149 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10150 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10151 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10152 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10153 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10154 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10155 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10156 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10157 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10158 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10161 /* SMUL[TB][TB]. */
10163 *cost
+= extra_cost
->mult
[0].extend
;
10164 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10165 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10169 *cost
+= extra_cost
->mult
[0].simple
;
10173 if (mode
== DImode
)
10176 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10177 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10178 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10179 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10181 *cost
= COSTS_N_INSNS (1);
10183 *cost
+= extra_cost
->mult
[1].extend
;
10184 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10185 ZERO_EXTEND
, 0, speed_p
)
10186 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10187 ZERO_EXTEND
, 0, speed_p
));
10191 *cost
= LIBCALL_COST (2);
10196 *cost
= LIBCALL_COST (2);
10200 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10201 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10203 *cost
= COSTS_N_INSNS (1);
10205 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10209 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10211 *cost
= LIBCALL_COST (1);
10215 if (mode
== SImode
)
10217 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10219 *cost
= COSTS_N_INSNS (2);
10220 /* Assume the non-flag-changing variant. */
10222 *cost
+= (extra_cost
->alu
.log_shift
10223 + extra_cost
->alu
.arith_shift
);
10224 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10228 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10229 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10231 *cost
= COSTS_N_INSNS (2);
10232 /* No extra cost for MOV imm and MVN imm. */
10233 /* If the comparison op is using the flags, there's no further
10234 cost, otherwise we need to add the cost of the comparison. */
10235 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10236 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10237 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10239 *cost
+= (COSTS_N_INSNS (1)
10240 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10242 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10245 *cost
+= extra_cost
->alu
.arith
;
10249 *cost
= COSTS_N_INSNS (1);
10251 *cost
+= extra_cost
->alu
.arith
;
10255 if (GET_MODE_CLASS (mode
) == MODE_INT
10256 && GET_MODE_SIZE (mode
) < 4)
10258 /* Slightly disparage, as we might need an extend operation. */
10259 *cost
= 1 + COSTS_N_INSNS (1);
10261 *cost
+= extra_cost
->alu
.arith
;
10265 if (mode
== DImode
)
10267 *cost
= COSTS_N_INSNS (2);
10269 *cost
+= 2 * extra_cost
->alu
.arith
;
10274 *cost
= LIBCALL_COST (1);
10278 if (mode
== SImode
)
10281 rtx shift_reg
= NULL
;
10283 *cost
= COSTS_N_INSNS (1);
10284 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10288 if (shift_reg
!= NULL
)
10291 *cost
+= extra_cost
->alu
.log_shift_reg
;
10292 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10295 *cost
+= extra_cost
->alu
.log_shift
;
10296 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10301 *cost
+= extra_cost
->alu
.logical
;
10304 if (mode
== DImode
)
10306 *cost
= COSTS_N_INSNS (2);
10312 *cost
+= LIBCALL_COST (1);
10317 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10319 *cost
= COSTS_N_INSNS (4);
10322 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10323 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10325 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10326 /* Assume that if one arm of the if_then_else is a register,
10327 that it will be tied with the result and eliminate the
10328 conditional insn. */
10329 if (REG_P (XEXP (x
, 1)))
10331 else if (REG_P (XEXP (x
, 2)))
10337 if (extra_cost
->alu
.non_exec_costs_exec
)
10338 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10340 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10343 *cost
+= op1cost
+ op2cost
;
10349 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10353 machine_mode op0mode
;
10354 /* We'll mostly assume that the cost of a compare is the cost of the
10355 LHS. However, there are some notable exceptions. */
10357 /* Floating point compares are never done as side-effects. */
10358 op0mode
= GET_MODE (XEXP (x
, 0));
10359 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10360 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10362 *cost
= COSTS_N_INSNS (1);
10364 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10366 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10368 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10374 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10376 *cost
= LIBCALL_COST (2);
10380 /* DImode compares normally take two insns. */
10381 if (op0mode
== DImode
)
10383 *cost
= COSTS_N_INSNS (2);
10385 *cost
+= 2 * extra_cost
->alu
.arith
;
10389 if (op0mode
== SImode
)
10394 if (XEXP (x
, 1) == const0_rtx
10395 && !(REG_P (XEXP (x
, 0))
10396 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10397 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10399 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10401 /* Multiply operations that set the flags are often
10402 significantly more expensive. */
10404 && GET_CODE (XEXP (x
, 0)) == MULT
10405 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10406 *cost
+= extra_cost
->mult
[0].flag_setting
;
10409 && GET_CODE (XEXP (x
, 0)) == PLUS
10410 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10411 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10413 *cost
+= extra_cost
->mult
[0].flag_setting
;
10418 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10419 if (shift_op
!= NULL
)
10421 *cost
= COSTS_N_INSNS (1);
10422 if (shift_reg
!= NULL
)
10424 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10426 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10429 *cost
+= extra_cost
->alu
.arith_shift
;
10430 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10431 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10435 *cost
= COSTS_N_INSNS (1);
10437 *cost
+= extra_cost
->alu
.arith
;
10438 if (CONST_INT_P (XEXP (x
, 1))
10439 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10441 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10449 *cost
= LIBCALL_COST (2);
10472 if (outer_code
== SET
)
10474 /* Is it a store-flag operation? */
10475 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10476 && XEXP (x
, 1) == const0_rtx
)
10478 /* Thumb also needs an IT insn. */
10479 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10482 if (XEXP (x
, 1) == const0_rtx
)
10487 /* LSR Rd, Rn, #31. */
10488 *cost
= COSTS_N_INSNS (1);
10490 *cost
+= extra_cost
->alu
.shift
;
10500 *cost
= COSTS_N_INSNS (2);
10504 /* RSBS T1, Rn, Rn, LSR #31
10506 *cost
= COSTS_N_INSNS (2);
10508 *cost
+= extra_cost
->alu
.arith_shift
;
10512 /* RSB Rd, Rn, Rn, ASR #1
10513 LSR Rd, Rd, #31. */
10514 *cost
= COSTS_N_INSNS (2);
10516 *cost
+= (extra_cost
->alu
.arith_shift
10517 + extra_cost
->alu
.shift
);
10523 *cost
= COSTS_N_INSNS (2);
10525 *cost
+= extra_cost
->alu
.shift
;
10529 /* Remaining cases are either meaningless or would take
10530 three insns anyway. */
10531 *cost
= COSTS_N_INSNS (3);
10534 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10539 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10540 if (CONST_INT_P (XEXP (x
, 1))
10541 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10543 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10550 /* Not directly inside a set. If it involves the condition code
10551 register it must be the condition for a branch, cond_exec or
10552 I_T_E operation. Since the comparison is performed elsewhere
10553 this is just the control part which has no additional
10555 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10556 && XEXP (x
, 1) == const0_rtx
)
10564 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10565 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10567 *cost
= COSTS_N_INSNS (1);
10569 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10573 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10575 *cost
= LIBCALL_COST (1);
10579 if (mode
== SImode
)
10581 *cost
= COSTS_N_INSNS (1);
10583 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10587 *cost
= LIBCALL_COST (1);
10591 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10592 && MEM_P (XEXP (x
, 0)))
10594 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10596 if (mode
== DImode
)
10597 *cost
+= COSTS_N_INSNS (1);
10602 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10603 *cost
+= extra_cost
->ldst
.load
;
10605 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10607 if (mode
== DImode
)
10608 *cost
+= extra_cost
->alu
.shift
;
10613 /* Widening from less than 32-bits requires an extend operation. */
10614 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10616 /* We have SXTB/SXTH. */
10617 *cost
= COSTS_N_INSNS (1);
10618 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10620 *cost
+= extra_cost
->alu
.extend
;
10622 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10624 /* Needs two shifts. */
10625 *cost
= COSTS_N_INSNS (2);
10626 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10628 *cost
+= 2 * extra_cost
->alu
.shift
;
10631 /* Widening beyond 32-bits requires one more insn. */
10632 if (mode
== DImode
)
10634 *cost
+= COSTS_N_INSNS (1);
10636 *cost
+= extra_cost
->alu
.shift
;
10643 || GET_MODE (XEXP (x
, 0)) == SImode
10644 || GET_MODE (XEXP (x
, 0)) == QImode
)
10645 && MEM_P (XEXP (x
, 0)))
10647 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10649 if (mode
== DImode
)
10650 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10655 /* Widening from less than 32-bits requires an extend operation. */
10656 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10658 /* UXTB can be a shorter instruction in Thumb2, but it might
10659 be slower than the AND Rd, Rn, #255 alternative. When
10660 optimizing for speed it should never be slower to use
10661 AND, and we don't really model 16-bit vs 32-bit insns
10663 *cost
= COSTS_N_INSNS (1);
10665 *cost
+= extra_cost
->alu
.logical
;
10667 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10669 /* We have UXTB/UXTH. */
10670 *cost
= COSTS_N_INSNS (1);
10671 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10673 *cost
+= extra_cost
->alu
.extend
;
10675 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10677 /* Needs two shifts. It's marginally preferable to use
10678 shifts rather than two BIC instructions as the second
10679 shift may merge with a subsequent insn as a shifter
10681 *cost
= COSTS_N_INSNS (2);
10682 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10684 *cost
+= 2 * extra_cost
->alu
.shift
;
10686 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10687 *cost
= COSTS_N_INSNS (1);
10689 /* Widening beyond 32-bits requires one more insn. */
10690 if (mode
== DImode
)
10692 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10699 /* CONST_INT has no mode, so we cannot tell for sure how many
10700 insns are really going to be needed. The best we can do is
10701 look at the value passed. If it fits in SImode, then assume
10702 that's the mode it will be used for. Otherwise assume it
10703 will be used in DImode. */
10704 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10709 /* Avoid blowing up in arm_gen_constant (). */
10710 if (!(outer_code
== PLUS
10711 || outer_code
== AND
10712 || outer_code
== IOR
10713 || outer_code
== XOR
10714 || outer_code
== MINUS
))
10718 if (mode
== SImode
)
10720 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10721 INTVAL (x
), NULL
, NULL
,
10727 *cost
+= COSTS_N_INSNS (arm_gen_constant
10728 (outer_code
, SImode
, NULL
,
10729 trunc_int_for_mode (INTVAL (x
), SImode
),
10731 + arm_gen_constant (outer_code
, SImode
, NULL
,
10732 INTVAL (x
) >> 32, NULL
,
10744 if (arm_arch_thumb2
&& !flag_pic
)
10745 *cost
= COSTS_N_INSNS (2);
10747 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10750 *cost
= COSTS_N_INSNS (2);
10754 *cost
+= COSTS_N_INSNS (1);
10756 *cost
+= extra_cost
->alu
.arith
;
10762 *cost
= COSTS_N_INSNS (4);
10767 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10768 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10770 if (vfp3_const_double_rtx (x
))
10772 *cost
= COSTS_N_INSNS (1);
10774 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10780 *cost
= COSTS_N_INSNS (1);
10781 if (mode
== DFmode
)
10782 *cost
+= extra_cost
->ldst
.loadd
;
10784 *cost
+= extra_cost
->ldst
.loadf
;
10787 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10791 *cost
= COSTS_N_INSNS (4);
10797 && TARGET_HARD_FLOAT
10798 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10799 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10800 *cost
= COSTS_N_INSNS (1);
10802 *cost
= COSTS_N_INSNS (4);
10807 *cost
= COSTS_N_INSNS (1);
10808 /* When optimizing for size, we prefer constant pool entries to
10809 MOVW/MOVT pairs, so bump the cost of these slightly. */
10815 *cost
= COSTS_N_INSNS (1);
10817 *cost
+= extra_cost
->alu
.clz
;
10821 if (XEXP (x
, 1) == const0_rtx
)
10823 *cost
= COSTS_N_INSNS (1);
10825 *cost
+= extra_cost
->alu
.log_shift
;
10826 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10829 /* Fall through. */
10833 *cost
= COSTS_N_INSNS (2);
10837 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10838 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10839 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10840 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10841 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10842 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10843 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10844 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10847 *cost
= COSTS_N_INSNS (1);
10849 *cost
+= extra_cost
->mult
[1].extend
;
10850 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10852 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10856 *cost
= LIBCALL_COST (1);
10859 case UNSPEC_VOLATILE
:
10861 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10864 /* Reading the PC is like reading any other register. Writing it
10865 is more expensive, but we take that into account elsewhere. */
10870 /* TODO: Simple zero_extract of bottom bits using AND. */
10871 /* Fall through. */
10875 && CONST_INT_P (XEXP (x
, 1))
10876 && CONST_INT_P (XEXP (x
, 2)))
10878 *cost
= COSTS_N_INSNS (1);
10880 *cost
+= extra_cost
->alu
.bfx
;
10881 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10884 /* Without UBFX/SBFX, need to resort to shift operations. */
10885 *cost
= COSTS_N_INSNS (2);
10887 *cost
+= 2 * extra_cost
->alu
.shift
;
10888 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_FPU_ARMV8
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
          /* Vector modes?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);

          *cost = COSTS_N_INSNS (1);

          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, FMA, 0, speed_p);
          *cost += rtx_cost (op1, FMA, 1, speed_p);
          *cost += rtx_cost (op2, FMA, 2, speed_p);

          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fma;

          return true;
        }

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
              else
                *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
              return true;
            }
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_FPU_ARMV8)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].roundint;
              return false;
            }
          /* Vector costs?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
        {
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fromint;
          return false;
        }
      *cost = LIBCALL_COST (1);
      return false;
    case CALL:
      *cost = COSTS_N_INSNS (1);
      return true;

    case ASM_OPERANDS:
      {
        /* Just a guess.  Guess number of instructions in the asm
           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
           though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

        *cost = COSTS_N_INSNS (asm_length + num_operands);
        return true;
      }

    default:
      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
        result = arm_size_rtx_costs (x, (enum rtx_code) code,
                                     (enum rtx_code) outer_code, total);
      else
        result = current_tune->rtx_costs (x, (enum rtx_code) code,
                                          (enum rtx_code) outer_code, total,
                                          speed);
    }
  else
    {
      /* New way.  */
      if (current_tune->insn_extra_cost)
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    current_tune->insn_extra_cost,
                                    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
         && current_tune->insn_extra_cost != NULL  */
      else
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
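/* Worked example of the estimate above (an illustrative note, not part of
   the original source): multiplying by 0x13 on a "slowmul" core starts at
   cost 4 (0x13 is const_ok_for_arm), then the Booth loop retires 2 bits per
   iteration: 0x13 -> 0x4 -> 0x1 -> 0, three iterations, giving
   COSTS_N_INSNS (7) before the cost of operand 0 is added.  */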
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
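/* Worked example for the XScale constant-multiply costing above (an
   illustrative note, not part of the original source): for i = 0x12345678
   the sign bit is clear, so i is not inverted; i & 0xffff8000 = 0x12340000
   is nonzero (one extra cycle), and i & 0xf8000000 = 0x10000000 is also
   nonzero (a second extra cycle), so the total is COSTS_N_INSNS (3): the
   general one-cycle overhead plus two extra retirement cycles.  */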
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          /* Small multiply: 32 cycles for an integer multiply inst.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            *total = COSTS_N_INSNS (32);
          else
            *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                     s0 = s0 <op> s1
                     s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   one is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                tree vectype,
                                int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
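/* Note (illustrative, not from the original source): under this model
   constructing a V4SI vector from scalars costs 4 / 2 + 1 = 3 units and a
   V16QI vector costs 16 / 2 + 1 = 9, i.e. roughly one operation per pair
   of elements plus one to combine the halves.  */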
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD1:
    case TYPE_STORE1:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;

    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
                        int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock,
             *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
        {
          first_older_only = i;
          if (verbose > 5)
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
          break;
        }
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
        first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[first_older_only]),
             INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i + 1];
    }

  ready[i] = first_older_only_insn;
  return;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
                   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}

/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     ((-1)^s * n * 2^-r)

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */
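/* Worked example (illustrative, not from the original source): 1.0 can be
   written as 16 * 2^-4, so s = 0, n = 16 and r = 4.  That encodes as
   A = 0, BCD = 4 XOR 3 = 7 (binary 111) and EFGH = 16 - 16 = 0, giving the
   8-bit immediate 01110000, i.e. 0x70, which is what
   vfp3_const_double_index below computes for CONST_DOUBLE 1.0.  */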
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
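/* Worked example (illustrative, not from the original source): a V4SI
   constant with all four elements equal to 0x00005500 has little-endian
   bytes 00 55 00 00, which matches variant 1
   (00000000 00000000 abcdefgh 00000000) with abcdefgh = 0x55, so the
   assembler can emit "vmov.i32" with immediate 0x5500; the bitwise inverse
   of the same value would instead be recognized through variant 7.  */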
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
        mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
        return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
        {
          rtx elt = CONST_VECTOR_ELT (op, i);
          REAL_VALUE_TYPE re;

          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
        return 19;
      else
        return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
        {
          elpart = INTVAL (el);
          parts = 1;
        }
      else if (CONST_DOUBLE_P (el))
        {
          elpart = CONST_DOUBLE_LOW (el);
          parts = 2;
        }
      else
        gcc_unreachable ();

      for (part = 0; part < parts; part++)
        {
          unsigned int byte;
          for (byte = 0; byte < innersize; byte++)
            {
              bytes[idx++] = (elpart & 0xff) ^ invmask;
              elpart >>= BITS_PER_UNIT;
            }
          if (CONST_DOUBLE_P (el))
            elpart = CONST_DOUBLE_HIGH (el);
        }
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
                        && bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

      if (immtype == 17)
        {
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
      else
        {
          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
                               rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shift,
   because the two have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
                                rtx *modconst, int *elementwidth,
                                bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
        return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
        return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
                             int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
                             machine_mode mode, int quad,
                             bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
{
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
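/* Illustration (not from the original source): for a V4SF reduction with a
   vpadd-style generator, parts == 4, so the loop runs with i == 2 (into a
   fresh scratch register) and then i == 1 (into OP0): two pairwise
   operations fold the four lanes, and lane 0 of OP0 holds the result.  */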
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
            n_const++;
        }
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
        {
        case V8QImode:
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
          break;
        case V16QImode:
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
          break;
        case V4HImode:
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
          break;
        case V8HImode:
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
          break;
        case V2SImode:
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
          break;
        case V4SImode:
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
          break;
        case V2SFmode:
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
          break;
        case V4SFmode:
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
          break;
        case V2DImode:
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
          break;
        default:
          gcc_unreachable ();
        }
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
          && (GET_CODE (ind) == PRE_INC
              || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
           (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
          (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
        return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

static bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
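/* Worked examples (illustrative, not from the original source): bounds
   [0, 255] give log = exact_log2 (256) = 8 with lo_bound == 0, matching the
   unsigned form "usat Rd, #8, Rm"; bounds [-256, 255] satisfy
   lo_bound == -hi_bound - 1, matching the signed form with
   *mask = log + 1 = 9, i.e. "ssat Rd, #9, Rm".  */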
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))
        return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
             to using an ldm instruction unless the offset is zero and
             we are optimizing for size.  */
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
                  && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
              && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
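/* Illustrative note (not part of GCC): the test above accepts address
   pairs of the form (reg) / (plus reg const) off the same base register
   whose constant offsets differ by exactly +/-4, e.g. [r4, #8] and
   [r4, #12].  A hedged sketch of the final offset check in plain C,
   with made-up values:  */
#if 0
static int
offsets_adjacent_example (long val0, long val1)
{
  long val_diff = val1 - val0;
  /* Mirrors the closing test above for the no-load-delay-slot case.  */
  return val_diff == 4 || val_diff == -4;
}
/* offsets_adjacent_example (8, 12) == 1; offsets_adjacent_example (8, 20) == 0.  */
#endif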
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below. If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register. It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted. Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register. It
             guarantees that SP is reset correctly when an LDM instruction
             is interrupted. Otherwise, we might end up with a corrupt stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, address register is always modified - either by write-back
         or by explicit load.  If the pattern does not describe an update,
         then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
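/* Illustrative note (not part of GCC): for the write-back check above,
   a pop of three SImode registers is a PARALLEL of four elements (one
   base update plus three loads, offset_adj == 0), so the base must be
   adjusted by (count - 1 - offset_adj) * reg_increment == 3 * 4 == 12
   bytes.  A hedged arithmetic sketch with made-up values:  */
#if 0
static long
expected_writeback (long count, long offset_adj, long reg_increment)
{
  return (count - 1 - offset_adj) * reg_increment;
}
/* expected_writeback (4, 0, 4) == 12, matching "pop {r4, r5, r6}".  */
#endif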
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
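/* Illustrative note (not part of GCC): a hedged sketch of the ordering
   above on made-up data.  Offsets {4, 12, 0, 8} with order[0] preset to
   2 (the index of the lowest offset, 0) yield order == {2, 0, 3, 1},
   i.e. ascending offsets 0, 4, 8, 12; any gap other than exactly 4
   makes the function return false.  */
#if 0
static void
compute_offset_order_example (void)
{
  HOST_WIDE_INT offsets[4] = {4, 12, 0, 8};
  int order[4] = {2, 0, 0, 0};  /* Caller fills in order[0].  */
  bool ok = compute_offset_order (4, offsets, order, NULL);
  /* ok == true; order == {2, 0, 3, 1}.  */
}
#endif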
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
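/* Illustrative note (not part of GCC): with count == 2, regs == {4, 5},
   mems based at r0 and wback_offset == 8 (made-up values), the function
   above builds a PARALLEL equivalent to

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r4) (mem:SI (reg:SI r0)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])

   which matches an "ldmia r0!, {r4, r5}" pattern.  */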
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register numbers to be used in the instruction.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.*/

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
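/* Illustrative note (not part of GCC): a hedged usage sketch of the
   wrappers above.  The register numbers are made up; OFFSET starts at
   the offset already folded into BASEMEM's address and is advanced by
   4 per word when write-back is requested, so the caller can continue
   copying any tail bytes at the right MEM_OFFSET.  */
#if 0
static void
gen_load_multiple_example (rtx basemem)
{
  int regnos[2] = {4, 5};
  HOST_WIDE_INT offset = 0;
  rtx base = copy_addr_to_reg (XEXP (basemem, 0));

  /* Emits an ldmia-style insn loading r4 and r5 and advancing BASE by 8;
     OFFSET is updated to 8 for any follow-up tail copy.  */
  emit_insn (arm_gen_load_multiple (regnos, 2, base, TRUE, basemem, &offset));
}
#endif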
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
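/* Illustrative note (not part of GCC): the emitted RTL corresponds
   roughly to the host-level loop shape below (a hedged sketch; the
   real copy goes through registers and may use ldm/stm rather than a
   library call).  */
#if 0
static void
block_move_loop_shape (char *dest, const char *src, long length,
                       long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const char *final_src = src + (length - leftover);

  do
    {
      /* Loop body: copy bytes_per_iter bytes, then advance both pointers.  */
      __builtin_memcpy (dest, src, bytes_per_iter);
      src += bytes_per_iter;
      dest += bytes_per_iter;
    }
  while (src != final_src);

  /* Mop up any left-over bytes.  */
  __builtin_memcpy (dest, src, leftover);
}
#endif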
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         tricky.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i+=4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
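/* Illustrative note (not part of GCC): a hedged usage sketch.  Given a
   DImode mem based at some address A, next_consecutive_mem returns a
   DImode mem at A + 8 (the step is the mode size, so 4 for SImode, 2
   for HImode), which is how the LDRD/STRD copy below walks through the
   source and destination buffers.  */
#if 0
static void
next_consecutive_mem_example (rtx mem_di)
{
  /* For a DImode mem, SECOND addresses the following 8 bytes, and its
     recorded MEM_OFFSET is advanced by 8 as well.  */
  rtx second = next_consecutive_mem (mem_di);
  (void) second;
}
#endif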
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
        return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
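/* Illustrative note (not part of GCC): the copy strategy above, as a
   hedged host-level sketch -- double-words while at least 8 bytes
   remain, then at most one word, one halfword and one byte:  */
#if 0
static void
ldrd_strd_copy_shape (char *dst, const char *src, unsigned long len)
{
  for (; len >= 8; len -= 8, src += 8, dst += 8)
    __builtin_memcpy (dst, src, 8);     /* LDRD/STRD */
  if (len >= 4)
    { __builtin_memcpy (dst, src, 4); src += 4; dst += 4; len -= 4; }
  if (len >= 2)
    { __builtin_memcpy (dst, src, 2); src += 2; dst += 2; len -= 2; }
  if (len)
    *dst = *src;                        /* LDRB/STRB */
}
#endif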
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case  LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
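/* Illustrative note (not part of GCC): "dominance" here means that one
   comparison being true implies the other is too, so a single combined
   CC mode can serve both -- e.g. (a < b) || (a <= b) collapses to
   CC_DLEmode because LT dominates LE, whereas LT against GT has no
   dominance either way and yields CCmode (rejected).  A hedged sketch
   using the existing helper:  */
#if 0
static void
dominance_example (void)
{
  gcc_assert (comparison_dominates_p (LT, LE));  /* LT true => LE true.  */
  gcc_assert (!comparison_dominates_p (LT, GT)); /* No relation; CCmode.  */
}
#endif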
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_ARM)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
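/* Illustrative note (not part of GCC): a hedged sketch of a typical
   call.  For a simple SImode compare the function emits a single
   (set (reg CC_REGNUM) (compare ...)) and returns the CC register in
   the mode chosen by SELECT_CC_MODE; a conditional-branch pattern can
   then test that register.  */
#if 0
static void
gen_compare_example (rtx op0, rtx op1)
{
  /* No scratch is needed before reload for an SImode comparison.  */
  rtx cc = arm_gen_compare_reg (GE, op0, op1, NULL_RTX);
  (void) cc;
}
#endif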
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
arm_reload_in_hi (rtx *operands)
  rtx ref = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
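
      /* Worked example (added commentary, not from the original sources):
         offset = 0x1789 splits into lo = 0x789 and
         hi = ((0x1000 & 0xffffffff) ^ 0x80000000) - 0x80000000 = 0x1000,
         satisfying the assertion above; the XOR/subtract pair merely
         sign-extends bit 31, so that e.g. offset = -4 splits into
         lo = -4, hi = 0.  */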
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Get the base address; addsi3 knows how to handle constants
         that require more than one insn.  */
      emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_SUBREG (SImode, operands[0], 0),
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
arm_reload_out_hi (rtx *operands)
  rtx ref = operands[0];
  rtx outval = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            std::swap (scratch, base_plus);
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                std::swap (scratch, base_plus);
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     OUTVAL, and must be wider than a word.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  if (BYTES_BIG_ENDIAN)
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                            gen_lowpart (QImode, scratch)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                            gen_lowpart (QImode, scratch)));
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
arm_must_pass_in_stack (machine_mode mode, const_tree type)
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);

  return must_pass_in_stack_var_size_or_pad (mode, type);
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
arm_pad_reg_upward (machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if ((AGGREGATE_TYPE_P (type)
           || TREE_CODE (type) == COMPLEX_TYPE
           || FIXED_POINT_TYPE_P (type))
          && int_size_in_bytes (type) <= 4)

      if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
          && GET_MODE_SIZE (mode) <= 4)

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
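
/* Illustrative example (added commentary, not from the original sources):
   on a big-endian AAPCS target a 2-byte aggregate such as
   struct { char a, b; } satisfies the <= 4 byte test above and is padded
   upwards, i.e. its bytes occupy the most significant end of the register,
   whereas the final !BYTES_BIG_ENDIAN default would place a scalar at the
   least significant end.  */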
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))

  else if (TARGET_ARM)

  return ((offset <= max_offset) && (offset >= -max_offset));
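
/* Note (added commentary, an assumption about the elided lines rather
   than verbatim source): the missing assignments set max_offset to 1020
   for Thumb-2 (where the offset must also be a multiple of 4, as checked
   above) and to 255 for ARM mode, so e.g. an offset of 256 is rejected
   for an ARM-mode LDRD but accepted for Thumb-2.  */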
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets
   is 4.  If reload is complete, then check that the registers are legal.
   WBACK indicates whether the address is updated.  LOAD indicates whether the
   memory access is a load or a store.  */
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
  unsigned int t, t2, n;

  if (!reload_completed)

  if (!offset_ok_for_ldrd_strd (offset))

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))

      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
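
/* Illustrative note (added commentary, not from the original sources):
   the ARM-mode checks above encode the classical LDRD/STRD operand rules:
   the first transfer register must be even (the "t % 2 != 0" rejection)
   and the pair must not involve the PC, so e.g. ldrd r0, r1, [r2] is
   acceptable while ldrd r1, r2, [r3] is not.  */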
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */
  if (side_effects_p (mem))

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
                                 reload_in_progress || reload_completed))

  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
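
/* Illustrative note (added commentary): the two address shapes accepted
   here are a bare base register, e.g. (mem (reg r4)) with *offset left
   as const0_rtx, and base plus/minus a constant, e.g.
   (mem (plus (reg r4) (const_int 8))).  */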
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
gen_operands_ldrd_strd (rtx *operands, bool load,
                        bool const_store, bool commute)
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
      if (!mem_ok_for_ldrd_strd (operands[nops + i], &cur_base, &cur_offset))
      else if (REGNO (base) != REGNO (cur_base))

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
          tmp = SUBREG_REG (operands[i]);
          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
        ...
     can be transformed into
        ...
     in Thumb mode assuming that r1 is free.  */
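  /* The example elided just above would read roughly as follows (an
     illustration reconstructed from context, not verbatim from the
     sources):

        mov  r0, 0                  mov  r0, 0
        str  r0, [r2]       ->      mov  r1, 1
        mov  r0, 1                  strd r0, r1, [r2]
        str  r0, [r2, #4]
  */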
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))

          CLEAR_HARD_REG_SET (regset);
          tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
          if (tmp == NULL_RTX)

          /* Use the new register in the first load to ensure that
             if the original input register is not dead after peephole,
             then it will have the correct constant value.  */
      else if (TARGET_ARM)
          int regno = REGNO (operands[0]);
          if (!peep2_reg_dead_p (4, operands[0]))
              /* When the input register is even and is not dead after the
                 pattern, it has to hold the second constant but we cannot
                 form a legal STRD in ARM mode with this register as the second
                 ...  */
              if (regno % 2 == 0)
                  /* Is regno-1 free?  */
                  SET_HARD_REG_SET (regset);
                  CLEAR_HARD_REG_BIT (regset, regno - 1);
                  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
                  if (tmp == NULL_RTX)

              /* Find a DImode register.  */
              CLEAR_HARD_REG_SET (regset);
              tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
              if (tmp != NULL_RTX)
                  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
                  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);

                  /* Can we use the input register to form a DI register?  */
                  SET_HARD_REG_SET (regset);
                  CLEAR_HARD_REG_BIT (regset,
                                      regno % 2 == 0 ? regno + 1 : regno - 1);
                  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
                  if (tmp == NULL_RTX)

                  operands[regno % 2 == 1 ? 0 : 1] = tmp;

      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
        std::swap (operands[4], operands[5]);

      gap = offsets[1] - offsets[0];
      offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */

  if (load && commute)
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

      /* If input registers are dead after this pattern, they can be
         reordered or replaced by other registers that are free in the
         current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
          || !peep2_reg_dead_p (4, operands[1]))

      /* Try to reorder the input registers.  */
      /* For example, the code
            ...
         can be transformed into
            ...  */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
          std::swap (operands[0], operands[1]);

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
          tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
          if (tmp == NULL_RTX)

          /* DREG must be an even-numbered register in DImode.
             Split it into SI registers.  */
          operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
          operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

  return (operands_ok_ldrd_strd (operands[0], operands[1],
/* Print a symbolic form of X to the debug file, F.  */
arm_print_value (FILE *f, rtx x)
  switch (GET_CODE (x))
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));

      fprintf (f, "<0x%lx,0x%lx>", (long) XWINT (x, 2), (long) XWINT (x, 3));

      for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
          if (i < (CONST_VECTOR_NUNITS (x) - 1))

      fprintf (f, "\"%s\"", XSTR (x, 0));

      fprintf (f, "`%s'", XSTR (x, 0));

      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));

      arm_print_value (f, XEXP (x, 0));

      arm_print_value (f, XEXP (x, 0));
      arm_print_value (f, XEXP (x, 1));

      fprintf (f, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:
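
   (The example elided here would sketch the worst case roughly as
    follows; this is an illustration reconstructed from context, not
    verbatim:

        ldr     rn, L1
        ...
        b       L2
        align
    L1: .long   value
    L2:
        ...)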
   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   ...  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
  /* Doubly linked chain of entries.  */
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  /* The mode of value.  */
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */

struct minipool_fixup
  HOST_WIDE_INT address;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
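
/* For instance (added commentary): MINIPOOL_FIX_SIZE (HImode) evaluates
   to 4 because the 2-byte value is padded out to a word, while
   MINIPOOL_FIX_SIZE (DImode) is simply the mode's own size, 8.  */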
static Mnode *minipool_vector_head;
static Mnode *minipool_vector_tail;
static rtx_code_label *minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *minipool_fix_head;
Mfix *minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *minipool_barrier;
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);

          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~(HOST_WIDE_INT) 1;

          /* No padding necessary for TBH.  */

          /* Add two bytes for alignment on Thumb.  */

          gcc_unreachable ();
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
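
/* Worked example (added commentary): a label aligned to 8 bytes in Thumb
   code (min_insn_size == 2) can be preceded by at most 8 - 2 = 6 bytes of
   padding; in ARM code (min_insn_size == 4) by at most 4 bytes.  */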
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
      if (max_address < mp->max_address)
        mp->max_address = max_address;

      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
        mp->prev->next = mp;
        minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
add_minipool_forward_ref (Mfix *fix)
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
          >= minipool_vector_head->max_address - fix->fix_size))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
          /* More than one fix references this entry.  */
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);

      /* Note the insertion point if necessary.  */
          && mp->max_address > max_address)
          /* If we are inserting an 8-bytes aligned quantity and
             we have not already found an insertion point, then
             make sure that all such 8-byte aligned quantities are
             placed at the start of the pool.  */
          if (ARM_DOUBLEWORD_ALIGN
              && fix->fix_size >= 8
              && mp->fix_size < 8)
              max_address = mp->max_address;

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
      mp->max_address = max_address;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        mp->prev->next = mp;

      minipool_vector_tail = mp;

      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
        mp->max_address = max_address;

      mp->prev = max_mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp;
        minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
      if (min_address > mp->min_address)
        mp->min_address = min_address;

      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->next = min_mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp;
        minipool_vector_tail = mp;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
add_minipool_backward_ref (Mfix *fix)
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
              min_address = mp->min_address + fix->fix_size;
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
              min_address = mp->min_address + fix->fix_size;

  /* We need to create a new entry.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->max_address = minipool_barrier->address + 65536;
  mp->min_address = min_address;

  if (min_mp == NULL)
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        mp->next->prev = mp;

      minipool_vector_head = mp;

      mp->next = min_mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp;
        minipool_vector_tail = mp;

  /* Save the new entry.  */

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

        mp->next->offset = mp->offset + mp->fix_size;
        mp->next->offset = mp->offset;
assign_minipool_offsets (Mfix *barrier)
  HOST_WIDE_INT offset = 0;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
/* Output the literal table.  */
dump_minipool (rtx_insn *scan)
  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)

    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address,
             align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
      if (mp->refcount > 0)
          fprintf (dump_file,
                   ";; Offset %u, min %ld, max %ld ",
                   (unsigned) mp->offset, (unsigned long) mp->min_address,
                   (unsigned long) mp->max_address);
          arm_print_value (dump_file, mp->value);
          fputc ('\n', dump_file);

          switch (GET_MODE_SIZE (mp->mode))
#ifdef HAVE_consttable_1
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
#endif
#ifdef HAVE_consttable_2
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
#endif
#ifdef HAVE_consttable_4
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
#endif
#ifdef HAVE_consttable_8
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
#endif
#ifdef HAVE_consttable_16
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
#endif
              gcc_unreachable ();

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
/* Return the cost of forcibly inserting a barrier after INSN.  */
arm_barrier_cost (rtx_insn *insn)
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))

  switch (GET_CODE (insn))
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return base_cost - 10;

      return base_cost + 10;
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
      rtx_jump_table_data *tmp;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
              selected_cost = new_cost;
              selected_address = fix->address + count;

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
          selected_cost = new_cost;
          selected_address = fix->address + count;

      from = NEXT_INSN (from);

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));

  fix->address = address;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
                   machine_mode mode, rtx value)
  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));

  fix->address = address;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)

      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long) fix->backwards, (long) fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");

  /* Add it to the chain of fixes.  */
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
arm_max_const_double_inline_cost ()
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
arm_const_double_inline_cost (rtx val)
  rtx lowpart, highpart;

  mode = GET_MODE (val);

  if (mode == VOIDmode)

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
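
/* For instance (added commentary, not from the original sources): for
   the DImode constant 0x0000000100000001 both halves are 1, a valid ARM
   immediate, so each arm_gen_constant call should report a single insn
   and the total inline cost comes out as 2.  */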
/* Cost of loading a SImode constant.  */
arm_const_inline_cost (enum rtx_code code, rtx val)
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
                           NULL_RTX, NULL_RTX, 1, 0);
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
arm_const_double_by_parts (rtx val)
  machine_mode mode = GET_MODE (val);

  if (optimize_size || arm_ld_sched)

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
arm_const_double_by_immediates (rtx val)
  machine_mode mode = GET_MODE (val);

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
              push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                 recog_data.operand_mode[opno], op);
          else if (MEM_P (op)
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
              rtx cop = avoid_constant_pool_reference (op);

              /* Casting the address of something to a mode narrower
                 than a word can cause avoid_constant_pool_reference()
                 to return the pool reference itself.  That's no good to
                 us here.  Let's just hope that we can use the
                 constant pool value directly.  */
                cop = get_pool_constant (XEXP (op, 0));

              push_minipool_fix (insn, address,
                                 recog_data.operand_loc[opno],
                                 recog_data.operand_mode[opno], cop);
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */
thumb1_reorg (void)
  FOR_EACH_BB_FN (bb, cfun)
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
        insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
          || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
           && prev != BB_HEAD (bb)
               || DEBUG_INSN_P (prev)
               || ((set = single_set (prev)) != NULL
                   && get_attr_conds (prev) == CONDS_NOCOND)));
           prev = PREV_INSN (prev))
          if (reg_set_p (op0, prev))
            insn_clobbered = true;

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
          || !low_register_operand (src, SImode))

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
         in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
          dest = copy_rtx (dest);
          src = copy_rtx (src);
          src = gen_rtx_MINUS (SImode, src, const0_rtx);
          PATTERN (prev) = gen_rtx_SET (dest, src);
          INSN_CODE (prev) = -1;
          /* Set test register in INSN to dest.  */
          XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
          INSN_CODE (insn) = -1;
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
thumb2_reorg (void)
  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
      if ((current_tune->disparage_flag_setting_t16_encodings
           == tune_params::DISPARAGE_FLAGS_ALL)
          && optimize_bb_for_speed_p (bb))

          Convert_Action action = SKIP;
          Convert_Action action_for_partial_flag_setting
            = ((current_tune->disparage_flag_setting_t16_encodings
                != tune_params::DISPARAGE_FLAGS_NEITHER)
               && optimize_bb_for_speed_p (bb))

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM)
              && GET_CODE (PATTERN (insn)) == SET)
              rtx pat = PATTERN (insn);
              rtx dst = XEXP (pat, 0);
              rtx src = XEXP (pat, 1);
              rtx op0 = NULL_RTX, op1 = NULL_RTX;

              if (UNARY_P (src) || BINARY_P (src))
                op0 = XEXP (src, 0);

              if (BINARY_P (src))
                op1 = XEXP (src, 1);

              if (low_register_operand (dst, SImode))
                  switch (GET_CODE (src))
                      /* Adding two registers and storing the result
                         in the first source is already a 16-bit
                         operation.  */
                      if (rtx_equal_p (dst, op0)
                          && register_operand (op1, SImode))

                      if (low_register_operand (op0, SImode))
                          /* ADDS <Rd>,<Rn>,<Rm>  */
                          if (low_register_operand (op1, SImode))
                          /* ADDS <Rdn>,#<imm8>  */
                          /* SUBS <Rdn>,#<imm8>  */
                          else if (rtx_equal_p (dst, op0)
                                   && CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -255, 255))
                          /* ADDS <Rd>,<Rn>,#<imm3>  */
                          /* SUBS <Rd>,<Rn>,#<imm3>  */
                          else if (CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -7, 7))
                      /* ADCS <Rd>, <Rn>  */
                      else if (GET_CODE (XEXP (src, 0)) == PLUS
                               && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
                               && low_register_operand (XEXP (XEXP (src, 0), 1),
                               && COMPARISON_P (op1)
                               && cc_register (XEXP (op1, 0), VOIDmode)
                               && maybe_get_arm_condition_code (op1) == ARM_CS
                               && XEXP (op1, 1) == const0_rtx)

                      /* RSBS <Rd>,<Rn>,#0
                         Not handled here: see NEG below.  */
                      /* SUBS <Rd>,<Rn>,#<imm3>
                         Not handled here: see PLUS above.  */
                      /* SUBS <Rd>,<Rn>,<Rm>  */
                      if (low_register_operand (op0, SImode)
                          && low_register_operand (op1, SImode))

                      /* MULS <Rdm>,<Rn>,<Rdm>
                         As an exception to the rule, this is only used
                         when optimizing for size since MULS is slow on all
                         known implementations.  We do not even want to use
                         MULS in cold code, if optimizing for speed, so we
                         test the global flag here.  */
                      if (!optimize_size)
                      /* else fall through.  */

                      /* ANDS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      else if (rtx_equal_p (dst, op1)
                               && low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting == SKIP
                                 ? SKIP : SWAP_CONV;

                      /* ASRS <Rdn>,<Rm>  */
                      /* LSRS <Rdn>,<Rm>  */
                      /* LSLS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      /* ASRS <Rd>,<Rm>,#<imm5>  */
                      /* LSRS <Rd>,<Rm>,#<imm5>  */
                      /* LSLS <Rd>,<Rm>,#<imm5>  */
                      else if (low_register_operand (op0, SImode)
                               && CONST_INT_P (op1)
                               && IN_RANGE (INTVAL (op1), 0, 31))
                        action = action_for_partial_flag_setting;

                      /* RORS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;

                      /* MVNS <Rd>,<Rm>  */
                      if (low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting;

                      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
                      if (low_register_operand (op0, SImode))

                      /* MOVS <Rd>,#<imm8>  */
                      if (CONST_INT_P (src)
                          && IN_RANGE (INTVAL (src), 0, 255))
                        action = action_for_partial_flag_setting;

                      /* MOVS and MOV<c> with registers have different
                         encodings, so are not relevant here.  */

              if (action != SKIP)
                  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);

                  if (action == SWAP_CONV)
                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                  else /* action == CONV */
                    vec = gen_rtvec (2, pat, clobber);

                  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                  INSN_CODE (insn) = -1;

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);

  CLEAR_REG_SET (&live);
17329 /* Gcc puts the pool in the wrong place for ARM, since we can only
17330 load addresses a limited distance around the pc. We do some
17331 special munging to move the constant pool values to the correct
17332 point in the code. */
17337 HOST_WIDE_INT address
= 0;
17342 else if (TARGET_THUMB2
)
17345 /* Ensure all insns that must be split have been split at this point.
17346 Otherwise, the pool placement code below may compute incorrect
17347 insn lengths. Note that when optimizing, all insns have already
17348 been split at this point. */
17350 split_all_insns_noflow ();
17352 minipool_fix_head
= minipool_fix_tail
= NULL
;
17354 /* The first insn must always be a note, or the code below won't
17355 scan it properly. */
17356 insn
= get_insns ();
  gcc_assert (NOTE_P (insn));

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix *ftmp;
      Mfix *fdel;
      Mfix *last_added_fix;
      Mfix *last_barrier = NULL;
      Mfix *this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
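/* Illustrative sketch (added commentary, not part of the pass itself):
   after this reorganization, an out-of-range constant typically ends up
   as a PC-relative load from a dumped pool, roughly

	ldr	r0, .L5		@ fixed-up reference into the minipool
	...
	b	.L6		@ jump around the pool at a barrier
   .L5:
	.word	0x12345678	@ minipool entry
   .L6:

   The label names and constant above are hypothetical; the exact shape
   is decided by dump_minipool and create_fix_barrier.  */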
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
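/* Illustrative sketch (added commentary): for a function that pushed
   {r4, r5, lr}, this routine produces something like

	pop	{r4, r5, pc}		@ unified syntax, base is SP

   or "ldmfd sp!, {r4, r5, pc}" in divided syntax; an interrupt handler
   returning via PC additionally gets the "^" suffix so that SPSR is
   copied back into CPSR.  The register list here is just an example.  */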
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		 ? XEXP (operands[0], 0)
		 : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
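/* Illustrative sketch (added commentary): on a pre-v6 core the ARM10
   VFPr1 workaround above widens a request to push exactly two D
   registers, say d8-d9, into a push of three, e.g.

	vstmdb	sp!, {d8-d10}	@ rather than {d8-d9}

   costing eight extra stack bytes.  The register numbers are only an
   example; when the pair sits at the top of the bank, base_reg is
   shifted down instead.  */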
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditions in the case of a literal pool.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
					     otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else
		    {
		      if (emit)
			output_asm_insn ("add%?\t%0, %1, %2", otherops);
		    }
		}
	      else
		{
		  if (emit)
		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
		}

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
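/* Illustrative sketch (added commentary): for a simple DImode load
   "r0:r1 <- [r4]" the routine above chooses between

	ldrd	r0, [r4]		@ TARGET_LDRD, no Cortex-M3 erratum
	ldmia	r4, {r0, r1}		@ cores without LDRD

   falling back to two plain ldr instructions (and *count = 2) when the
   addressing offset is outside the ldrd range.  Register numbers are
   only an example.  */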
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
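/* Illustrative sketch (added commentary): depending on the addressing
   mode, the templates above expand to instructions such as

	vldr.64		d8, [r0]	@ plain base address
	vstmdb.64	sp!, {d8}	@ PRE_DEC, i.e. a push
	vldmia.64	r1!, {d8}	@ POST_INC with writeback

   with ".32" and s-registers used for single-word values.  The register
   numbers are only an example.  */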
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;

	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
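/* Illustrative worked example (added commentary): for
   "add r0, r1, #n" with n = 0x1000404, which no single ARM immediate
   can encode, the chunking above produces

	add	r0, r1, #4
	add	r0, r0, #1024
	add	r0, r0, #16777216

   each chunk being an 8-bit value at an even rotation, exactly what an
   ARM data-processing immediate can hold.  */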
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
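/* Illustrative example (added commentary): int_log2 (1) == 0 and
   int_log2 (8) == 3.  Callers guarantee that POWER is an exact power
   of two, so for valid input the assertion above can never fire.  */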
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
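/* Illustrative example (added commentary): for the four bytes
   'a', '"', 'b', '\n' this emits

	.ascii	"a\"b\012"

   where the quote is backslash-escaped and the unprintable newline
   becomes the octal escape \012; runs longer than MAX_ASCII_LEN are
   simply split across several .ascii directives.  */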
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
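/* Illustrative worked example (added commentary): a plain function
   that uses r4 and r5 and makes a call (so LR is live) gets

     save_reg_mask = (1 << 4) | (1 << 5) | (1 << LR_REGNUM) = 0x4030

   which the prologue turns into "push {r4, r5, lr}".  */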
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
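/* Illustrative sketch (added commentary): typical sequences produced
   here are

	pop	{r4, r5, pc}	@ normal return when LR was pushed
	bx	lr		@ leaf return on v4t/v5, or interworked
	subs	pc, lr, #4	@ IRQ/FIQ handler; also restores CPSR

   the register lists vary with the computed live_regs_mask.  */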
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* Initial push with address update.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
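/* Illustrative sketch (added commentary): pushing {r4, r5, r6} (an odd
   count) with this routine yields roughly

	str	r4, [sp, #-12]!	@ single STR also allocates all the space
	strd	r5, r6, [sp, #4]

   so the STRD lands on a 64-bit-aligned address.  The register list is
   only an example.  */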
19820 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19821 whenever possible, otherwise it emits single-word stores. The first store
19822 also allocates stack space for all saved registers, using writeback with
19823 post-addressing mode. All other stores use offset addressing. If no STRD
19824 can be emitted, this function emits a sequence of single-word stores,
19825 and not an STM as before, because single-word stores provide more freedom
19826 scheduling and can be turned into an STM by peephole optimizations. */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant(Pmode,
					       stack_pointer_rtx,
					       offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
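/* Illustrative example (added commentary, not from the original source):
   for a SAVED_REGS_MASK covering {r4, r5, r7, lr}, only r4/r5 form a
   consecutive even/odd pair, so the function would emit something like

	strd	r4, r5, [sp, #-16]!	@ writeback store allocates the frame
	str	r7, [sp, #8]
	str	lr, [sp, #12]

   A later peephole pass may merge the single-word stores into an STM.  */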
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
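/* Usage sketch (illustrative, not from the original source): pushing r4,
   r5 and LR with all three described in the unwind info would look like

	emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM),
			     (1 << 4) | (1 << 5) | (1 << LR_REGNUM));

   which assembles to "push {r4, r5, lr}", while the attached
   REG_FRAME_RELATED_EXPR note records one 12-byte SP decrement plus the
   three individual word stores.  */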
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
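/* Layout sketch (illustrative, added commentary): for SAVED_REGS_MASK
   covering {r4, r5, pc} the generated PARALLEL holds five elements,

	[ (return)				   ;; offset_adj slot
	  (set (reg sp) (plus (reg sp) 12))	   ;; emit_update slot
	  (set (reg r4) (mem (reg sp)))
	  (set (reg r5) (mem (plus (reg sp) 4)))
	  (set (reg pc) (mem (plus (reg sp) 8))) ]

   i.e. "pop {r4, r5, pc}", emitted as a jump insn because it returns.  */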
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0, j = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the
	       registers to be loaded are generated in above given LDRD
	       pattern, and the pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
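/* Illustrative example (added commentary): popping {r4, r6, r7} (an odd
   count, no PC) yields

	ldrd	r4, r6, [sp]		@ Thumb-2 LDRD targets need not be
					@ consecutive
	add	sp, sp, #8
	ldr	r7, [sp], #4		@ last register via post-increment LDR

   whereas when PC is popped and the adjusted count is odd, the remaining
   two registers go through arm_emit_multi_reg_pop so the return lands
   in PC.  */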
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped,
	       and we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We cannot tell whether r3 will be available, because an indirect
   tailcall is happening in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */

/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     We only need to know about leaf functions once reload has
     completed, and the frame size cannot be changed after that time,
     so we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT && TARGET_VFP)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
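/* Worked example (added for illustration; numbers assume
   CALLER_INTERWORKING_SLOT_SIZE == 0 and no static chain): an ARM
   function with no pretend args (saved_args == 0), 20 bytes of saved
   core registers ({r4-r7, lr}), 8 bytes of locals and no outgoing
   arguments gets saved_regs == 20, soft_frame == 24 (padded for
   doubleword alignment), locals_base == 32 and outgoing_args == 32.
   Eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM then yields
   32 - (0 + 4) == 28.  */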
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating the frame, save that register in a special way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register r3 if it is available.
	       2. A slot on the stack above the frame if there are no
		  arguments to push onto the stack.
	       3. Register r3 again, after pushing the argument registers
		  onto the stack, if this is a varargs function.
	       4. The last slot on the stack created for the arguments to
		  push, if this isn't a varargs function.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (!arm_r3_live_at_start_p ())
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx addr, dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		{
		  insn
		    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					   (0xf0 >> (args_to_push / 4)) & 0xf);
		  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
		  saved_pretend_args = 1;
		}
	      else
		{
		  rtx addr, dwarf;

		  if (args_to_push == 4)
		    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
		  else
		    addr
		      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					    plus_constant (Pmode,
							   stack_pointer_rtx,
							   -args_to_push));

		  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

		  /* Just tell the dwarf backend that we adjusted SP.  */
		  dwarf
		    = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
		}

	      RTX_FRAME_RELATED_P (insn) = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout
     follows APCS), subtracting four from LR now will mean that
     the function return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!arm_r3_live_at_start_p () || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else
		{
		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_force_register_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
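/* Illustrative summary (added commentary): for a nested APCS-frame
   function the classic prologue shape produced by the code above is

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals + outgoing args>

   while without APCS frames (or on Thumb-2) the IP shuffle disappears and
   the registers are pushed, or stored with STRD, directly.  The exact
   sequence depends on target options and the saved register mask.  */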
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
21404 /* Globally reserved letters: acln
21405 Puncutation letters currently used: @_|?().!#
21406 Lower case letters currently used: bcdefhimpqtvwxyz
21407 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21408 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21410 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21412 If CODE is 'd', then the X is a condition operand and the instruction
21413 should only be executed if the condition is true.
21414 if CODE is 'D', then the X is a condition operand and the instruction
21415 should only be executed if the condition is false: however, if the mode
21416 of the comparison is CCFPEmode, then always execute the instruction -- we
21417 do this because in these circumstances !GE does not necessarily imply LT;
21418 in these cases the instruction pattern will take care to make sure that
21419 an instruction containing %d will follow, thereby undoing the effects of
21420 doing this instruction unconditionally.
21421 If CODE is 'N' then X is a floating point operand that must be negated
21423 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21424 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21426 arm_print_operand (FILE *stream
, rtx x
, int code
)
21431 fputs (ASM_COMMENT_START
, stream
);
21435 fputs (user_label_prefix
, stream
);
21439 fputs (REGISTER_PREFIX
, stream
);
21443 arm_print_condition (stream
);
21447 /* Nothing in unified syntax, otherwise the current condition code. */
21448 if (!TARGET_UNIFIED_ASM
)
21449 arm_print_condition (stream
);
21453 /* The current condition code in unified syntax, otherwise nothing. */
21454 if (TARGET_UNIFIED_ASM
)
21455 arm_print_condition (stream
);
21459 /* The current condition code for a condition code setting instruction.
21460 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21461 if (TARGET_UNIFIED_ASM
)
21463 fputc('s', stream
);
21464 arm_print_condition (stream
);
21468 arm_print_condition (stream
);
21469 fputc('s', stream
);
21474 /* If the instruction is conditionally executed then print
21475 the current condition code, otherwise print 's'. */
21476 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21477 if (current_insn_predicate
)
21478 arm_print_condition (stream
);
21480 fputc('s', stream
);
21483 /* %# is a "break" sequence. It doesn't output anything, but is used to
21484 separate e.g. operand numbers from following text, if that text consists
21485 of further digits which we don't want to be part of the operand
21493 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21494 r
= real_value_negate (&r
);
21495 fprintf (stream
, "%s", fp_const_from_val (&r
));
21499 /* An integer or symbol address without a preceding # sign. */
21501 switch (GET_CODE (x
))
21504 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21508 output_addr_const (stream
, x
);
21512 if (GET_CODE (XEXP (x
, 0)) == PLUS
21513 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21515 output_addr_const (stream
, x
);
21518 /* Fall through. */
21521 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21525 /* An integer that we want to print in HEX. */
21527 switch (GET_CODE (x
))
21530 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21534 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21539 if (CONST_INT_P (x
))
21542 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21543 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21547 putc ('~', stream
);
21548 output_addr_const (stream
, x
);
21553 /* Print the log2 of a CONST_INT. */
21557 if (!CONST_INT_P (x
)
21558 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21559 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21561 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21566 /* The low 16 bits of an immediate constant. */
21567 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21571 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21575 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21583 shift
= shift_op (x
, &val
);
21587 fprintf (stream
, ", %s ", shift
);
21589 arm_print_operand (stream
, XEXP (x
, 1), 0);
21591 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21596 /* An explanation of the 'Q', 'R' and 'H' register operands:
21598 In a pair of registers containing a DI or DF value the 'Q'
21599 operand returns the register number of the register containing
21600 the least significant part of the value. The 'R' operand returns
21601 the register number of the register containing the most
21602 significant part of the value.
21604 The 'H' operand returns the higher of the two register numbers.
21605 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21606 same as the 'Q' operand, since the most significant part of the
21607 value is held in the lower number register. The reverse is true
21608 on systems where WORDS_BIG_ENDIAN is false.
21610 The purpose of these operands is to distinguish between cases
21611 where the endian-ness of the values is important (for example
21612 when they are added together), and cases where the endian-ness
21613 is irrelevant, but the order of register operations is important.
21614 For example when loading a value from memory into a register
21615 pair, the endian-ness does not matter. Provided that the value
21616 from the lower memory address is put into the lower numbered
21617 register, and the value from the higher address is put into the
21618 higher numbered register, the load will work regardless of whether
21619 the value being loaded is big-wordian or little-wordian. The
21620 order of the two register loads can matter however, if the address
21621 of the memory location is actually held in one of the registers
21622 being overwritten by the load.
21624 The 'Q' and 'R' constraints are also available for 64-bit
21627 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21629 rtx part
= gen_lowpart (SImode
, x
);
21630 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21634 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21636 output_operand_lossage ("invalid operand for code '%c'", code
);
21640 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21644 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21646 machine_mode mode
= GET_MODE (x
);
21649 if (mode
== VOIDmode
)
21651 part
= gen_highpart_mode (SImode
, mode
, x
);
21652 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21656 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21658 output_operand_lossage ("invalid operand for code '%c'", code
);
21662 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21666 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21668 output_operand_lossage ("invalid operand for code '%c'", code
);
21672 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21676 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21678 output_operand_lossage ("invalid operand for code '%c'", code
);
21682 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21686 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21688 output_operand_lossage ("invalid operand for code '%c'", code
);
21692 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21696 asm_fprintf (stream
, "%r",
21697 REG_P (XEXP (x
, 0))
21698 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21702 asm_fprintf (stream
, "{%r-%r}",
21704 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21707 /* Like 'M', but writing doubleword vector registers, for use by Neon
21711 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21712 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21714 asm_fprintf (stream
, "{d%d}", regno
);
21716 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21721 /* CONST_TRUE_RTX means always -- that's the default. */
21722 if (x
== const_true_rtx
)
21725 if (!COMPARISON_P (x
))
21727 output_operand_lossage ("invalid operand for code '%c'", code
);
21731 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21736 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21737 want to do that. */
21738 if (x
== const_true_rtx
)
21740 output_operand_lossage ("instruction never executed");
21743 if (!COMPARISON_P (x
))
21745 output_operand_lossage ("invalid operand for code '%c'", code
);
21749 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21750 (get_arm_condition_code (x
))],
21760 /* Former Maverick support, removed after GCC-4.7. */
21761 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21766 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21767 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21768 /* Bad value for wCG register number. */
21770 output_operand_lossage ("invalid operand for code '%c'", code
);
21775 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21778 /* Print an iWMMXt control register name. */
21780 if (!CONST_INT_P (x
)
21782 || INTVAL (x
) >= 16)
21783 /* Bad value for wC register number. */
21785 output_operand_lossage ("invalid operand for code '%c'", code
);
21791 static const char * wc_reg_names
[16] =
21793 "wCID", "wCon", "wCSSF", "wCASF",
21794 "wC4", "wC5", "wC6", "wC7",
21795 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21796 "wC12", "wC13", "wC14", "wC15"
21799 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21803 /* Print the high single-precision register of a VFP double-precision
21807 machine_mode mode
= GET_MODE (x
);
21810 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21812 output_operand_lossage ("invalid operand for code '%c'", code
);
21817 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21819 output_operand_lossage ("invalid operand for code '%c'", code
);
21823 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21827 /* Print a VFP/Neon double precision or quad precision register name. */
21831 machine_mode mode
= GET_MODE (x
);
21832 int is_quad
= (code
== 'q');
21835 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21837 output_operand_lossage ("invalid operand for code '%c'", code
);
21842 || !IS_VFP_REGNUM (REGNO (x
)))
21844 output_operand_lossage ("invalid operand for code '%c'", code
);
21849 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21850 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21852 output_operand_lossage ("invalid operand for code '%c'", code
);
21856 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21857 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21861 /* These two codes print the low/high doubleword register of a Neon quad
21862 register, respectively. For pair-structure types, can also print
21863 low/high quadword registers. */
21867 machine_mode mode
= GET_MODE (x
);
21870 if ((GET_MODE_SIZE (mode
) != 16
21871 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21873 output_operand_lossage ("invalid operand for code '%c'", code
);
21878 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21880 output_operand_lossage ("invalid operand for code '%c'", code
);
21884 if (GET_MODE_SIZE (mode
) == 16)
21885 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21886 + (code
== 'f' ? 1 : 0));
21888 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21889 + (code
== 'f' ? 1 : 0));
21893 /* Print a VFPv3 floating-point constant, represented as an integer
21897 int index
= vfp3_const_double_index (x
);
21898 gcc_assert (index
!= -1);
21899 fprintf (stream
, "%d", index
);
21903 /* Print bits representing opcode features for Neon.
21905 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21906 and polynomials as unsigned.
21908 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21910 Bit 2 is 1 for rounding functions, 0 otherwise. */
21912 /* Identify the type as 's', 'u', 'p' or 'f'. */
21915 HOST_WIDE_INT bits
= INTVAL (x
);
21916 fputc ("uspf"[bits
& 3], stream
);
21920 /* Likewise, but signed and unsigned integers are both 'i'. */
21923 HOST_WIDE_INT bits
= INTVAL (x
);
21924 fputc ("iipf"[bits
& 3], stream
);
21928 /* As for 'T', but emit 'u' instead of 'p'. */
21931 HOST_WIDE_INT bits
= INTVAL (x
);
21932 fputc ("usuf"[bits
& 3], stream
);
21936 /* Bit 2: rounding (vs none). */
21939 HOST_WIDE_INT bits
= INTVAL (x
);
21940 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21944 /* Memory operand for vld1/vst1 instruction. */
21948 bool postinc
= FALSE
;
21949 rtx postinc_reg
= NULL
;
21950 unsigned align
, memsize
, align_bits
;
21952 gcc_assert (MEM_P (x
));
21953 addr
= XEXP (x
, 0);
21954 if (GET_CODE (addr
) == POST_INC
)
21957 addr
= XEXP (addr
, 0);
21959 if (GET_CODE (addr
) == POST_MODIFY
)
21961 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
21962 addr
= XEXP (addr
, 0);
21964 asm_fprintf (stream
, "[%r", REGNO (addr
));
21966 /* We know the alignment of this access, so we can emit a hint in the
21967 instruction (for some alignments) as an aid to the memory subsystem
21969 align
= MEM_ALIGN (x
) >> 3;
21970 memsize
= MEM_SIZE (x
);
21972 /* Only certain alignment specifiers are supported by the hardware. */
21973 if (memsize
== 32 && (align
% 32) == 0)
21975 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21977 else if (memsize
>= 8 && (align
% 8) == 0)
21982 if (align_bits
!= 0)
21983 asm_fprintf (stream
, ":%d", align_bits
);
21985 asm_fprintf (stream
, "]");
21988 fputs("!", stream
);
21990 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
21998 gcc_assert (MEM_P (x
));
21999 addr
= XEXP (x
, 0);
22000 gcc_assert (REG_P (addr
));
22001 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22005 /* Translate an S register number into a D register number and element index. */
22008 machine_mode mode
= GET_MODE (x
);
22011 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22013 output_operand_lossage ("invalid operand for code '%c'", code
);
22018 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22020 output_operand_lossage ("invalid operand for code '%c'", code
);
22024 regno
= regno
- FIRST_VFP_REGNUM
;
22025 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22030 gcc_assert (CONST_DOUBLE_P (x
));
22032 result
= vfp3_const_double_for_fract_bits (x
);
22034 result
= vfp3_const_double_for_bits (x
);
22035 fprintf (stream
, "#%d", result
);
    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;
    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;

	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register (one of them must be).
		 Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }

	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern machine_mode output_memory_reference_mode;

	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
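/* As an illustration, in ARM state (mem (plus (reg r1) (const_int 8)))
   prints as "[r1, #8]", while an SImode (mem (post_inc (reg r1))) prints
   as "[r1], #4".  */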
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
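/* In arm_assemble_integer above, a V4SImode CONST_VECTOR is thus emitted
   element by element as four 4-byte integers, the first aligned to
   BIGGEST_ALIGNMENT and the rest packed at element alignment.  */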
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
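/* For instance, the sequence

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch entirely.  */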
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
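/* For example, (eq (reg:CC_Z cc) (const_int 0)) yields ARM_EQ, while a GT
   test against a comparison whose operands were swapped at compare time
   (CC_SWPmode) yields the reversed code ARM_LT.  */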
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another
     one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
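/* Worked example: with arm_current_cc == ARM_EQ, three queued insns and
   arm_condexec_mask == 0b011 the buffer holds "tte", so this prints
   "itte\teq" and the first two instructions execute if EQ, the third
   if NE.  */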
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2)
	return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
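/* For instance, with ldrd available in ARM state a DImode value may live
   in {r0,r1} or {r2,r3}, but not starting at an odd register such as r1,
   so that ldrd/strd can always be used to access it.  */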
/* Implement MODES_TIEABLE_P.  */
bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the
     offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a
     register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of
     the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0))
	     == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 to or from double to do an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
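/* So a C expression such as (double) h, with h of type __fp16, is lowered
   as (double) (float) h rather than as a single widening conversion.  */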
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Emit code to reinterpret one Neon type as another, without altering
   bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src,
			unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
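/* E.g. when copying the D-register pair {d1,d2} to {d2,d3}, the pieces
   are emitted in reverse order so that d2 is written into d3 before d1
   overwrites d2.  */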
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
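/* E.g. number_of_first_bit_set (0x28) == 3, since bit 3 is the lowest set
   bit of binary 101000.  */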
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can
     corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
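/* For instance, a void function compiled for interworking, with its return
   address left on the stack, may return with

	pop	{r0}
	bx	r0

   popping the return address into a corruptible argument register before
   the final BX.  */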
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    cfun->machine->thumb1_cc_insn = NULL_RTX;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
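/* E.g. 0x1FE00 == 0xFF << 9 is accepted: Thumb-1 can build it with a move
   of #0xFF followed by a left shift of 9.  */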
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there
     is no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump && (func_size * 3) >= 2048)
    {
      /* Record the fact that we have decided that
	 the function does use far jumps.  */
      cfun->machine->far_jump_used = 1;
      return 1;
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func))
	 != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
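/* Illustration: a frame needing 512 bytes cannot be dropped with a single
   Thumb-1 SP subtract (the 7-bit immediate tops out at 508 bytes), but
   pushing one extra call-clobbered register as filler leaves 508, which
   fits, so the function above returns 1 in that case.  */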
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
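  /* The offsets structure records each frame landmark as an offset from
     the incoming argument base, with saved_args <= frame <= saved_regs
     <= soft_frame <= locals_base <= outgoing_args, so every elimination
     below is simply the difference of two landmarks.  */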
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
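  /* 0x40ff covers r0-r7 plus lr (bit 14), exactly the registers a
     Thumb-1 push instruction can name directly; 0x0f00 covers r8-r11,
     which must be staged through low registers below.  */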
  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				      stack_pointer_rtx, x));

      RTX_FRAME_RELATED_P (insn) = 1;
    }
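  /* In the anonymous-args case above, e.g. pretend_args_size == 8 gives
     num_pushes == 2 and mask == (1 << 4) - (1 << 2) == 0xc, i.e. a
     push {r2, r3} covering the last two argument registers.  */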
  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
					backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
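      /* E.g. an AAPCS function whose first two core registers carry
	 arguments has arg_regs_num == 2, so arg_regs_mask == 0x3 keeps
	 r0 and r1 out of the set used to stash high registers.  */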
      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to stash
	 the high registers.  Such kind of stash may clobber the use of
	 arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
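  /* In the loop above, PUSHABLE_REGS names the low registers that carry
     the high-register values, while REAL_REGS_MASK records which high
     registers those stack slots really hold, so the unwind information
     describes r8-r11 rather than the low-register temporaries.  */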
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
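  /* The REG_FRAME_RELATED_EXPR note above is what lets the CFI
     machinery describe a stack adjustment performed with a register
     operand: the note restates the decrement as an sp-plus-constant
     expression that dwarf2cfi can consume.  */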
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    emit_jump_insn (simple_return_rtx);
}
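/* With a single saved register the PARALLEL above assembles to a
   post-increment load such as "ldr pc, [sp], #4"; the multi-register
   path instead emits one "pop {..., pc}".  */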
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }
  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }
  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);
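  /* IP is r12 and SP is r13, so moving the mask bit from 12 to 13 makes
     the multi-register pop load the word saved from IP straight into SP:
     ldm register lists fill increasing register numbers from increasing
     addresses.  */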
  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that the LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);
  if (frame_pointer_needed)
    {
      rtx_insn *insn;

      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }
  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}
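      /* When all of the conditions above hold, the return is folded into
	 the restore: the saved LR value is popped directly into the PC,
	 so no separate return instruction is needed afterwards.  */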
      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }
  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue on how to
	     save pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
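  /* E.g. with pretend_args_size == 8, num_regs is 2 and the mask is
     (0xf0 >> 2) & 0xf == 0xc, marking r2 and r3 as the argument
     registers whose pretend slots are being reclaimed.  */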
  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
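/* For instance, a doubleword load from the address r0 + r1 into the
   pair r2/r3 takes the reg-plus-reg path above and emits:
	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]
   The computed address lives in the high destination register until it
   is overwritten last.  */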
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
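/* The three comparisons in the three-word case form a sorting network:
   after swapping %4/%5, %5/%6 and %4/%5 again the registers are in
   ascending order, as ldmia/stmia register lists require.  */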
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
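/* With -fverbose-asm this emits, for example:
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
   so the numeric form still assembles with older versions of gas while
   the tag name remains visible to human readers.  */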
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
	       current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
	       current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
	       current_tune->fuseable_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
			       pos + 1);
		}
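	      /* E.g. an architecture string of the form "base+ext" is
		 split here into ".arch base" followed by
		 ".arch_extension ext"; only the text after the '+' is
		 passed to .arch_extension.  */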
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}

      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
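/* For a non-PIC ARM-mode thunk with a small positive delta this emits,
   for example:
	add	r0, r0, #4
	b	<function>
   with r1 used instead of r0 when the return value is passed by
   reference (aggregate_value_p).  */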
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
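/* E.g. a variadic function with one named integer argument under AAPCS
   has nregs == 1, so *pretend_size becomes 3 * UNITS_PER_WORD and the
   prologue pushes r1-r3 just below the stacked anonymous arguments.  */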
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
26220 arm_array_mode_supported_p (machine_mode mode
,
26221 unsigned HOST_WIDE_INT nelems
)
26224 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26225 && (nelems
>= 2 && nelems
<= 4))
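/* E.g. a Neon structure type such as int32x4x3_t (three V4SI vectors)
   gets a dedicated array mode here, which helps keep the operands of
   structure loads such as vld3 in consecutive registers.  */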
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */
static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
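
/* Illustrative sketch (not part of the original source): returning 255 for
   SImode tells the middle-end that an explicit masking of a shift count is
   redundant, because ARM register-specified shifts already interpret the
   count modulo 256.  In source like the hypothetical helper below, the
   "& 255" can therefore be folded into the shift instruction itself.  */
#if 0
unsigned int
example_shift (unsigned int x, unsigned int n)
{
  return x << (n & 255);	/* The AND is subsumed by the shift insn.  */
}
#endif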
/* Map internal gcc register numbers to DWARF2 register numbers.  */
unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */
static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
#endif
      offset += reg_size;
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
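
/* For reference (illustrative, not verbatim compiler output): a prologue
   "push {r4, r5, lr}" is annotated as

	.save {r4, r5, lr}

   a VFP store-multiple as ".vsave {d8, d9}", and an untracked pc push or
   -Os dummy-register padding becomes ".pad #4".  */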
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
		     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
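
/* For reference (illustrative): a type_info reference to a symbol such as
   _ZTI1E is therefore emitted as

	.word	_ZTI1E(TARGET2)

   whereas an integer filter value gets a plain ".word".  */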
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  if (reloc != TLS_LE32 && reloc != TLS_LDO32)
    {
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc ('-', fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
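
/* For reference (illustrative): with SET_FLAGS == 2 the '!' flag character
   is picked from flag_chars above, so a left shift is printed as
   "lsl%!\t%0, %1, %2", letting the operand printer choose the short
   flag-setting "lsls" encoding where one is available.  */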
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;
      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);
  return "";
}
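
/* Illustrative example: operands[3] is a one-hot lane mask, so for a
   V8QImode destination with mask 0x04 the loop above finds i == 2 and the
   instruction printed is "tinsrb wr0, r1, #2" (register names here are
   hypothetical).  */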
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}

const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */
void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}

int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ((value & (value - 1)) == 0))
	return int_log2 (value);
    }

  return 0;
}
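
/* Worked example (not from the original source): for the constant 0.125
   the exact inverse is 8.0, which truncates exactly to the integer
   8 == 1 << 3; the power-of-two test "(value & (value - 1)) == 0" accepts
   it and int_log2 yields 3, i.e. 3 fraction bits for the fixed-point
   conversion instructions.  */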
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[5]));
  mod_f = memmodel_from_int (INTVAL (operands[6]));
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
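
/* For reference, the split sequence for a strong SImode compare-and-swap
   corresponds to assembly of roughly this shape (illustrative only;
   registers, barriers and acquire/release variants depend on the memory
   model):

	1:	ldrex	r0, [r3]	@ rval = *mem
		cmp	r0, r1		@ rval == oldval ?
		bne	2f
		strex	r2, r4, [r3]	@ try *mem = newval
		cmp	r2, #0		@ Z set on success
		bne	1b		@ strong CAS: retry if reservation lost
	2:
*/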
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
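
/* The loop built above has the usual load-exclusive/operate/store-exclusive
   shape; e.g. an SImode atomic add comes out roughly as (illustrative):

	1:	ldrex	r0, [r2]
		add	r1, r0, r3
		strex	ip, r1, [r2]
		cmp	ip, #0
		bne	1b
*/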
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
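
/* Illustrative sketch: the AND above supplies the modulo semantics that
   VEC_PERM_EXPR requires but VTBL lacks.  With two V8QImode inputs the
   valid index range is 0..15, so an out-of-range selector byte such as 19
   is first reduced to 19 & 15 == 3.  */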
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
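
/* Worked example: for two V4SImode inputs, VUZP's even-lane result is the
   selector {0, 2, 4, 6} (odd == 0) and the odd-lane result {1, 3, 5, 7}
   (odd == 1); the loop above checks precisely the (i * 2 + odd) shape.  */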
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
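
/* Worked example: zipping two V4SImode vectors interleaves the low halves
   as {0, 4, 1, 5} (high == 0) and the high halves as {2, 6, 3, 7}
   (high == nelt / 2), which is the pairwise pattern tested above.  */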
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
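
/* Worked example: the V8QImode selector {3, 2, 1, 0, 7, 6, 5, 4} has
   diff == 3; each group of four bytes is reversed, so it is matched here
   and emitted as the VREV32 variant.  */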
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
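
/* Worked example: for two V4SImode inputs, the even-lane transpose result
   is {0, 4, 2, 6} (odd == 0) and the odd-lane result {1, 5, 3, 7}
   (odd == 1), matching the perm[i] == i + odd checks above.  */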
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;
	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
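
/* Worked example: the V4SImode selector {1, 2, 3, 4} increases by one
   starting at location 1 and crosses into the second vector, so it is
   matched here and emitted as a single "vext.32 ..., #1".  */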
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
    /* Post increment and Pre Decrement are supported for all
       instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;
      break;

    default:
      return false;
    }

  return true;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic shift left:
	   out_down = in_down << amount;
	   out_down = (in_up << (amount - 32)) | out_down;
	   out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	   out_up = in_up << amount;

	 arithmetic shift right:
	   out_down = in_down >> amount;
	   out_down = (in_up << (32 - amount)) | out_down;
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	   out_up = in_up >> amount;

	 logical shift right:
	   out_down = in_down >> amount;
	   out_down = (in_up << (32 - amount)) | out_down;
	   out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	   out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */
      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
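
/* Illustrative sketch, not from the original source: assuming AMOUNT is
   in r2 and the scratches in r4/r5, the ARM-mode sequence for a 64-bit
   logical right shift by register comes out roughly as

        rsb     r4, r2, #32             @ scratch1 = 32 - amount
        sub     r5, r2, #32             @ scratch2 = amount - 32
        lsr     out_down, in_down, r2
        orr     out_down, out_down, in_up, lsl r4
        orr     out_down, out_down, in_up, lsr r5
        lsr     out_up, in_up, r2

   Register-specified shifts use the low byte modulo 256, so the
   out-of-range ORR operands contribute zero bits and no branch is
   needed; only ASR (which sign-fills for amounts >= 32) requires the
   conditional branch emitted above.  */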

/* Return true if COMPARISON is a valid comparison operation, and force
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
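
/* Illustrative note, not from the original source: UNEQ and LTGT have
   no single ARM condition-code encoding, so they are rejected here;
   for something like (gt:SI (reg) (const_int 0x12345)) the out-of-range
   constant fails arm_add_operand and gets forced into a register.  */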

/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}

/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
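
/* Worked example (illustrative, assuming the reconstruction above): for
   length == 15, align == 4 on the word path, num is the cost of loading
   VAL plus (15 >> 2) + leftover[15 & 3] = 3 + 2 stores; with unaligned
   access available, (15 & 3) == 3 lets the final STRH/STRB pair merge
   into one STR, so num drops by one.  */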

/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
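
/* Worked example (illustrative): length == 33, align == 4,
   mode == V16QImode gives num = 1 (constant load) + ceil (33/16) = 4,
   plus 1 to adjust the address for the odd tail, minus 1 because the
   first 16 bytes go out as a plain vst1, i.e. num == 4.  */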

/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        {
          reg = gen_lowpart (V8QImode, reg);
          mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
        }
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
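
/* Emission sketch (illustrative, not from the original source): for a
   25-byte unaligned set in V16QImode, the loop issues one vst1.8 {q0}
   at DST, then the (8, 16) leftover path advances DST by 25 - 16 = 9
   and issues a second, overlapping vst1.8 covering bytes 9-24;
   overlapping stores are cheaper here than narrowing the tail.  */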

/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          mem = adjust_automodify_address (dstbase, mode, dst, 0);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
                                       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
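
/* Emission sketch (illustrative, not from the original source): a
   12-byte aligned set in V8QImode stores bytes 0-7 in the loop, then
   the single-word-leftover path re-stores 8 bytes at offset 4
   (bytes 4-11) with an aligned access instead of emitting a separate
   scalar STR.  */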

/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
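
/* Worked example (illustrative): value == 0x41, align == 2,
   length == 5: v becomes 0x4141, so two STRHs cover bytes 0-3 and the
   leftover path stores byte 4 with an STRB.  */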

/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}

/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}

/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
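
/* Dispatch sketch (illustrative, not from the original source): for
   memset (p, c, 15) with P known to be 4-byte aligned, the NEON path
   is tried first (length >= 8); if the tuning does not prefer NEON
   string ops, expansion falls back to arm_block_set_aligned_non_vect
   and the scalar sequences above.  */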

/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  */
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fuseable_ops != tune_params::FUSE_NOTHING;
}

/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
         movw imm / movt imm
         instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
        return false;

      /* We are trying to match:
         prev (movw)  == (set (reg r0) (const_int imm16))
         curr (movt) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))
         or
         prev (movw)  == (set (reg r1)
                              (high (symbol_ref ("SYM"))))
         curr (movt) == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
        {
          if (CONST_INT_P (SET_SRC (curr_set))
              && CONST_INT_P (SET_SRC (prev_set))
              && REG_P (XEXP (set_dest, 0))
              && REG_P (SET_DEST (prev_set))
              && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
            return true;
        }
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
               && REG_P (SET_DEST (curr_set))
               && REG_P (SET_DEST (prev_set))
               && GET_CODE (SET_SRC (prev_set)) == HIGH
               && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  return false;
}
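
/* Fusion sketch (illustrative, not from the original source): an
   address materialisation such as

        movw    r0, #:lower16:SYM
        movt    r0, #:upper16:SYM

   matches the HIGH/LO_SUM pattern above, so on cores with
   FUSE_MOVW_MOVT the scheduler keeps the pair adjacent.  */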

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}

/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((TREE_CODE (decl_op1) == VAR_DECL
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (TREE_CODE (decl_op0) == VAR_DECL
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}

/* Return TRUE if X is a reference to a value in a constant pool.  */
static bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
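
/* Matching sketch (illustrative): for
   (set (reg:SI r0) (mem:SI (plus:SI (reg:SI r1) (const_int 4))))
   IS_LOAD is set and BASE/OFFSET become r1 and 4; a plain (mem (reg))
   address yields OFFSET == const0_rtx via extract_base_offset_in_addr.  */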

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.  For
   other instructions, FUSION_PRI and PRI are simply set to MAX_PRI.
   In the future, other kinds of instruction fusion can be supported by
   returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

#include "gt-arm.h"