Add assembler CFI directives to millicode division and remainder routines.
official-gcc.git: gcc/config/arm/arm.cc
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75 #include "aarch-common.h"
76 #include "aarch-common-protos.h"
78 /* This file should be included last. */
79 #include "target-def.h"
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
85 void (*arm_lang_output_object_attributes_hook)(void);
87 struct four_ints
89 int i[4];
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
191 static rtx emit_multi_reg_push (unsigned long, unsigned long);
192 static void arm_emit_multi_reg_pop (unsigned long);
193 static int vfp_emit_fstmd (int, int);
194 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
195 static int arm_arg_partial_bytes (cumulative_args_t,
196 const function_arg_info &);
197 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
198 static void arm_function_arg_advance (cumulative_args_t,
199 const function_arg_info &);
200 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
201 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
202 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
203 const_tree);
204 static rtx aapcs_libcall_value (machine_mode);
205 static int aapcs_select_return_coproc (const_tree, const_tree);
207 #ifdef OBJECT_FORMAT_ELF
208 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
209 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
210 #endif
211 #ifndef ARM_PE
212 static void arm_encode_section_info (tree, rtx, int);
213 #endif
215 static void arm_file_end (void);
216 static void arm_file_start (void);
217 static void arm_insert_attributes (tree, tree *);
219 static void arm_setup_incoming_varargs (cumulative_args_t,
220 const function_arg_info &, int *, int);
221 static bool arm_pass_by_reference (cumulative_args_t,
222 const function_arg_info &);
223 static bool arm_promote_prototypes (const_tree);
224 static bool arm_default_short_enums (void);
225 static bool arm_align_anon_bitfield (void);
226 static bool arm_return_in_msb (const_tree);
227 static bool arm_must_pass_in_stack (const function_arg_info &);
228 static bool arm_return_in_memory (const_tree, const_tree);
229 #if ARM_UNWIND_INFO
230 static void arm_unwind_emit (FILE *, rtx_insn *);
231 static bool arm_output_ttype (rtx);
232 static void arm_asm_emit_except_personality (rtx);
233 #endif
234 static void arm_asm_init_sections (void);
235 static rtx arm_dwarf_register_span (rtx);
237 static tree arm_cxx_guard_type (void);
238 static bool arm_cxx_guard_mask_bit (void);
239 static tree arm_get_cookie_size (tree);
240 static bool arm_cookie_has_size (void);
241 static bool arm_cxx_cdtor_returns_this (void);
242 static bool arm_cxx_key_method_may_be_inline (void);
243 static void arm_cxx_determine_class_data_visibility (tree);
244 static bool arm_cxx_class_data_always_comdat (void);
245 static bool arm_cxx_use_aeabi_atexit (void);
246 static void arm_init_libfuncs (void);
247 static tree arm_build_builtin_va_list (void);
248 static void arm_expand_builtin_va_start (tree, rtx);
249 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
250 static void arm_option_override (void);
251 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
252 struct cl_target_option *);
253 static void arm_override_options_after_change (void);
254 static void arm_option_print (FILE *, int, struct cl_target_option *);
255 static void arm_set_current_function (tree);
256 static bool arm_can_inline_p (tree, tree);
257 static void arm_relayout_function (tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_sched_can_speculate_insn (rtx_insn *);
261 static bool arm_macro_fusion_p (void);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
265 static int arm_first_cycle_multipass_dfa_lookahead (void);
266 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
267 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
268 static bool arm_output_addr_const_extra (FILE *, rtx);
269 static bool arm_allocate_stack_slots_for_args (void);
270 static bool arm_warn_func_return (tree);
271 static tree arm_promoted_type (const_tree t);
272 static bool arm_scalar_mode_supported_p (scalar_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
333 /* Table of machine attributes. */
334 static const struct attribute_spec arm_attribute_table[] =
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356 /* ARM/PE has three new attributes:
357 interfacearm - ?
358 dllexport - for exporting a function/variable that will live in a dll
359 dllimport - for importing a function/variable from a dll
361 Microsoft allows multiple declspecs in one __declspec, separating
362 them with spaces. We do NOT support this. Instead, use __declspec
363 multiple times.
365 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
366 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
367 { "interfacearm", 0, 0, true, false, false, false,
368 arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "notshared", 0, 0, false, true, false, false,
375 arm_handle_notshared_attribute, NULL },
376 #endif
377 /* ARMv8-M Security Extensions support. */
378 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379 arm_handle_cmse_nonsecure_entry, NULL },
380 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
381 arm_handle_cmse_nonsecure_call, NULL },
382 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
383 { NULL, 0, 0, false, false, false, false, NULL, NULL }
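/* Illustrative usage (added for clarity; not part of the upstream file):
   at the source level these attributes are applied in the usual GCC way,
   for example:

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void __attribute__ ((naked)) asm_thunk (void);
     int secure_gateway (int) __attribute__ ((cmse_nonsecure_entry));

   The handler functions registered in the table above validate and record
   such uses; the declarations here are hypothetical examples only.  */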
386 /* Initialize the GCC target structure. */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
409 #undef TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
436 #undef TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
442 #undef TARGET_OPTION_RESTORE
443 #define TARGET_OPTION_RESTORE arm_option_restore
445 #undef TARGET_OPTION_PRINT
446 #define TARGET_OPTION_PRINT arm_option_print
448 #undef TARGET_COMP_TYPE_ATTRIBUTES
449 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
451 #undef TARGET_SCHED_CAN_SPECULATE_INSN
452 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
454 #undef TARGET_SCHED_MACRO_FUSION_P
455 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
457 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
458 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
460 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
461 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
463 #undef TARGET_SCHED_ADJUST_COST
464 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
466 #undef TARGET_SET_CURRENT_FUNCTION
467 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
469 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
470 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
472 #undef TARGET_SCHED_REORDER
473 #define TARGET_SCHED_REORDER arm_sched_reorder
475 #undef TARGET_REGISTER_MOVE_COST
476 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
478 #undef TARGET_MEMORY_MOVE_COST
479 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
481 #undef TARGET_ENCODE_SECTION_INFO
482 #ifdef ARM_PE
483 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
484 #else
485 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #endif
488 #undef TARGET_STRIP_NAME_ENCODING
489 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
491 #undef TARGET_ASM_INTERNAL_LABEL
492 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
494 #undef TARGET_FLOATN_MODE
495 #define TARGET_FLOATN_MODE arm_floatn_mode
497 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
498 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
500 #undef TARGET_FUNCTION_VALUE
501 #define TARGET_FUNCTION_VALUE arm_function_value
503 #undef TARGET_LIBCALL_VALUE
504 #define TARGET_LIBCALL_VALUE arm_libcall_value
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
514 #undef TARGET_RTX_COSTS
515 #define TARGET_RTX_COSTS arm_rtx_costs
516 #undef TARGET_ADDRESS_COST
517 #define TARGET_ADDRESS_COST arm_address_cost
518 #undef TARGET_INSN_COST
519 #define TARGET_INSN_COST arm_insn_cost
521 #undef TARGET_SHIFT_TRUNCATION_MASK
522 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
523 #undef TARGET_VECTOR_MODE_SUPPORTED_P
524 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
525 #undef TARGET_ARRAY_MODE_SUPPORTED_P
526 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
527 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
528 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
529 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
530 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
531 arm_autovectorize_vector_modes
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
536 #undef TARGET_INIT_BUILTINS
537 #define TARGET_INIT_BUILTINS arm_init_builtins
538 #undef TARGET_EXPAND_BUILTIN
539 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
540 #undef TARGET_BUILTIN_DECL
541 #define TARGET_BUILTIN_DECL arm_builtin_decl
543 #undef TARGET_INIT_LIBFUNCS
544 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
548 #undef TARGET_PROMOTE_PROTOTYPES
549 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
550 #undef TARGET_PASS_BY_REFERENCE
551 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
552 #undef TARGET_ARG_PARTIAL_BYTES
553 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
554 #undef TARGET_FUNCTION_ARG
555 #define TARGET_FUNCTION_ARG arm_function_arg
556 #undef TARGET_FUNCTION_ARG_ADVANCE
557 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
558 #undef TARGET_FUNCTION_ARG_PADDING
559 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
560 #undef TARGET_FUNCTION_ARG_BOUNDARY
561 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
566 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
567 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
569 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
570 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
576 #undef TARGET_WARN_FUNC_RETURN
577 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
579 #undef TARGET_DEFAULT_SHORT_ENUMS
580 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
582 #undef TARGET_ALIGN_ANON_BITFIELD
583 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
585 #undef TARGET_NARROW_VOLATILE_BITFIELD
586 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
588 #undef TARGET_CXX_GUARD_TYPE
589 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
591 #undef TARGET_CXX_GUARD_MASK_BIT
592 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
594 #undef TARGET_CXX_GET_COOKIE_SIZE
595 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
597 #undef TARGET_CXX_COOKIE_HAS_SIZE
598 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
600 #undef TARGET_CXX_CDTOR_RETURNS_THIS
601 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
603 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
604 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
606 #undef TARGET_CXX_USE_AEABI_ATEXIT
607 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
609 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
610 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
611 arm_cxx_determine_class_data_visibility
613 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
614 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
616 #undef TARGET_RETURN_IN_MSB
617 #define TARGET_RETURN_IN_MSB arm_return_in_msb
619 #undef TARGET_RETURN_IN_MEMORY
620 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
622 #undef TARGET_MUST_PASS_IN_STACK
623 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
625 #if ARM_UNWIND_INFO
626 #undef TARGET_ASM_UNWIND_EMIT
627 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
629 /* EABI unwinding tables use a different format for the typeinfo tables. */
630 #undef TARGET_ASM_TTYPE
631 #define TARGET_ASM_TTYPE arm_output_ttype
633 #undef TARGET_ARM_EABI_UNWINDER
634 #define TARGET_ARM_EABI_UNWINDER true
636 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
637 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
639 #endif /* ARM_UNWIND_INFO */
641 #undef TARGET_ASM_INIT_SECTIONS
642 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
644 #undef TARGET_DWARF_REGISTER_SPAN
645 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
647 #undef TARGET_CANNOT_COPY_INSN_P
648 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_HAVE_TLS
652 #define TARGET_HAVE_TLS true
653 #endif
655 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
656 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
658 #undef TARGET_LEGITIMATE_CONSTANT_P
659 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
661 #undef TARGET_CANNOT_FORCE_CONST_MEM
662 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
664 #undef TARGET_MAX_ANCHOR_OFFSET
665 #define TARGET_MAX_ANCHOR_OFFSET 4095
667 /* The minimum is set such that the total size of the block
668 for a particular anchor is -4088 + 1 + 4095 bytes, which is
669 divisible by eight, ensuring natural spacing of anchors. */
670 #undef TARGET_MIN_ANCHOR_OFFSET
671 #define TARGET_MIN_ANCHOR_OFFSET -4088
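/* Worked example (added for clarity): the anchor range runs from offset
   -4088 up to +4095, i.e. 4088 + 1 + 4095 = 8184 bytes in total, and
   8184 = 8 * 1023, so the block size is indeed a multiple of eight.  */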
673 #undef TARGET_SCHED_ISSUE_RATE
674 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
676 #undef TARGET_SCHED_VARIABLE_ISSUE
677 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
681 arm_first_cycle_multipass_dfa_lookahead
683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
685 arm_first_cycle_multipass_dfa_lookahead_guard
687 #undef TARGET_MANGLE_TYPE
688 #define TARGET_MANGLE_TYPE arm_mangle_type
690 #undef TARGET_INVALID_CONVERSION
691 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
693 #undef TARGET_INVALID_UNARY_OP
694 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
696 #undef TARGET_INVALID_BINARY_OP
697 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
699 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
700 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
702 #undef TARGET_BUILD_BUILTIN_VA_LIST
703 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
704 #undef TARGET_EXPAND_BUILTIN_VA_START
705 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
707 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
709 #ifdef HAVE_AS_TLS
710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #endif
714 #undef TARGET_LEGITIMATE_ADDRESS_P
715 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
717 #undef TARGET_PREFERRED_RELOAD_CLASS
718 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
720 #undef TARGET_PROMOTED_TYPE
721 #define TARGET_PROMOTED_TYPE arm_promoted_type
723 #undef TARGET_SCALAR_MODE_SUPPORTED_P
724 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
726 #undef TARGET_COMPUTE_FRAME_LAYOUT
727 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
729 #undef TARGET_FRAME_POINTER_REQUIRED
730 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
732 #undef TARGET_CAN_ELIMINATE
733 #define TARGET_CAN_ELIMINATE arm_can_eliminate
735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
736 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
738 #undef TARGET_CLASS_LIKELY_SPILLED_P
739 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
741 #undef TARGET_VECTORIZE_BUILTINS
742 #define TARGET_VECTORIZE_BUILTINS
744 #undef TARGET_VECTOR_ALIGNMENT
745 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
747 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
748 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
749 arm_vector_alignment_reachable
751 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
752 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
753 arm_builtin_support_vector_misalignment
755 #undef TARGET_PREFERRED_RENAME_CLASS
756 #define TARGET_PREFERRED_RENAME_CLASS \
757 arm_preferred_rename_class
759 #undef TARGET_VECTORIZE_VEC_PERM_CONST
760 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
762 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
763 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
764 arm_builtin_vectorization_cost
766 #undef TARGET_CANONICALIZE_COMPARISON
767 #define TARGET_CANONICALIZE_COMPARISON \
768 arm_canonicalize_comparison
770 #undef TARGET_ASAN_SHADOW_OFFSET
771 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
773 #undef MAX_INSN_PER_IT_BLOCK
774 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
776 #undef TARGET_CAN_USE_DOLOOP_P
777 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
779 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
780 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
782 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
783 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
785 #undef TARGET_SCHED_FUSION_PRIORITY
786 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
788 #undef TARGET_ASM_FUNCTION_SECTION
789 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
791 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
792 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
794 #undef TARGET_SECTION_TYPE_FLAGS
795 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
797 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
798 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
800 #undef TARGET_C_EXCESS_PRECISION
801 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
803 /* Although the architecture reserves bits 0 and 1, only the former is
804 used for ARM/Thumb ISA selection in v7 and earlier versions. */
805 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
806 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
808 #undef TARGET_FIXED_CONDITION_CODE_REGS
809 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
811 #undef TARGET_HARD_REGNO_NREGS
812 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
813 #undef TARGET_HARD_REGNO_MODE_OK
814 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
816 #undef TARGET_MODES_TIEABLE_P
817 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
819 #undef TARGET_CAN_CHANGE_MODE_CLASS
820 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
822 #undef TARGET_CONSTANT_ALIGNMENT
823 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
825 #undef TARGET_INVALID_WITHIN_DOLOOP
826 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
828 #undef TARGET_MD_ASM_ADJUST
829 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
831 #undef TARGET_STACK_PROTECT_GUARD
832 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
834 #undef TARGET_VECTORIZE_GET_MASK_MODE
835 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
837 /* Obstack for minipool constant handling. */
838 static struct obstack minipool_obstack;
839 static char * minipool_startobj;
841 /* The maximum number of insns skipped which
842 will be conditionalised if possible. */
843 static int max_insns_skipped = 5;
845 /* True if we are currently building a constant table. */
846 int making_const_table;
848 /* The processor for which instructions should be scheduled. */
849 enum processor_type arm_tune = TARGET_CPU_arm_none;
851 /* The current tuning set. */
852 const struct tune_params *current_tune;
854 /* Which floating point hardware to schedule for. */
855 int arm_fpu_attr;
857 /* Used for Thumb call_via trampolines. */
858 rtx thumb_call_via_label[14];
859 static int thumb_call_reg_needed;
861 /* The bits in this mask specify which instruction scheduling options should
862 be used. */
863 unsigned int tune_flags = 0;
865 /* The highest ARM architecture version supported by the
866 target. */
867 enum base_architecture arm_base_arch = BASE_ARCH_0;
869 /* Active target architecture and tuning. */
871 struct arm_build_target arm_active_target;
873 /* The following are used in the arm.md file as equivalents to bits
874 in the above two flag variables. */
876 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 int arm_arch4 = 0;
879 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 int arm_arch4t = 0;
882 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 int arm_arch5t = 0;
885 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 int arm_arch5te = 0;
888 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 int arm_arch6 = 0;
891 /* Nonzero if this chip supports the ARM 6K extensions. */
892 int arm_arch6k = 0;
894 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 int arm_arch6kz = 0;
897 /* Nonzero if instructions present in ARMv6-M can be used. */
898 int arm_arch6m = 0;
900 /* Nonzero if this chip supports the ARM 7 extensions. */
901 int arm_arch7 = 0;
903 /* Nonzero if this chip supports the Large Physical Address Extension. */
904 int arm_arch_lpae = 0;
906 /* Nonzero if instructions not present in the 'M' profile can be used. */
907 int arm_arch_notm = 0;
909 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 int arm_arch7em = 0;
912 /* Nonzero if instructions present in ARMv8 can be used. */
913 int arm_arch8 = 0;
915 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 int arm_arch8_1 = 0;
918 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 int arm_arch8_2 = 0;
921 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 int arm_arch8_3 = 0;
924 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
925 int arm_arch8_4 = 0;
927 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
928 extensions. */
929 int arm_arch8m_main = 0;
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
954 /* Nonzero if tuning for XScale */
955 int arm_tune_xscale = 0;
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
990 enum arm_pcs arm_pcs_default;
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1016 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1049 #undef DEF_FP_SYSREG
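/* Sketch of the X-macro expansion above (illustrative only; the real
   FP_SYSREGS list is defined elsewhere in the ARM backend headers):
   if FP_SYSREGS were, say,

     #define FP_SYSREGS  DEF_FP_SYSREG (FPSCR)

   then fp_sysreg_names would expand to { "FPSCR", } -- DEF_FP_SYSREG
   stringizes each register name, and the #undef keeps the helper macro
   local to this table.  */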
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
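/* Descriptive note (added for clarity): THUMB2_WORK_REGS is the mask of
   low registers (r0-r7) available as work registers under Thumb-2.  It
   starts from 0xff and clears the bits for the Thumb hard frame pointer,
   SP, PC and, when one is reserved, the PIC offset table register.  */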
1062 /* Initialization code. */
1064 struct cpu_tune
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1101 const struct cpu_cost_table cortexa9_extra_costs =
1103 /* ALU */
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1123 /* MULT SImode */
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1132 /* MULT DImode */
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1142 /* LD/ST */
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1165 /* FP SFmode */
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1181 /* FP DFmode */
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1198 /* Vector */
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4), /* mult. */
1202 COSTS_N_INSNS (1), /* movi. */
1203 COSTS_N_INSNS (2), /* dup. */
1204 COSTS_N_INSNS (2) /* extract. */
1208 const struct cpu_cost_table cortexa8_extra_costs =
1210 /* ALU */
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 0, /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 0, /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 0, /* log_shift_reg. */
1220 0, /* extend. */
1221 0, /* extend_arith. */
1222 0, /* bfi. */
1223 0, /* bfx. */
1224 0, /* clz. */
1225 0, /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1230 /* MULT SImode */
1232 COSTS_N_INSNS (1), /* simple. */
1233 COSTS_N_INSNS (1), /* flag_setting. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* add. */
1236 COSTS_N_INSNS (1), /* extend_add. */
1237 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1239 /* MULT DImode */
1241 0, /* simple (N/A). */
1242 0, /* flag_setting (N/A). */
1243 COSTS_N_INSNS (2), /* extend. */
1244 0, /* add (N/A). */
1245 COSTS_N_INSNS (2), /* extend_add. */
1246 0 /* idiv (N/A). */
1249 /* LD/ST */
1251 COSTS_N_INSNS (1), /* load. */
1252 COSTS_N_INSNS (1), /* load_sign_extend. */
1253 COSTS_N_INSNS (1), /* ldrd. */
1254 COSTS_N_INSNS (1), /* ldm_1st. */
1255 1, /* ldm_regs_per_insn_1st. */
1256 2, /* ldm_regs_per_insn_subsequent. */
1257 COSTS_N_INSNS (1), /* loadf. */
1258 COSTS_N_INSNS (1), /* loadd. */
1259 COSTS_N_INSNS (1), /* load_unaligned. */
1260 COSTS_N_INSNS (1), /* store. */
1261 COSTS_N_INSNS (1), /* strd. */
1262 COSTS_N_INSNS (1), /* stm_1st. */
1263 1, /* stm_regs_per_insn_1st. */
1264 2, /* stm_regs_per_insn_subsequent. */
1265 COSTS_N_INSNS (1), /* storef. */
1266 COSTS_N_INSNS (1), /* stored. */
1267 COSTS_N_INSNS (1), /* store_unaligned. */
1268 COSTS_N_INSNS (1), /* loadv. */
1269 COSTS_N_INSNS (1) /* storev. */
1272 /* FP SFmode */
1274 COSTS_N_INSNS (36), /* div. */
1275 COSTS_N_INSNS (11), /* mult. */
1276 COSTS_N_INSNS (20), /* mult_addsub. */
1277 COSTS_N_INSNS (30), /* fma. */
1278 COSTS_N_INSNS (9), /* addsub. */
1279 COSTS_N_INSNS (3), /* fpconst. */
1280 COSTS_N_INSNS (3), /* neg. */
1281 COSTS_N_INSNS (6), /* compare. */
1282 COSTS_N_INSNS (4), /* widen. */
1283 COSTS_N_INSNS (4), /* narrow. */
1284 COSTS_N_INSNS (8), /* toint. */
1285 COSTS_N_INSNS (8), /* fromint. */
1286 COSTS_N_INSNS (8) /* roundint. */
1288 /* FP DFmode */
1290 COSTS_N_INSNS (64), /* div. */
1291 COSTS_N_INSNS (16), /* mult. */
1292 COSTS_N_INSNS (25), /* mult_addsub. */
1293 COSTS_N_INSNS (30), /* fma. */
1294 COSTS_N_INSNS (9), /* addsub. */
1295 COSTS_N_INSNS (3), /* fpconst. */
1296 COSTS_N_INSNS (3), /* neg. */
1297 COSTS_N_INSNS (6), /* compare. */
1298 COSTS_N_INSNS (6), /* widen. */
1299 COSTS_N_INSNS (6), /* narrow. */
1300 COSTS_N_INSNS (8), /* toint. */
1301 COSTS_N_INSNS (8), /* fromint. */
1302 COSTS_N_INSNS (8) /* roundint. */
1305 /* Vector */
1307 COSTS_N_INSNS (1), /* alu. */
1308 COSTS_N_INSNS (4), /* mult. */
1309 COSTS_N_INSNS (1), /* movi. */
1310 COSTS_N_INSNS (2), /* dup. */
1311 COSTS_N_INSNS (2) /* extract. */
1315 const struct cpu_cost_table cortexa5_extra_costs =
1317 /* ALU */
1319 0, /* arith. */
1320 0, /* logical. */
1321 COSTS_N_INSNS (1), /* shift. */
1322 COSTS_N_INSNS (1), /* shift_reg. */
1323 COSTS_N_INSNS (1), /* arith_shift. */
1324 COSTS_N_INSNS (1), /* arith_shift_reg. */
1325 COSTS_N_INSNS (1), /* log_shift. */
1326 COSTS_N_INSNS (1), /* log_shift_reg. */
1327 COSTS_N_INSNS (1), /* extend. */
1328 COSTS_N_INSNS (1), /* extend_arith. */
1329 COSTS_N_INSNS (1), /* bfi. */
1330 COSTS_N_INSNS (1), /* bfx. */
1331 COSTS_N_INSNS (1), /* clz. */
1332 COSTS_N_INSNS (1), /* rev. */
1333 0, /* non_exec. */
1334 true /* non_exec_costs_exec. */
1338 /* MULT SImode */
1340 0, /* simple. */
1341 COSTS_N_INSNS (1), /* flag_setting. */
1342 COSTS_N_INSNS (1), /* extend. */
1343 COSTS_N_INSNS (1), /* add. */
1344 COSTS_N_INSNS (1), /* extend_add. */
1345 COSTS_N_INSNS (7) /* idiv. */
1347 /* MULT DImode */
1349 0, /* simple (N/A). */
1350 0, /* flag_setting (N/A). */
1351 COSTS_N_INSNS (1), /* extend. */
1352 0, /* add. */
1353 COSTS_N_INSNS (2), /* extend_add. */
1354 0 /* idiv (N/A). */
1357 /* LD/ST */
1359 COSTS_N_INSNS (1), /* load. */
1360 COSTS_N_INSNS (1), /* load_sign_extend. */
1361 COSTS_N_INSNS (6), /* ldrd. */
1362 COSTS_N_INSNS (1), /* ldm_1st. */
1363 1, /* ldm_regs_per_insn_1st. */
1364 2, /* ldm_regs_per_insn_subsequent. */
1365 COSTS_N_INSNS (2), /* loadf. */
1366 COSTS_N_INSNS (4), /* loadd. */
1367 COSTS_N_INSNS (1), /* load_unaligned. */
1368 COSTS_N_INSNS (1), /* store. */
1369 COSTS_N_INSNS (3), /* strd. */
1370 COSTS_N_INSNS (1), /* stm_1st. */
1371 1, /* stm_regs_per_insn_1st. */
1372 2, /* stm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* storef. */
1374 COSTS_N_INSNS (2), /* stored. */
1375 COSTS_N_INSNS (1), /* store_unaligned. */
1376 COSTS_N_INSNS (1), /* loadv. */
1377 COSTS_N_INSNS (1) /* storev. */
1380 /* FP SFmode */
1382 COSTS_N_INSNS (15), /* div. */
1383 COSTS_N_INSNS (3), /* mult. */
1384 COSTS_N_INSNS (7), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1396 /* FP DFmode */
1398 COSTS_N_INSNS (30), /* div. */
1399 COSTS_N_INSNS (6), /* mult. */
1400 COSTS_N_INSNS (10), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1413 /* Vector */
1415 COSTS_N_INSNS (1), /* alu. */
1416 COSTS_N_INSNS (4), /* mult. */
1417 COSTS_N_INSNS (1), /* movi. */
1418 COSTS_N_INSNS (2), /* dup. */
1419 COSTS_N_INSNS (2) /* extract. */
1424 const struct cpu_cost_table cortexa7_extra_costs =
1426 /* ALU */
1428 0, /* arith. */
1429 0, /* logical. */
1430 COSTS_N_INSNS (1), /* shift. */
1431 COSTS_N_INSNS (1), /* shift_reg. */
1432 COSTS_N_INSNS (1), /* arith_shift. */
1433 COSTS_N_INSNS (1), /* arith_shift_reg. */
1434 COSTS_N_INSNS (1), /* log_shift. */
1435 COSTS_N_INSNS (1), /* log_shift_reg. */
1436 COSTS_N_INSNS (1), /* extend. */
1437 COSTS_N_INSNS (1), /* extend_arith. */
1438 COSTS_N_INSNS (1), /* bfi. */
1439 COSTS_N_INSNS (1), /* bfx. */
1440 COSTS_N_INSNS (1), /* clz. */
1441 COSTS_N_INSNS (1), /* rev. */
1442 0, /* non_exec. */
1443 true /* non_exec_costs_exec. */
1447 /* MULT SImode */
1449 0, /* simple. */
1450 COSTS_N_INSNS (1), /* flag_setting. */
1451 COSTS_N_INSNS (1), /* extend. */
1452 COSTS_N_INSNS (1), /* add. */
1453 COSTS_N_INSNS (1), /* extend_add. */
1454 COSTS_N_INSNS (7) /* idiv. */
1456 /* MULT DImode */
1458 0, /* simple (N/A). */
1459 0, /* flag_setting (N/A). */
1460 COSTS_N_INSNS (1), /* extend. */
1461 0, /* add. */
1462 COSTS_N_INSNS (2), /* extend_add. */
1463 0 /* idiv (N/A). */
1466 /* LD/ST */
1468 COSTS_N_INSNS (1), /* load. */
1469 COSTS_N_INSNS (1), /* load_sign_extend. */
1470 COSTS_N_INSNS (3), /* ldrd. */
1471 COSTS_N_INSNS (1), /* ldm_1st. */
1472 1, /* ldm_regs_per_insn_1st. */
1473 2, /* ldm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* loadf. */
1475 COSTS_N_INSNS (2), /* loadd. */
1476 COSTS_N_INSNS (1), /* load_unaligned. */
1477 COSTS_N_INSNS (1), /* store. */
1478 COSTS_N_INSNS (3), /* strd. */
1479 COSTS_N_INSNS (1), /* stm_1st. */
1480 1, /* stm_regs_per_insn_1st. */
1481 2, /* stm_regs_per_insn_subsequent. */
1482 COSTS_N_INSNS (2), /* storef. */
1483 COSTS_N_INSNS (2), /* stored. */
1484 COSTS_N_INSNS (1), /* store_unaligned. */
1485 COSTS_N_INSNS (1), /* loadv. */
1486 COSTS_N_INSNS (1) /* storev. */
1489 /* FP SFmode */
1491 COSTS_N_INSNS (15), /* div. */
1492 COSTS_N_INSNS (3), /* mult. */
1493 COSTS_N_INSNS (7), /* mult_addsub. */
1494 COSTS_N_INSNS (7), /* fma. */
1495 COSTS_N_INSNS (3), /* addsub. */
1496 COSTS_N_INSNS (3), /* fpconst. */
1497 COSTS_N_INSNS (3), /* neg. */
1498 COSTS_N_INSNS (3), /* compare. */
1499 COSTS_N_INSNS (3), /* widen. */
1500 COSTS_N_INSNS (3), /* narrow. */
1501 COSTS_N_INSNS (3), /* toint. */
1502 COSTS_N_INSNS (3), /* fromint. */
1503 COSTS_N_INSNS (3) /* roundint. */
1505 /* FP DFmode */
1507 COSTS_N_INSNS (30), /* div. */
1508 COSTS_N_INSNS (6), /* mult. */
1509 COSTS_N_INSNS (10), /* mult_addsub. */
1510 COSTS_N_INSNS (7), /* fma. */
1511 COSTS_N_INSNS (3), /* addsub. */
1512 COSTS_N_INSNS (3), /* fpconst. */
1513 COSTS_N_INSNS (3), /* neg. */
1514 COSTS_N_INSNS (3), /* compare. */
1515 COSTS_N_INSNS (3), /* widen. */
1516 COSTS_N_INSNS (3), /* narrow. */
1517 COSTS_N_INSNS (3), /* toint. */
1518 COSTS_N_INSNS (3), /* fromint. */
1519 COSTS_N_INSNS (3) /* roundint. */
1522 /* Vector */
1524 COSTS_N_INSNS (1), /* alu. */
1525 COSTS_N_INSNS (4), /* mult. */
1526 COSTS_N_INSNS (1), /* movi. */
1527 COSTS_N_INSNS (2), /* dup. */
1528 COSTS_N_INSNS (2) /* extract. */
1532 const struct cpu_cost_table cortexa12_extra_costs =
1534 /* ALU */
1536 0, /* arith. */
1537 0, /* logical. */
1538 0, /* shift. */
1539 COSTS_N_INSNS (1), /* shift_reg. */
1540 COSTS_N_INSNS (1), /* arith_shift. */
1541 COSTS_N_INSNS (1), /* arith_shift_reg. */
1542 COSTS_N_INSNS (1), /* log_shift. */
1543 COSTS_N_INSNS (1), /* log_shift_reg. */
1544 0, /* extend. */
1545 COSTS_N_INSNS (1), /* extend_arith. */
1546 0, /* bfi. */
1547 COSTS_N_INSNS (1), /* bfx. */
1548 COSTS_N_INSNS (1), /* clz. */
1549 COSTS_N_INSNS (1), /* rev. */
1550 0, /* non_exec. */
1551 true /* non_exec_costs_exec. */
1553 /* MULT SImode */
1556 COSTS_N_INSNS (2), /* simple. */
1557 COSTS_N_INSNS (3), /* flag_setting. */
1558 COSTS_N_INSNS (2), /* extend. */
1559 COSTS_N_INSNS (3), /* add. */
1560 COSTS_N_INSNS (2), /* extend_add. */
1561 COSTS_N_INSNS (18) /* idiv. */
1563 /* MULT DImode */
1565 0, /* simple (N/A). */
1566 0, /* flag_setting (N/A). */
1567 COSTS_N_INSNS (3), /* extend. */
1568 0, /* add (N/A). */
1569 COSTS_N_INSNS (3), /* extend_add. */
1570 0 /* idiv (N/A). */
1573 /* LD/ST */
1575 COSTS_N_INSNS (3), /* load. */
1576 COSTS_N_INSNS (3), /* load_sign_extend. */
1577 COSTS_N_INSNS (3), /* ldrd. */
1578 COSTS_N_INSNS (3), /* ldm_1st. */
1579 1, /* ldm_regs_per_insn_1st. */
1580 2, /* ldm_regs_per_insn_subsequent. */
1581 COSTS_N_INSNS (3), /* loadf. */
1582 COSTS_N_INSNS (3), /* loadd. */
1583 0, /* load_unaligned. */
1584 0, /* store. */
1585 0, /* strd. */
1586 0, /* stm_1st. */
1587 1, /* stm_regs_per_insn_1st. */
1588 2, /* stm_regs_per_insn_subsequent. */
1589 COSTS_N_INSNS (2), /* storef. */
1590 COSTS_N_INSNS (2), /* stored. */
1591 0, /* store_unaligned. */
1592 COSTS_N_INSNS (1), /* loadv. */
1593 COSTS_N_INSNS (1) /* storev. */
1596 /* FP SFmode */
1598 COSTS_N_INSNS (17), /* div. */
1599 COSTS_N_INSNS (4), /* mult. */
1600 COSTS_N_INSNS (8), /* mult_addsub. */
1601 COSTS_N_INSNS (8), /* fma. */
1602 COSTS_N_INSNS (4), /* addsub. */
1603 COSTS_N_INSNS (2), /* fpconst. */
1604 COSTS_N_INSNS (2), /* neg. */
1605 COSTS_N_INSNS (2), /* compare. */
1606 COSTS_N_INSNS (4), /* widen. */
1607 COSTS_N_INSNS (4), /* narrow. */
1608 COSTS_N_INSNS (4), /* toint. */
1609 COSTS_N_INSNS (4), /* fromint. */
1610 COSTS_N_INSNS (4) /* roundint. */
1612 /* FP DFmode */
1614 COSTS_N_INSNS (31), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (2), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1629 /* Vector */
1631 COSTS_N_INSNS (1), /* alu. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (1), /* movi. */
1634 COSTS_N_INSNS (2), /* dup. */
1635 COSTS_N_INSNS (2) /* extract. */
1639 const struct cpu_cost_table cortexa15_extra_costs =
1641 /* ALU */
1643 0, /* arith. */
1644 0, /* logical. */
1645 0, /* shift. */
1646 0, /* shift_reg. */
1647 COSTS_N_INSNS (1), /* arith_shift. */
1648 COSTS_N_INSNS (1), /* arith_shift_reg. */
1649 COSTS_N_INSNS (1), /* log_shift. */
1650 COSTS_N_INSNS (1), /* log_shift_reg. */
1651 0, /* extend. */
1652 COSTS_N_INSNS (1), /* extend_arith. */
1653 COSTS_N_INSNS (1), /* bfi. */
1654 0, /* bfx. */
1655 0, /* clz. */
1656 0, /* rev. */
1657 0, /* non_exec. */
1658 true /* non_exec_costs_exec. */
1660 /* MULT SImode */
1663 COSTS_N_INSNS (2), /* simple. */
1664 COSTS_N_INSNS (3), /* flag_setting. */
1665 COSTS_N_INSNS (2), /* extend. */
1666 COSTS_N_INSNS (2), /* add. */
1667 COSTS_N_INSNS (2), /* extend_add. */
1668 COSTS_N_INSNS (18) /* idiv. */
1670 /* MULT DImode */
1672 0, /* simple (N/A). */
1673 0, /* flag_setting (N/A). */
1674 COSTS_N_INSNS (3), /* extend. */
1675 0, /* add (N/A). */
1676 COSTS_N_INSNS (3), /* extend_add. */
1677 0 /* idiv (N/A). */
1680 /* LD/ST */
1682 COSTS_N_INSNS (3), /* load. */
1683 COSTS_N_INSNS (3), /* load_sign_extend. */
1684 COSTS_N_INSNS (3), /* ldrd. */
1685 COSTS_N_INSNS (4), /* ldm_1st. */
1686 1, /* ldm_regs_per_insn_1st. */
1687 2, /* ldm_regs_per_insn_subsequent. */
1688 COSTS_N_INSNS (4), /* loadf. */
1689 COSTS_N_INSNS (4), /* loadd. */
1690 0, /* load_unaligned. */
1691 0, /* store. */
1692 0, /* strd. */
1693 COSTS_N_INSNS (1), /* stm_1st. */
1694 1, /* stm_regs_per_insn_1st. */
1695 2, /* stm_regs_per_insn_subsequent. */
1696 0, /* storef. */
1697 0, /* stored. */
1698 0, /* store_unaligned. */
1699 COSTS_N_INSNS (1), /* loadv. */
1700 COSTS_N_INSNS (1) /* storev. */
1703 /* FP SFmode */
1705 COSTS_N_INSNS (17), /* div. */
1706 COSTS_N_INSNS (4), /* mult. */
1707 COSTS_N_INSNS (8), /* mult_addsub. */
1708 COSTS_N_INSNS (8), /* fma. */
1709 COSTS_N_INSNS (4), /* addsub. */
1710 COSTS_N_INSNS (2), /* fpconst. */
1711 COSTS_N_INSNS (2), /* neg. */
1712 COSTS_N_INSNS (5), /* compare. */
1713 COSTS_N_INSNS (4), /* widen. */
1714 COSTS_N_INSNS (4), /* narrow. */
1715 COSTS_N_INSNS (4), /* toint. */
1716 COSTS_N_INSNS (4), /* fromint. */
1717 COSTS_N_INSNS (4) /* roundint. */
1719 /* FP DFmode */
1721 COSTS_N_INSNS (31), /* div. */
1722 COSTS_N_INSNS (4), /* mult. */
1723 COSTS_N_INSNS (8), /* mult_addsub. */
1724 COSTS_N_INSNS (8), /* fma. */
1725 COSTS_N_INSNS (4), /* addsub. */
1726 COSTS_N_INSNS (2), /* fpconst. */
1727 COSTS_N_INSNS (2), /* neg. */
1728 COSTS_N_INSNS (2), /* compare. */
1729 COSTS_N_INSNS (4), /* widen. */
1730 COSTS_N_INSNS (4), /* narrow. */
1731 COSTS_N_INSNS (4), /* toint. */
1732 COSTS_N_INSNS (4), /* fromint. */
1733 COSTS_N_INSNS (4) /* roundint. */
1736 /* Vector */
1738 COSTS_N_INSNS (1), /* alu. */
1739 COSTS_N_INSNS (4), /* mult. */
1740 COSTS_N_INSNS (1), /* movi. */
1741 COSTS_N_INSNS (2), /* dup. */
1742 COSTS_N_INSNS (2) /* extract. */
1746 const struct cpu_cost_table v7m_extra_costs =
1748 /* ALU */
1750 0, /* arith. */
1751 0, /* logical. */
1752 0, /* shift. */
1753 0, /* shift_reg. */
1754 0, /* arith_shift. */
1755 COSTS_N_INSNS (1), /* arith_shift_reg. */
1756 0, /* log_shift. */
1757 COSTS_N_INSNS (1), /* log_shift_reg. */
1758 0, /* extend. */
1759 COSTS_N_INSNS (1), /* extend_arith. */
1760 0, /* bfi. */
1761 0, /* bfx. */
1762 0, /* clz. */
1763 0, /* rev. */
1764 COSTS_N_INSNS (1), /* non_exec. */
1765 false /* non_exec_costs_exec. */
1768 /* MULT SImode */
1770 COSTS_N_INSNS (1), /* simple. */
1771 COSTS_N_INSNS (1), /* flag_setting. */
1772 COSTS_N_INSNS (2), /* extend. */
1773 COSTS_N_INSNS (1), /* add. */
1774 COSTS_N_INSNS (3), /* extend_add. */
1775 COSTS_N_INSNS (8) /* idiv. */
1777 /* MULT DImode */
1779 0, /* simple (N/A). */
1780 0, /* flag_setting (N/A). */
1781 COSTS_N_INSNS (2), /* extend. */
1782 0, /* add (N/A). */
1783 COSTS_N_INSNS (3), /* extend_add. */
1784 0 /* idiv (N/A). */
1787 /* LD/ST */
1789 COSTS_N_INSNS (2), /* load. */
1790 0, /* load_sign_extend. */
1791 COSTS_N_INSNS (3), /* ldrd. */
1792 COSTS_N_INSNS (2), /* ldm_1st. */
1793 1, /* ldm_regs_per_insn_1st. */
1794 1, /* ldm_regs_per_insn_subsequent. */
1795 COSTS_N_INSNS (2), /* loadf. */
1796 COSTS_N_INSNS (3), /* loadd. */
1797 COSTS_N_INSNS (1), /* load_unaligned. */
1798 COSTS_N_INSNS (2), /* store. */
1799 COSTS_N_INSNS (3), /* strd. */
1800 COSTS_N_INSNS (2), /* stm_1st. */
1801 1, /* stm_regs_per_insn_1st. */
1802 1, /* stm_regs_per_insn_subsequent. */
1803 COSTS_N_INSNS (2), /* storef. */
1804 COSTS_N_INSNS (3), /* stored. */
1805 COSTS_N_INSNS (1), /* store_unaligned. */
1806 COSTS_N_INSNS (1), /* loadv. */
1807 COSTS_N_INSNS (1) /* storev. */
1810 /* FP SFmode */
1812 COSTS_N_INSNS (7), /* div. */
1813 COSTS_N_INSNS (2), /* mult. */
1814 COSTS_N_INSNS (5), /* mult_addsub. */
1815 COSTS_N_INSNS (3), /* fma. */
1816 COSTS_N_INSNS (1), /* addsub. */
1817 0, /* fpconst. */
1818 0, /* neg. */
1819 0, /* compare. */
1820 0, /* widen. */
1821 0, /* narrow. */
1822 0, /* toint. */
1823 0, /* fromint. */
1824 0 /* roundint. */
1826 /* FP DFmode */
1828 COSTS_N_INSNS (15), /* div. */
1829 COSTS_N_INSNS (5), /* mult. */
1830 COSTS_N_INSNS (7), /* mult_addsub. */
1831 COSTS_N_INSNS (7), /* fma. */
1832 COSTS_N_INSNS (3), /* addsub. */
1833 0, /* fpconst. */
1834 0, /* neg. */
1835 0, /* compare. */
1836 0, /* widen. */
1837 0, /* narrow. */
1838 0, /* toint. */
1839 0, /* fromint. */
1840 0 /* roundint. */
1843 /* Vector */
1845 COSTS_N_INSNS (1), /* alu. */
1846 COSTS_N_INSNS (4), /* mult. */
1847 COSTS_N_INSNS (1), /* movi. */
1848 COSTS_N_INSNS (2), /* dup. */
1849 COSTS_N_INSNS (2) /* extract. */
1853 const struct addr_mode_cost_table generic_addr_mode_costs =
1855 /* int. */
1857 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1858 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1859 COSTS_N_INSNS (0) /* AMO_WB. */
1861 /* float. */
1863 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1864 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1865 COSTS_N_INSNS (0) /* AMO_WB. */
1867 /* vector. */
1869 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1870 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1871 COSTS_N_INSNS (0) /* AMO_WB. */
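/* A note on units: COSTS_N_INSNS (n) scales a count of n "simple"
   instructions into the units used by the RTL cost hooks (rtl.h currently
   defines it as n * 4). So an entry such as COSTS_N_INSNS (3) marks an
   operation as roughly three instructions more expensive than the baseline,
   while a plain 0 means no cost beyond the baseline instruction. These
   tables feed the rtx-cost heuristics only; they need not match exact
   cycle counts. */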
1875 const struct tune_params arm_slowmul_tune =
1877 &generic_extra_costs, /* Insn extra costs. */
1878 &generic_addr_mode_costs, /* Addressing mode costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 3, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 1, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1897 const struct tune_params arm_fastmul_tune =
1899 &generic_extra_costs, /* Insn extra costs. */
1900 &generic_addr_mode_costs, /* Addressing mode costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_TRUE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_STRINGOPS_FALSE,
1915 tune_params::FUSE_NOTHING,
1916 tune_params::SCHED_AUTOPREF_OFF
1919 /* StrongARM has early execution of branches, so a sequence that is worth
1920 skipping is shorter. Set max_insns_skipped to a lower value. */
1922 const struct tune_params arm_strongarm_tune =
1924 &generic_extra_costs, /* Insn extra costs. */
1925 &generic_addr_mode_costs, /* Addressing mode costs. */
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 3, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 1, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_TRUE,
1935 tune_params::PREF_LDRD_FALSE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_NEITHER,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1944 const struct tune_params arm_xscale_tune =
1946 &generic_extra_costs, /* Insn extra costs. */
1947 &generic_addr_mode_costs, /* Addressing mode costs. */
1948 xscale_sched_adjust_cost,
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 2, /* Constant limit. */
1952 3, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_TRUE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_STRINGOPS_FALSE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1966 const struct tune_params arm_9e_tune =
1968 &generic_extra_costs, /* Insn extra costs. */
1969 &generic_addr_mode_costs, /* Addressing mode costs. */
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 1, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_TRUE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_STRINGOPS_FALSE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1988 const struct tune_params arm_marvell_pj4_tune =
1990 &generic_extra_costs, /* Insn extra costs. */
1991 &generic_addr_mode_costs, /* Addressing mode costs. */
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 5, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 2, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_TRUE,
2001 tune_params::PREF_LDRD_FALSE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_NEITHER,
2005 tune_params::PREF_NEON_STRINGOPS_FALSE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_OFF
2010 const struct tune_params arm_v6t2_tune =
2012 &generic_extra_costs, /* Insn extra costs. */
2013 &generic_addr_mode_costs, /* Addressing mode costs. */
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 1, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_FALSE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_NEITHER,
2027 tune_params::PREF_NEON_STRINGOPS_FALSE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2033 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2034 const struct tune_params arm_cortex_tune =
2036 &generic_extra_costs,
2037 &generic_addr_mode_costs, /* Addressing mode costs. */
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_STRINGOPS_FALSE,
2052 tune_params::FUSE_NOTHING,
2053 tune_params::SCHED_AUTOPREF_OFF
2056 const struct tune_params arm_cortex_a8_tune =
2058 &cortexa8_extra_costs,
2059 &generic_addr_mode_costs, /* Addressing mode costs. */
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 5, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 tune_params::FUSE_NOTHING,
2075 tune_params::SCHED_AUTOPREF_OFF
2078 const struct tune_params arm_cortex_a7_tune =
2080 &cortexa7_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_STRINGOPS_TRUE,
2096 tune_params::FUSE_NOTHING,
2097 tune_params::SCHED_AUTOPREF_OFF
2100 const struct tune_params arm_cortex_a15_tune =
2102 &cortexa15_extra_costs,
2103 &generic_addr_mode_costs, /* Addressing mode costs. */
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 8, /* Memset max inline. */
2110 3, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_FULL
2122 const struct tune_params arm_cortex_a35_tune =
2124 &cortexa53_extra_costs,
2125 &generic_addr_mode_costs, /* Addressing mode costs. */
2126 NULL, /* Sched adj cost. */
2127 arm_default_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 5, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 1, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_STRINGOPS_TRUE,
2140 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2141 tune_params::SCHED_AUTOPREF_OFF
2144 const struct tune_params arm_cortex_a53_tune =
2146 &cortexa53_extra_costs,
2147 &generic_addr_mode_costs, /* Addressing mode costs. */
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2163 tune_params::SCHED_AUTOPREF_OFF
2166 const struct tune_params arm_cortex_a57_tune =
2168 &cortexa57_extra_costs,
2169 &generic_addr_mode_costs, /* addressing mode costs */
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost,
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 3, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2185 tune_params::SCHED_AUTOPREF_FULL
2188 const struct tune_params arm_exynosm1_tune =
2190 &exynosm1_extra_costs,
2191 &generic_addr_mode_costs, /* Addressing mode costs. */
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost,
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 3, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_STRINGOPS_TRUE,
2206 tune_params::FUSE_NOTHING,
2207 tune_params::SCHED_AUTOPREF_OFF
2210 const struct tune_params arm_xgene1_tune =
2212 &xgene1_extra_costs,
2213 &generic_addr_mode_costs, /* Addressing mode costs. */
2214 NULL, /* Sched adj cost. */
2215 arm_default_branch_cost,
2216 &arm_default_vec_cost,
2217 1, /* Constant limit. */
2218 2, /* Max cond insns. */
2219 32, /* Memset max inline. */
2220 4, /* Issue rate. */
2221 ARM_PREFETCH_NOT_BENEFICIAL,
2222 tune_params::PREF_CONST_POOL_FALSE,
2223 tune_params::PREF_LDRD_TRUE,
2224 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2226 tune_params::DISPARAGE_FLAGS_ALL,
2227 tune_params::PREF_NEON_STRINGOPS_FALSE,
2228 tune_params::FUSE_NOTHING,
2229 tune_params::SCHED_AUTOPREF_OFF
2232 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2233 less appealing. Set max_insns_skipped to a low value. */
2235 const struct tune_params arm_cortex_a5_tune =
2237 &cortexa5_extra_costs,
2238 &generic_addr_mode_costs, /* Addressing mode costs. */
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_a5_branch_cost,
2241 &arm_default_vec_cost,
2242 1, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_FALSE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_STRINGOPS_TRUE,
2253 tune_params::FUSE_NOTHING,
2254 tune_params::SCHED_AUTOPREF_OFF
2257 const struct tune_params arm_cortex_a9_tune =
2259 &cortexa9_extra_costs,
2260 &generic_addr_mode_costs, /* Addressing mode costs. */
2261 cortex_a9_sched_adjust_cost,
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost,
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 2, /* Issue rate. */
2268 ARM_PREFETCH_BENEFICIAL(4,32,32),
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_STRINGOPS_FALSE,
2275 tune_params::FUSE_NOTHING,
2276 tune_params::SCHED_AUTOPREF_OFF
2279 const struct tune_params arm_cortex_a12_tune =
2281 &cortexa12_extra_costs,
2282 &generic_addr_mode_costs, /* Addressing mode costs. */
2283 NULL, /* Sched adj cost. */
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost, /* Vectorizer costs. */
2286 1, /* Constant limit. */
2287 2, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_FALSE,
2292 tune_params::PREF_LDRD_TRUE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_ALL,
2296 tune_params::PREF_NEON_STRINGOPS_TRUE,
2297 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2298 tune_params::SCHED_AUTOPREF_OFF
2301 const struct tune_params arm_cortex_a73_tune =
2303 &cortexa57_extra_costs,
2304 &generic_addr_mode_costs, /* Addressing mode costs. */
2305 NULL, /* Sched adj cost. */
2306 arm_default_branch_cost,
2307 &arm_default_vec_cost, /* Vectorizer costs. */
2308 1, /* Constant limit. */
2309 2, /* Max cond insns. */
2310 8, /* Memset max inline. */
2311 2, /* Issue rate. */
2312 ARM_PREFETCH_NOT_BENEFICIAL,
2313 tune_params::PREF_CONST_POOL_FALSE,
2314 tune_params::PREF_LDRD_TRUE,
2315 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2316 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2317 tune_params::DISPARAGE_FLAGS_ALL,
2318 tune_params::PREF_NEON_STRINGOPS_TRUE,
2319 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2320 tune_params::SCHED_AUTOPREF_FULL
2323 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a single
2324 cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from the
2325 constant pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2326 loads/stores can be pipelined together, saving one cycle), and may also
2327 improve icache utilisation. Hence we prefer the constant pool for such
2328 processors. */
2330 const struct tune_params arm_v7m_tune =
2332 &v7m_extra_costs,
2333 &generic_addr_mode_costs, /* Addressing mode costs. */
2334 NULL, /* Sched adj cost. */
2335 arm_cortex_m_branch_cost,
2336 &arm_default_vec_cost,
2337 1, /* Constant limit. */
2338 2, /* Max cond insns. */
2339 8, /* Memset max inline. */
2340 1, /* Issue rate. */
2341 ARM_PREFETCH_NOT_BENEFICIAL,
2342 tune_params::PREF_CONST_POOL_TRUE,
2343 tune_params::PREF_LDRD_FALSE,
2344 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2345 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2346 tune_params::DISPARAGE_FLAGS_NEITHER,
2347 tune_params::PREF_NEON_STRINGOPS_FALSE,
2348 tune_params::FUSE_NOTHING,
2349 tune_params::SCHED_AUTOPREF_OFF
2352 /* Cortex-M7 tuning. */
2354 const struct tune_params arm_cortex_m7_tune =
2356 &v7m_extra_costs,
2357 &generic_addr_mode_costs, /* Addressing mode costs. */
2358 NULL, /* Sched adj cost. */
2359 arm_cortex_m7_branch_cost,
2360 &arm_default_vec_cost,
2361 0, /* Constant limit. */
2362 1, /* Max cond insns. */
2363 8, /* Memset max inline. */
2364 2, /* Issue rate. */
2365 ARM_PREFETCH_NOT_BENEFICIAL,
2366 tune_params::PREF_CONST_POOL_TRUE,
2367 tune_params::PREF_LDRD_FALSE,
2368 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2369 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2370 tune_params::DISPARAGE_FLAGS_NEITHER,
2371 tune_params::PREF_NEON_STRINGOPS_FALSE,
2372 tune_params::FUSE_NOTHING,
2373 tune_params::SCHED_AUTOPREF_OFF
2376 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2377 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2378 cortex-m23. */
2379 const struct tune_params arm_v6m_tune =
2381 &generic_extra_costs, /* Insn extra costs. */
2382 &generic_addr_mode_costs, /* Addressing mode costs. */
2383 NULL, /* Sched adj cost. */
2384 arm_default_branch_cost,
2385 &arm_default_vec_cost, /* Vectorizer costs. */
2386 1, /* Constant limit. */
2387 5, /* Max cond insns. */
2388 8, /* Memset max inline. */
2389 1, /* Issue rate. */
2390 ARM_PREFETCH_NOT_BENEFICIAL,
2391 tune_params::PREF_CONST_POOL_FALSE,
2392 tune_params::PREF_LDRD_FALSE,
2393 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2395 tune_params::DISPARAGE_FLAGS_NEITHER,
2396 tune_params::PREF_NEON_STRINGOPS_FALSE,
2397 tune_params::FUSE_NOTHING,
2398 tune_params::SCHED_AUTOPREF_OFF
2401 const struct tune_params arm_fa726te_tune =
2403 &generic_extra_costs, /* Insn extra costs. */
2404 &generic_addr_mode_costs, /* Addressing mode costs. */
2405 fa726te_sched_adjust_cost,
2406 arm_default_branch_cost,
2407 &arm_default_vec_cost,
2408 1, /* Constant limit. */
2409 5, /* Max cond insns. */
2410 8, /* Memset max inline. */
2411 2, /* Issue rate. */
2412 ARM_PREFETCH_NOT_BENEFICIAL,
2413 tune_params::PREF_CONST_POOL_TRUE,
2414 tune_params::PREF_LDRD_FALSE,
2415 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2416 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2417 tune_params::DISPARAGE_FLAGS_NEITHER,
2418 tune_params::PREF_NEON_STRINGOPS_FALSE,
2419 tune_params::FUSE_NOTHING,
2420 tune_params::SCHED_AUTOPREF_OFF
2423 char *accepted_branch_protection_string = NULL;
2425 /* Auto-generated CPU, FPU and architecture tables. */
2426 #include "arm-cpu-data.h"
2428 /* The name of the preprocessor macro to define for this architecture. PROFILE
2429 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2430 is thus chosen to be big enough to hold the longest architecture name. */
2432 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2434 /* Supported TLS relocations. */
2436 enum tls_reloc {
2437 TLS_GD32,
2438 TLS_GD32_FDPIC,
2439 TLS_LDM32,
2440 TLS_LDM32_FDPIC,
2441 TLS_LDO32,
2442 TLS_IE32,
2443 TLS_IE32_FDPIC,
2444 TLS_LE32,
2445 TLS_DESCSEQ /* GNU scheme */
2448 /* The maximum number of insns to be used when loading a constant. */
2449 inline static int
2450 arm_constant_limit (bool size_p)
2452 return size_p ? 1 : current_tune->constant_limit;
2455 /* Emit an insn that's a simple single-set. Both the operands must be known
2456 to be valid. */
2457 inline static rtx_insn *
2458 emit_set_insn (rtx x, rtx y)
2460 return emit_insn (gen_rtx_SET (x, y));
2463 /* Return the number of bits set in VALUE. */
2464 static unsigned
2465 bit_count (unsigned long value)
2467 unsigned long count = 0;
2469 while (value)
2471 count++;
2472 value &= value - 1; /* Clear the least-significant set bit. */
2475 return count;
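/* For example, bit_count (0x13) loops three times, clearing bits 0, 1 and 4
   in turn, and returns 3; the loop runs once per set bit rather than once
   per bit position. */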
2478 /* Return the number of bits set in BMAP. */
2479 static unsigned
2480 bitmap_popcount (const sbitmap bmap)
2482 unsigned int count = 0;
2483 unsigned int n = 0;
2484 sbitmap_iterator sbi;
2486 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2487 count++;
2488 return count;
2491 typedef struct
2493 machine_mode mode;
2494 const char *name;
2495 } arm_fixed_mode_set;
2497 /* A small helper for registering the fixed-point libfuncs. */
2499 static void
2500 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2501 const char *funcname, const char *modename,
2502 int num_suffix)
2504 char buffer[50];
2506 if (num_suffix == 0)
2507 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2508 else
2509 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2511 set_optab_libfunc (optable, mode, buffer);
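/* For example, arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3)
   registers "__gnu_addqq3" as the addition libcall for QQmode; a num_suffix
   of 0 simply omits the trailing digit. */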
2514 static void
2515 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2516 machine_mode from, const char *funcname,
2517 const char *toname, const char *fromname)
2519 char buffer[50];
2520 const char *maybe_suffix_2 = "";
2522 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2523 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2524 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2525 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2526 maybe_suffix_2 = "2";
2528 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2529 maybe_suffix_2);
2531 set_conv_libfunc (optable, to, from, buffer);
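/* For example, a fract conversion from QQmode to HQmode (both signed
   fractional modes) takes the "2" suffix and becomes "__gnu_fractqqhq2",
   whereas a conversion from QQmode to SImode does not qualify and becomes
   "__gnu_fractqqsi". */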
2534 static GTY(()) rtx speculation_barrier_libfunc;
2536 /* Record that we have no arithmetic or comparison libfuncs for
2537 machine mode MODE. */
2539 static void
2540 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2542 /* Arithmetic. */
2543 set_optab_libfunc (add_optab, mode, NULL);
2544 set_optab_libfunc (sdiv_optab, mode, NULL);
2545 set_optab_libfunc (smul_optab, mode, NULL);
2546 set_optab_libfunc (neg_optab, mode, NULL);
2547 set_optab_libfunc (sub_optab, mode, NULL);
2549 /* Comparisons. */
2550 set_optab_libfunc (eq_optab, mode, NULL);
2551 set_optab_libfunc (ne_optab, mode, NULL);
2552 set_optab_libfunc (lt_optab, mode, NULL);
2553 set_optab_libfunc (le_optab, mode, NULL);
2554 set_optab_libfunc (ge_optab, mode, NULL);
2555 set_optab_libfunc (gt_optab, mode, NULL);
2556 set_optab_libfunc (unord_optab, mode, NULL);
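/* With all of these optab entries cleared the middle end cannot emit any
   libcall for such operations; for HFmode (see the half-precision handling
   below) it instead converts to SFmode, operates there, and truncates the
   result back. */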
2559 /* Set up library functions unique to ARM. */
2560 static void
2561 arm_init_libfuncs (void)
2563 machine_mode mode_iter;
2565 /* For Linux, we have access to kernel support for atomic operations. */
2566 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2567 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2569 /* There are no special library functions unless we are using the
2570 ARM BPABI. */
2571 if (!TARGET_BPABI)
2572 return;
2574 /* The functions below are described in Section 4 of the "Run-Time
2575 ABI for the ARM architecture", Version 1.0. */
2577 /* Double-precision floating-point arithmetic. Table 2. */
2578 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2579 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2580 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2581 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2582 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2584 /* Double-precision comparisons. Table 3. */
2585 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2586 set_optab_libfunc (ne_optab, DFmode, NULL);
2587 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2588 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2589 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2590 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2591 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2593 /* Single-precision floating-point arithmetic. Table 4. */
2594 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2595 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2596 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2597 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2598 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2600 /* Single-precision comparisons. Table 5. */
2601 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2602 set_optab_libfunc (ne_optab, SFmode, NULL);
2603 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2604 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2605 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2606 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2607 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2609 /* Floating-point to integer conversions. Table 6. */
2610 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2611 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2612 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2613 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2614 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2615 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2616 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2617 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2619 /* Conversions between floating types. Table 7. */
2620 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2621 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2623 /* Integer to floating-point conversions. Table 8. */
2624 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2625 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2626 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2627 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2628 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2629 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2630 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2631 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2633 /* Long long. Table 9. */
2634 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2635 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2636 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2637 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2638 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2639 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2640 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2641 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2643 /* Integer (32/32->32) division. \S 4.3.1. */
2644 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2645 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2647 /* The divmod functions are designed so that they can be used for
2648 plain division, even though they return both the quotient and the
2649 remainder. The quotient is returned in the usual location (i.e.,
2650 r0 for SImode, {r0, r1} for DImode), just as would be expected
2651 for an ordinary division routine. Because the AAPCS calling
2652 conventions specify that all of { r0, r1, r2, r3 } are
2653 call-clobbered registers, there is no need to tell the compiler
2654 explicitly that those registers are clobbered by these
2655 routines. */
2656 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2657 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2659 /* For SImode division the ABI provides div-without-mod routines,
2660 which are faster. */
2661 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2662 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2664 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2665 divmod libcalls instead. */
2666 set_optab_libfunc (smod_optab, DImode, NULL);
2667 set_optab_libfunc (umod_optab, DImode, NULL);
2668 set_optab_libfunc (smod_optab, SImode, NULL);
2669 set_optab_libfunc (umod_optab, SImode, NULL);
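/* In practice this means that, on an AAPCS target without hardware integer
   divide, a source-level "a % b" on SImode operands becomes a call to
   __aeabi_idivmod, which returns the quotient in r0 and the remainder in r1;
   the remainder is then simply taken from r1. */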
2671 /* Half-precision float operations. The compiler handles all operations
2672 with NULL libfuncs by converting to SFmode. */
2673 switch (arm_fp16_format)
2675 case ARM_FP16_FORMAT_IEEE:
2676 case ARM_FP16_FORMAT_ALTERNATIVE:
2678 /* Conversions. */
2679 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2680 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2681 ? "__gnu_f2h_ieee"
2682 : "__gnu_f2h_alternative"));
2683 set_conv_libfunc (sext_optab, SFmode, HFmode,
2684 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2685 ? "__gnu_h2f_ieee"
2686 : "__gnu_h2f_alternative"));
2688 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2689 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2690 ? "__gnu_d2h_ieee"
2691 : "__gnu_d2h_alternative"));
2693 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2694 break;
2696 default:
2697 break;
2700 /* For all possible libcalls in BFmode, record NULL. */
2701 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2703 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2704 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2705 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2706 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2708 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2710 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2712 const arm_fixed_mode_set fixed_arith_modes[] =
2714 { E_QQmode, "qq" },
2715 { E_UQQmode, "uqq" },
2716 { E_HQmode, "hq" },
2717 { E_UHQmode, "uhq" },
2718 { E_SQmode, "sq" },
2719 { E_USQmode, "usq" },
2720 { E_DQmode, "dq" },
2721 { E_UDQmode, "udq" },
2722 { E_TQmode, "tq" },
2723 { E_UTQmode, "utq" },
2724 { E_HAmode, "ha" },
2725 { E_UHAmode, "uha" },
2726 { E_SAmode, "sa" },
2727 { E_USAmode, "usa" },
2728 { E_DAmode, "da" },
2729 { E_UDAmode, "uda" },
2730 { E_TAmode, "ta" },
2731 { E_UTAmode, "uta" }
2733 const arm_fixed_mode_set fixed_conv_modes[] =
2735 { E_QQmode, "qq" },
2736 { E_UQQmode, "uqq" },
2737 { E_HQmode, "hq" },
2738 { E_UHQmode, "uhq" },
2739 { E_SQmode, "sq" },
2740 { E_USQmode, "usq" },
2741 { E_DQmode, "dq" },
2742 { E_UDQmode, "udq" },
2743 { E_TQmode, "tq" },
2744 { E_UTQmode, "utq" },
2745 { E_HAmode, "ha" },
2746 { E_UHAmode, "uha" },
2747 { E_SAmode, "sa" },
2748 { E_USAmode, "usa" },
2749 { E_DAmode, "da" },
2750 { E_UDAmode, "uda" },
2751 { E_TAmode, "ta" },
2752 { E_UTAmode, "uta" },
2753 { E_QImode, "qi" },
2754 { E_HImode, "hi" },
2755 { E_SImode, "si" },
2756 { E_DImode, "di" },
2757 { E_TImode, "ti" },
2758 { E_SFmode, "sf" },
2759 { E_DFmode, "df" }
2761 unsigned int i, j;
2763 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2765 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2766 "add", fixed_arith_modes[i].name, 3);
2767 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2768 "ssadd", fixed_arith_modes[i].name, 3);
2769 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2770 "usadd", fixed_arith_modes[i].name, 3);
2771 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2772 "sub", fixed_arith_modes[i].name, 3);
2773 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2774 "sssub", fixed_arith_modes[i].name, 3);
2775 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2776 "ussub", fixed_arith_modes[i].name, 3);
2777 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2778 "mul", fixed_arith_modes[i].name, 3);
2779 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2780 "ssmul", fixed_arith_modes[i].name, 3);
2781 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2782 "usmul", fixed_arith_modes[i].name, 3);
2783 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2784 "div", fixed_arith_modes[i].name, 3);
2785 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2786 "udiv", fixed_arith_modes[i].name, 3);
2787 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2788 "ssdiv", fixed_arith_modes[i].name, 3);
2789 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2790 "usdiv", fixed_arith_modes[i].name, 3);
2791 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2792 "neg", fixed_arith_modes[i].name, 2);
2793 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2794 "ssneg", fixed_arith_modes[i].name, 2);
2795 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2796 "usneg", fixed_arith_modes[i].name, 2);
2797 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2798 "ashl", fixed_arith_modes[i].name, 3);
2799 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2800 "ashr", fixed_arith_modes[i].name, 3);
2801 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2802 "lshr", fixed_arith_modes[i].name, 3);
2803 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2804 "ssashl", fixed_arith_modes[i].name, 3);
2805 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2806 "usashl", fixed_arith_modes[i].name, 3);
2807 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2808 "cmp", fixed_arith_modes[i].name, 2);
2811 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2812 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2814 if (i == j
2815 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2816 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2817 continue;
2819 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2820 fixed_conv_modes[j].mode, "fract",
2821 fixed_conv_modes[i].name,
2822 fixed_conv_modes[j].name);
2823 arm_set_fixed_conv_libfunc (satfract_optab,
2824 fixed_conv_modes[i].mode,
2825 fixed_conv_modes[j].mode, "satfract",
2826 fixed_conv_modes[i].name,
2827 fixed_conv_modes[j].name);
2828 arm_set_fixed_conv_libfunc (fractuns_optab,
2829 fixed_conv_modes[i].mode,
2830 fixed_conv_modes[j].mode, "fractuns",
2831 fixed_conv_modes[i].name,
2832 fixed_conv_modes[j].name);
2833 arm_set_fixed_conv_libfunc (satfractuns_optab,
2834 fixed_conv_modes[i].mode,
2835 fixed_conv_modes[j].mode, "satfractuns",
2836 fixed_conv_modes[i].name,
2837 fixed_conv_modes[j].name);
2841 if (TARGET_AAPCS_BASED)
2842 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2844 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2847 /* On AAPCS systems, this is the "struct __va_list". */
2848 static GTY(()) tree va_list_type;
2850 /* Return the type to use as __builtin_va_list. */
2851 static tree
2852 arm_build_builtin_va_list (void)
2854 tree va_list_name;
2855 tree ap_field;
2857 if (!TARGET_AAPCS_BASED)
2858 return std_build_builtin_va_list ();
2860 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2861 defined as:
2863 struct __va_list
2865 void *__ap;
2868 The C Library ABI further reinforces this definition in \S
2869 4.1.
2871 We must follow this definition exactly. The structure tag
2872 name is visible in C++ mangled names, and thus forms a part
2873 of the ABI. The field name may be used by people who
2874 #include <stdarg.h>. */
2875 /* Create the type. */
2876 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2877 /* Give it the required name. */
2878 va_list_name = build_decl (BUILTINS_LOCATION,
2879 TYPE_DECL,
2880 get_identifier ("__va_list"),
2881 va_list_type);
2882 DECL_ARTIFICIAL (va_list_name) = 1;
2883 TYPE_NAME (va_list_type) = va_list_name;
2884 TYPE_STUB_DECL (va_list_type) = va_list_name;
2885 /* Create the __ap field. */
2886 ap_field = build_decl (BUILTINS_LOCATION,
2887 FIELD_DECL,
2888 get_identifier ("__ap"),
2889 ptr_type_node);
2890 DECL_ARTIFICIAL (ap_field) = 1;
2891 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2892 TYPE_FIELDS (va_list_type) = ap_field;
2893 /* Compute its layout. */
2894 layout_type (va_list_type);
2896 return va_list_type;
2899 /* Return an expression of type "void *" pointing to the next
2900 available argument in a variable-argument list. VALIST is the
2901 user-level va_list object, of type __builtin_va_list. */
2902 static tree
2903 arm_extract_valist_ptr (tree valist)
2905 if (TREE_TYPE (valist) == error_mark_node)
2906 return error_mark_node;
2908 /* On an AAPCS target, the pointer is stored within "struct
2909 va_list". */
2910 if (TARGET_AAPCS_BASED)
2912 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2913 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2914 valist, ap_field, NULL_TREE);
2917 return valist;
2920 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2921 static void
2922 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2924 valist = arm_extract_valist_ptr (valist);
2925 std_expand_builtin_va_start (valist, nextarg);
2928 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2929 static tree
2930 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2931 gimple_seq *post_p)
2933 valist = arm_extract_valist_ptr (valist);
2934 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2937 /* Check any incompatible options that the user has specified. */
2938 static void
2939 arm_option_check_internal (struct gcc_options *opts)
2941 int flags = opts->x_target_flags;
2943 /* iWMMXt and NEON are incompatible. */
2944 if (TARGET_IWMMXT
2945 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2946 error ("iWMMXt and NEON are incompatible");
2948 /* Make sure that the processor choice does not conflict with any of the
2949 other command line choices. */
2950 if (TARGET_ARM_P (flags)
2951 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2952 error ("target CPU does not support ARM mode");
2954 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2955 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2956 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2958 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2959 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2961 /* If this target is normally configured to use APCS frames, warn if they
2962 are turned off and debugging is turned on. */
2963 if (TARGET_ARM_P (flags)
2964 && write_symbols != NO_DEBUG
2965 && !TARGET_APCS_FRAME
2966 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2967 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2968 "debugging");
2970 /* iWMMXt unsupported under Thumb mode. */
2971 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2972 error ("iWMMXt unsupported under Thumb mode");
2974 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2975 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2977 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2979 error ("RTP PIC is incompatible with Thumb");
2980 flag_pic = 0;
2983 if (target_pure_code || target_slow_flash_data)
2985 const char *flag = (target_pure_code ? "-mpure-code" :
2986 "-mslow-flash-data");
2987 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2989 /* We only support -mslow-flash-data on M-profile targets with
2990 MOVT. */
2991 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2992 error ("%s only supports non-pic code on M-profile targets with the "
2993 "MOVT instruction", flag);
2995 /* We only support -mpure-code on M-profile targets. */
2996 if (target_pure_code && common_unsupported_modes)
2997 error ("%s only supports non-pic code on M-profile targets", flag);
2999 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3000 -mword-relocations forbids relocation of MOVT/MOVW. */
3001 if (target_word_relocations)
3002 error ("%s incompatible with %<-mword-relocations%>", flag);
3006 /* Recompute the global settings depending on target attribute options. */
3008 static void
3009 arm_option_params_internal (void)
3011 /* If we are not using the default (ARM mode) section anchor offset
3012 ranges, then set the correct ranges now. */
3013 if (TARGET_THUMB1)
3015 /* Thumb-1 LDR instructions cannot have negative offsets.
3016 Permissible positive offset ranges are 5-bit (for byte loads),
3017 6-bit (for halfword loads), or 7-bit (for word loads).
3018 Empirical results suggest a 7-bit anchor range gives the best
3019 overall code size. */
3020 targetm.min_anchor_offset = 0;
3021 targetm.max_anchor_offset = 127;
3023 else if (TARGET_THUMB2)
3025 /* The minimum is set such that the total size of the block
3026 for a particular anchor is 248 + 1 + 4095 bytes, which is
3027 divisible by eight, ensuring natural spacing of anchors. */
3028 targetm.min_anchor_offset = -248;
3029 targetm.max_anchor_offset = 4095;
3031 else
3033 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3034 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3037 /* Increase the number of conditional instructions with -Os. */
3038 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3040 /* For THUMB2, we limit the conditional sequence to one IT block. */
3041 if (TARGET_THUMB2)
3042 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3044 if (TARGET_THUMB1)
3045 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3046 else
3047 targetm.md_asm_adjust = arm_md_asm_adjust;
3050 /* True if -mflip-thumb should next add an attribute for the default
3051 mode, false if it should next add an attribute for the opposite mode. */
3052 static GTY(()) bool thumb_flipper;
3054 /* Options after initial target override. */
3055 static GTY(()) tree init_optimize;
3057 static void
3058 arm_override_options_after_change_1 (struct gcc_options *opts,
3059 struct gcc_options *opts_set)
3061 /* -falign-functions without argument: supply one. */
3062 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3063 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3064 && opts->x_optimize_size ? "2" : "4";
3067 /* Implement targetm.override_options_after_change. */
3069 static void
3070 arm_override_options_after_change (void)
3072 arm_override_options_after_change_1 (&global_options, &global_options_set);
3075 /* Implement TARGET_OPTION_RESTORE. */
3076 static void
3077 arm_option_restore (struct gcc_options */* opts */,
3078 struct gcc_options */* opts_set */,
3079 struct cl_target_option *ptr)
3081 arm_configure_build_target (&arm_active_target, ptr, false);
3082 arm_option_reconfigure_globals ();
3085 /* Reset options between modes that the user has specified. */
3086 static void
3087 arm_option_override_internal (struct gcc_options *opts,
3088 struct gcc_options *opts_set)
3090 arm_override_options_after_change_1 (opts, opts_set);
3092 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3094 /* The default is to enable interworking, so this warning message would
3095 be confusing to users who have just compiled with
3096 eg, -march=armv4. */
3097 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3098 opts->x_target_flags &= ~MASK_INTERWORK;
3101 if (TARGET_THUMB_P (opts->x_target_flags)
3102 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3104 warning (0, "target CPU does not support THUMB instructions");
3105 opts->x_target_flags &= ~MASK_THUMB;
3108 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3110 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3111 opts->x_target_flags &= ~MASK_APCS_FRAME;
3114 /* Callee super interworking implies thumb interworking. Adding
3115 this to the flags here simplifies the logic elsewhere. */
3116 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3117 opts->x_target_flags |= MASK_INTERWORK;
3119 /* We need to remember the initial values so combinations of options like
3120 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3121 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3123 if (! opts_set->x_arm_restrict_it)
3124 opts->x_arm_restrict_it = arm_arch8;
3126 /* ARM execution state and M profile don't have [restrict] IT. */
3127 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3128 opts->x_arm_restrict_it = 0;
3130 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3131 if (!opts_set->x_arm_restrict_it
3132 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3133 opts->x_arm_restrict_it = 0;
3135 /* Enable -munaligned-access by default for
3136 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3137 i.e. Thumb2 and ARM state only.
3138 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3139 - ARMv8 architecture-based processors.
3141 Disable -munaligned-access by default for
3142 - all pre-ARMv6 architecture-based processors
3143 - ARMv6-M architecture-based processors
3144 - ARMv8-M Baseline processors. */
3146 if (! opts_set->x_unaligned_access)
3148 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3149 && arm_arch6 && (arm_arch_notm || arm_arch7));
3151 else if (opts->x_unaligned_access == 1
3152 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3154 warning (0, "target CPU does not support unaligned accesses");
3155 opts->x_unaligned_access = 0;
3158 /* Don't warn since it's on by default in -O2. */
3159 if (TARGET_THUMB1_P (opts->x_target_flags))
3160 opts->x_flag_schedule_insns = 0;
3161 else
3162 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3164 /* Disable shrink-wrap when optimizing function for size, since it tends to
3165 generate additional returns. */
3166 if (optimize_function_for_size_p (cfun)
3167 && TARGET_THUMB2_P (opts->x_target_flags))
3168 opts->x_flag_shrink_wrap = false;
3169 else
3170 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3172 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3173 - epilogue_insns - does not accurately model the corresponding insns
3174 emitted in the asm file. In particular, see the comment in thumb_exit
3175 'Find out how many of the (return) argument registers we can corrupt'.
3176 As a consequence, the epilogue may clobber registers without fipa-ra
3177 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3178 TODO: Accurately model clobbers for epilogue_insns and reenable
3179 fipa-ra. */
3180 if (TARGET_THUMB1_P (opts->x_target_flags))
3181 opts->x_flag_ipa_ra = 0;
3182 else
3183 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3185 /* Thumb2 inline assembly code should always use unified syntax.
3186 This will apply to ARM and Thumb1 eventually. */
3187 if (TARGET_THUMB2_P (opts->x_target_flags))
3188 opts->x_inline_asm_unified = true;
3190 if (arm_stack_protector_guard == SSP_GLOBAL
3191 && opts->x_arm_stack_protector_guard_offset_str)
3193 error ("incompatible options %<-mstack-protector-guard=global%> and "
3194 "%<-mstack-protector-guard-offset=%s%>",
3195 arm_stack_protector_guard_offset_str);
3198 if (opts->x_arm_stack_protector_guard_offset_str)
3200 char *end;
3201 const char *str = arm_stack_protector_guard_offset_str;
3202 errno = 0;
3203 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3204 if (!*str || *end || errno)
3205 error ("%qs is not a valid offset in %qs", str,
3206 "-mstack-protector-guard-offset=");
3207 arm_stack_protector_guard_offset = offs;
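/* For illustration: combining -mstack-protector-guard=tls with, say,
   -mstack-protector-guard-offset=24 (an arbitrary example value) makes the
   stack-protector canary load use that fixed offset from the thread pointer
   instead of referencing the global __stack_chk_guard symbol. */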
3210 if (arm_current_function_pac_enabled_p ())
3212 if (!arm_arch8m_main)
3213 error ("This architecture does not support branch protection "
3214 "instructions");
3215 if (TARGET_TPCS_FRAME)
3216 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3219 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3220 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3221 #endif
3224 static sbitmap isa_all_fpubits_internal;
3225 static sbitmap isa_all_fpbits;
3226 static sbitmap isa_quirkbits;
3228 /* Configure a build target TARGET from the user-specified options OPTS and
3229 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3230 architecture have been specified, but the two are not identical. */
3231 void
3232 arm_configure_build_target (struct arm_build_target *target,
3233 struct cl_target_option *opts,
3234 bool warn_compatible)
3236 const cpu_option *arm_selected_tune = NULL;
3237 const arch_option *arm_selected_arch = NULL;
3238 const cpu_option *arm_selected_cpu = NULL;
3239 const arm_fpu_desc *arm_selected_fpu = NULL;
3240 const char *tune_opts = NULL;
3241 const char *arch_opts = NULL;
3242 const char *cpu_opts = NULL;
3244 bitmap_clear (target->isa);
3245 target->core_name = NULL;
3246 target->arch_name = NULL;
3248 if (opts->x_arm_arch_string)
3250 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3251 "-march",
3252 opts->x_arm_arch_string);
3253 arch_opts = strchr (opts->x_arm_arch_string, '+');
3256 if (opts->x_arm_cpu_string)
3258 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3259 opts->x_arm_cpu_string);
3260 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3261 arm_selected_tune = arm_selected_cpu;
3262 /* If taking the tuning from -mcpu, we don't need to rescan the
3263 options for tuning. */
3266 if (opts->x_arm_tune_string)
3268 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3269 opts->x_arm_tune_string);
3270 tune_opts = strchr (opts->x_arm_tune_string, '+');
3273 if (opts->x_arm_branch_protection_string)
3275 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3277 if (aarch_ra_sign_key != AARCH_KEY_A)
3279 warning (0, "invalid key type for %<-mbranch-protection=%>");
3280 aarch_ra_sign_key = AARCH_KEY_A;
3284 if (arm_selected_arch)
3286 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3287 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3288 arch_opts);
3290 if (arm_selected_cpu)
3292 auto_sbitmap cpu_isa (isa_num_bits);
3293 auto_sbitmap isa_delta (isa_num_bits);
3295 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3296 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3297 cpu_opts);
3298 bitmap_xor (isa_delta, cpu_isa, target->isa);
3299 /* Ignore any bits that are quirk bits. */
3300 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3301 /* If the user (or the default configuration) has specified a
3302 specific FPU, then ignore any bits that depend on the FPU
3303 configuration. Do similarly if using the soft-float
3304 ABI. */
3305 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3306 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3307 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3309 if (!bitmap_empty_p (isa_delta))
3311 if (warn_compatible)
3312 warning (0, "switch %<-mcpu=%s%> conflicts "
3313 "with switch %<-march=%s%>",
3314 opts->x_arm_cpu_string,
3315 opts->x_arm_arch_string);
3317 /* -march wins for code generation.
3318 -mcpu wins for default tuning. */
3319 if (!arm_selected_tune)
3320 arm_selected_tune = arm_selected_cpu;
3322 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3323 target->arch_name = arm_selected_arch->common.name;
3325 else
3327 /* Architecture and CPU are essentially the same.
3328 Prefer the CPU setting. */
3329 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3330 target->core_name = arm_selected_cpu->common.name;
3331 /* Copy the CPU's capabilities, so that we inherit the
3332 appropriate extensions and quirks. */
3333 bitmap_copy (target->isa, cpu_isa);
3336 else
3338 /* Pick a CPU based on the architecture. */
3339 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3340 target->arch_name = arm_selected_arch->common.name;
3341 /* Note: target->core_name is left unset in this path. */
3344 else if (arm_selected_cpu)
3346 target->core_name = arm_selected_cpu->common.name;
3347 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3348 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3349 cpu_opts);
3350 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3352 /* If the user did not specify a processor or architecture, choose
3353 one for them. */
3354 else
3356 const cpu_option *sel;
3357 auto_sbitmap sought_isa (isa_num_bits);
3358 bitmap_clear (sought_isa);
3359 auto_sbitmap default_isa (isa_num_bits);
3361 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3362 TARGET_CPU_DEFAULT);
3363 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3364 gcc_assert (arm_selected_cpu->common.name);
3366 /* RWE: All of the selection logic below (to the end of this
3367 'if' clause) looks somewhat suspect. It appears to be mostly
3368 there to support forcing thumb support when the default CPU
3369 does not have thumb (somewhat dubious in terms of what the
3370 user might be expecting). I think it should be removed once
3371 support for the pre-thumb era cores is removed. */
3372 sel = arm_selected_cpu;
3373 arm_initialize_isa (default_isa, sel->common.isa_bits);
3374 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3375 cpu_opts);
3377 /* Now check to see if the user has specified any command line
3378 switches that require certain abilities from the cpu. */
3380 if (TARGET_INTERWORK || TARGET_THUMB)
3381 bitmap_set_bit (sought_isa, isa_bit_thumb);
3383 /* If there are such requirements and the default CPU does not
3384 satisfy them, we need to run over the complete list of
3385 cores looking for one that is satisfactory. */
3386 if (!bitmap_empty_p (sought_isa)
3387 && !bitmap_subset_p (sought_isa, default_isa))
3389 auto_sbitmap candidate_isa (isa_num_bits);
3390 /* We're only interested in a CPU with at least the
3391 capabilities of the default CPU and the required
3392 additional features. */
3393 bitmap_ior (default_isa, default_isa, sought_isa);
3395 /* Try to locate a CPU type that supports all of the abilities
3396 of the default CPU, plus the extra abilities requested by
3397 the user. */
3398 for (sel = all_cores; sel->common.name != NULL; sel++)
3400 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3401 /* An exact match? */
3402 if (bitmap_equal_p (default_isa, candidate_isa))
3403 break;
3406 if (sel->common.name == NULL)
3408 unsigned current_bit_count = isa_num_bits;
3409 const cpu_option *best_fit = NULL;
3411 /* Ideally we would like to issue an error message here
3412 saying that it was not possible to find a CPU compatible
3413 with the default CPU, but which also supports the command
3414 line options specified by the programmer, and so they
3415 ought to use the -mcpu=<name> command line option to
3416 override the default CPU type.
3418 If we cannot find a CPU that has exactly the
3419 characteristics of the default CPU and the given
3420 command line options we scan the array again looking
3421 for a best match. The best match must have at least
3422 the capabilities of the perfect match. */
3423 for (sel = all_cores; sel->common.name != NULL; sel++)
3425 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3427 if (bitmap_subset_p (default_isa, candidate_isa))
3429 unsigned count;
3431 bitmap_and_compl (candidate_isa, candidate_isa,
3432 default_isa);
3433 count = bitmap_popcount (candidate_isa);
3435 if (count < current_bit_count)
3437 best_fit = sel;
3438 current_bit_count = count;
3442 gcc_assert (best_fit);
3443 sel = best_fit;
3446 arm_selected_cpu = sel;
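/* A hedged worked example of the best-fit search above, using made-up
   feature sets: if the default CPU provides {A, B} and -mthumb adds
   isa_bit_thumb, we look for a core whose ISA is a superset of
   {A, B, thumb}.  With no exact match, a candidate offering
   {A, B, thumb, C} (one surplus bit) beats one offering
   {A, B, thumb, C, D} (two surplus bits), because the loop keeps the
   candidate with the smallest popcount of surplus bits.  */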
3449 /* Now we know the CPU, we can finally initialize the target
3450 structure. */
3451 target->core_name = arm_selected_cpu->common.name;
3452 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3453 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3454 cpu_opts);
3455 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3458 gcc_assert (arm_selected_cpu);
3459 gcc_assert (arm_selected_arch);
3461 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3463 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3464 auto_sbitmap fpu_bits (isa_num_bits);
3466 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3467 /* This should clear out ALL bits relating to the FPU/simd
3468 extensions, to avoid potentially invalid combinations later on
3469 that we can't match. At present we only clear out those bits
3470 that can be set by -mfpu. This should be fixed in GCC-12. */
3471 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3472 bitmap_ior (target->isa, target->isa, fpu_bits);
3475 /* If we have the soft-float ABI, clear any feature bits relating to use of
3476 floating-point operations. They'll just confuse things later on. */
3477 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3478 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3480 /* There may be implied bits which we still need to enable. These are
3481 non-named features which are needed to complete other sets of features,
3482 but cannot be enabled from arm-cpus.in due to being shared between
3483 multiple fgroups. Each entry in all_implied_fbits is of the form
3484 ante -> cons, meaning that if the feature "ante" is enabled, we should
3485 implicitly enable "cons". */
3486 const struct fbit_implication *impl = all_implied_fbits;
3487 while (impl->ante)
3489 if (bitmap_bit_p (target->isa, impl->ante))
3490 bitmap_set_bit (target->isa, impl->cons);
3491 impl++;
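/* Minimal sketch of the pass above, assuming a hypothetical table entry
   { ante = X, cons = Y }: when X is present in target->isa the loop also
   sets Y.  Chained implications (X -> Y, Y -> Z) are only fully applied
   if the table lists them in dependency order, which is assumed rather
   than checked here.  */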
3494 if (!arm_selected_tune)
3495 arm_selected_tune = arm_selected_cpu;
3496 else /* Validate the features passed to -mtune. */
3497 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3499 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3501 /* Finish initializing the target structure. */
3502 if (!target->arch_name)
3503 target->arch_name = arm_selected_arch->common.name;
3504 target->arch_pp_name = arm_selected_arch->arch;
3505 target->base_arch = arm_selected_arch->base_arch;
3506 target->profile = arm_selected_arch->profile;
3508 target->tune_flags = tune_data->tune_flags;
3509 target->tune = tune_data->tune;
3510 target->tune_core = tune_data->scheduler;
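/* Hedged example of the -mcpu/-march interaction implemented above (the
   CPU and architecture names are only illustrative): given

     -march=armv8-a -mcpu=cortex-a8

   the two ISA sets differ, so with WARN_COMPATIBLE a "switch -mcpu=...
   conflicts with switch -march=..." warning is issued; code generation
   then follows -march while -mcpu still supplies the default tuning, as
   the "-march wins / -mcpu wins" comment above describes.  */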
3513 /* Fix up any incompatible options that the user has specified. */
3514 static void
3515 arm_option_override (void)
3517 static const enum isa_feature fpu_bitlist_internal[]
3518 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3519 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3520 static const enum isa_feature fp_bitlist[]
3521 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3522 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3523 cl_target_option opts;
3525 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3526 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3528 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3529 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3530 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3531 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3533 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3535 if (!OPTION_SET_P (arm_fpu_index))
3537 bool ok;
3538 int fpu_index;
3540 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3541 CL_TARGET);
3542 gcc_assert (ok);
3543 arm_fpu_index = (enum fpu_type) fpu_index;
3546 cl_target_option_save (&opts, &global_options, &global_options_set);
3547 arm_configure_build_target (&arm_active_target, &opts, true);
3549 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3550 SUBTARGET_OVERRIDE_OPTIONS;
3551 #endif
3553 /* Initialize boolean versions of the architectural flags, for use
3554 in the arm.md file and for enabling feature flags. */
3555 arm_option_reconfigure_globals ();
3557 arm_tune = arm_active_target.tune_core;
3558 tune_flags = arm_active_target.tune_flags;
3559 current_tune = arm_active_target.tune;
3561 /* TBD: Dwarf info for apcs frame is not handled yet. */
3562 if (TARGET_APCS_FRAME)
3563 flag_shrink_wrap = false;
3565 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3567 warning (0, "%<-mapcs-stack-check%> incompatible with "
3568 "%<-mno-apcs-frame%>");
3569 target_flags |= MASK_APCS_FRAME;
3572 if (TARGET_POKE_FUNCTION_NAME)
3573 target_flags |= MASK_APCS_FRAME;
3575 if (TARGET_APCS_REENT && flag_pic)
3576 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3578 if (TARGET_APCS_REENT)
3579 warning (0, "APCS reentrant code not supported. Ignored");
3581 /* Set up some tuning parameters. */
3582 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3583 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3584 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3585 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3586 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3587 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3589 /* For arm2/3 there is no need to do any scheduling if we are doing
3590 software floating-point. */
3591 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3592 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3594 /* Override the default structure alignment for AAPCS ABI. */
3595 if (!OPTION_SET_P (arm_structure_size_boundary))
3597 if (TARGET_AAPCS_BASED)
3598 arm_structure_size_boundary = 8;
3600 else
3602 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3604 if (arm_structure_size_boundary != 8
3605 && arm_structure_size_boundary != 32
3606 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3608 if (ARM_DOUBLEWORD_ALIGN)
3609 warning (0,
3610 "structure size boundary can only be set to 8, 32 or 64");
3611 else
3612 warning (0, "structure size boundary can only be set to 8 or 32");
3613 arm_structure_size_boundary
3614 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3618 if (TARGET_VXWORKS_RTP)
3620 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3621 arm_pic_data_is_text_relative = 0;
3623 else if (flag_pic
3624 && !arm_pic_data_is_text_relative
3625 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3626 /* When text & data segments don't have a fixed displacement, the
3627 intended use is with a single, read only, pic base register.
3628 Unless the user explicitly requested not to do that, set
3629 it. */
3630 target_flags |= MASK_SINGLE_PIC_BASE;
3632 /* If stack checking is disabled, we can use r10 as the PIC register,
3633 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3634 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3636 if (TARGET_VXWORKS_RTP)
3637 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3638 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3641 if (flag_pic && TARGET_VXWORKS_RTP)
3642 arm_pic_register = 9;
3644 /* If in FDPIC mode then force arm_pic_register to be r9. */
3645 if (TARGET_FDPIC)
3647 arm_pic_register = FDPIC_REGNUM;
3648 if (TARGET_THUMB1)
3649 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3652 if (arm_pic_register_string != NULL)
3654 int pic_register = decode_reg_name (arm_pic_register_string);
3656 if (!flag_pic)
3657 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3659 /* Prevent the user from choosing an obviously stupid PIC register. */
3660 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3661 || pic_register == HARD_FRAME_POINTER_REGNUM
3662 || pic_register == STACK_POINTER_REGNUM
3663 || pic_register >= PC_REGNUM
3664 || (TARGET_VXWORKS_RTP
3665 && (unsigned int) pic_register != arm_pic_register))
3666 error ("unable to use %qs for PIC register", arm_pic_register_string);
3667 else
3668 arm_pic_register = pic_register;
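/* Illustrative rejections implied by the checks above (not an exhaustive
   list): -mpic-register=sp and -mpic-register=pc always hit the "unable
   to use ... for PIC register" error, and -mpic-register=r3 is rejected
   on the default ABI because r3 is call-clobbered.  */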
3671 if (flag_pic)
3672 target_word_relocations = 1;
3674 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3675 if (fix_cm3_ldrd == 2)
3677 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3678 fix_cm3_ldrd = 1;
3679 else
3680 fix_cm3_ldrd = 0;
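/* Hedged illustration of the tri-state default used above: fix_cm3_ldrd
   starts at 2 ("decide automatically"), so selecting a core whose ISA
   carries isa_bit_quirk_cm3_ldrd turns the workaround on, while an
   explicit -mfix-cortex-m3-ldrd or -mno-fix-cortex-m3-ldrd (values 1
   and 0) has already been honoured and is left untouched here.  */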
3683 /* Enable fix_vlldm by default if required. */
3684 if (fix_vlldm == 2)
3686 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3687 fix_vlldm = 1;
3688 else
3689 fix_vlldm = 0;
3692 /* Enable fix_aes by default if required. */
3693 if (fix_aes_erratum_1742098 == 2)
3695 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3696 fix_aes_erratum_1742098 = 1;
3697 else
3698 fix_aes_erratum_1742098 = 0;
3701 /* Hot/Cold partitioning is not currently supported, since we can't
3702 handle literal pool placement in that case. */
3703 if (flag_reorder_blocks_and_partition)
3705 inform (input_location,
3706 "%<-freorder-blocks-and-partition%> not supported "
3707 "on this architecture");
3708 flag_reorder_blocks_and_partition = 0;
3709 flag_reorder_blocks = 1;
3712 if (flag_pic)
3713 /* Hoisting PIC address calculations more aggressively provides a small,
3714 but measurable, size reduction for PIC code. Therefore, we decrease
3715 the bar for unrestricted expression hoisting to the cost of PIC address
3716 calculation, which is 2 instructions. */
3717 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3718 param_gcse_unrestricted_cost, 2);
3720 /* ARM EABI defaults to strict volatile bitfields. */
3721 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3722 && abi_version_at_least(2))
3723 flag_strict_volatile_bitfields = 1;
3725 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3726 for which we have deemed it beneficial (signified by setting
3727 prefetch.num_slots to 1 or more). */
3728 if (flag_prefetch_loop_arrays < 0
3729 && HAVE_prefetch
3730 && optimize >= 3
3731 && current_tune->prefetch.num_slots > 0)
3732 flag_prefetch_loop_arrays = 1;
3734 /* Set up parameters to be used in prefetching algorithm. Do not
3735 override the defaults unless we are tuning for a core we have
3736 researched values for. */
3737 if (current_tune->prefetch.num_slots > 0)
3738 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3739 param_simultaneous_prefetches,
3740 current_tune->prefetch.num_slots);
3741 if (current_tune->prefetch.l1_cache_line_size >= 0)
3742 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3743 param_l1_cache_line_size,
3744 current_tune->prefetch.l1_cache_line_size);
3745 if (current_tune->prefetch.l1_cache_line_size >= 0)
3747 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3748 param_destruct_interfere_size,
3749 current_tune->prefetch.l1_cache_line_size);
3750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3751 param_construct_interfere_size,
3752 current_tune->prefetch.l1_cache_line_size);
3754 else
3756 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3757 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3758 constructive? */
3759 /* More recent Cortex chips have a 64-byte cache line, but are marked
3760 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3761 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3762 param_destruct_interfere_size, 64);
3763 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3764 param_construct_interfere_size, 64);
3767 if (current_tune->prefetch.l1_cache_size >= 0)
3768 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3769 param_l1_cache_size,
3770 current_tune->prefetch.l1_cache_size);
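/* Hedged example of the SET_OPTION_IF_UNSET calls above: the tuning's
   researched values only become defaults.  An explicit command-line
   setting such as

     --param l1-cache-line-size=32

   is recorded in global_options_set and therefore takes precedence over
   current_tune->prefetch.l1_cache_line_size.  */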
3772 /* Look through ready list and all of queue for instructions
3773 relevant for L2 auto-prefetcher. */
3774 int sched_autopref_queue_depth;
3776 switch (current_tune->sched_autopref)
3778 case tune_params::SCHED_AUTOPREF_OFF:
3779 sched_autopref_queue_depth = -1;
3780 break;
3782 case tune_params::SCHED_AUTOPREF_RANK:
3783 sched_autopref_queue_depth = 0;
3784 break;
3786 case tune_params::SCHED_AUTOPREF_FULL:
3787 sched_autopref_queue_depth = max_insn_queue_index + 1;
3788 break;
3790 default:
3791 gcc_unreachable ();
3794 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3795 param_sched_autopref_queue_depth,
3796 sched_autopref_queue_depth);
3798 /* Currently, for slow flash data, we just disable literal pools. We also
3799 disable it for pure-code. */
3800 if (target_slow_flash_data || target_pure_code)
3801 arm_disable_literal_pool = true;
3803 /* Disable scheduling fusion by default if it's not armv7 processor
3804 or doesn't prefer ldrd/strd. */
3805 if (flag_schedule_fusion == 2
3806 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3807 flag_schedule_fusion = 0;
3809 /* Need to remember initial options before they are overridden. */
3810 init_optimize = build_optimization_node (&global_options,
3811 &global_options_set);
3813 arm_options_perform_arch_sanity_checks ();
3814 arm_option_override_internal (&global_options, &global_options_set);
3815 arm_option_check_internal (&global_options);
3816 arm_option_params_internal ();
3818 /* Create the default target_options structure. */
3819 target_option_default_node = target_option_current_node
3820 = build_target_option_node (&global_options, &global_options_set);
3822 /* Register global variables with the garbage collector. */
3823 arm_add_gc_roots ();
3825 /* Init initial mode for testing. */
3826 thumb_flipper = TARGET_THUMB;
3830 /* Reconfigure global status flags from the active_target.isa. */
3831 void
3832 arm_option_reconfigure_globals (void)
3834 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3835 arm_base_arch = arm_active_target.base_arch;
3837 /* Initialize boolean versions of the architectural flags, for use
3838 in the arm.md file. */
3839 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3840 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3841 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3842 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3843 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3844 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3845 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3846 arm_arch6m = arm_arch6 && !arm_arch_notm;
3847 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3848 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3849 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3850 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3851 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3852 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3853 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3854 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3855 isa_bit_armv8_1m_main);
3856 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3857 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3858 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3859 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3860 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3861 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3862 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3863 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3864 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3865 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3866 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3867 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3868 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3870 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3871 if (arm_fp16_inst)
3873 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3874 error ("selected fp16 options are incompatible");
3875 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3878 arm_arch_cde = 0;
3879 arm_arch_cde_coproc = 0;
3880 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3881 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3882 isa_bit_cdecp6, isa_bit_cdecp7};
3883 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3885 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3886 if (cde_bit)
3888 arm_arch_cde |= cde_bit;
3889 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
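/* Hedged illustration of the CDE bookkeeping above: an -march extension
   such as +cdecp0 (and likewise +cdecp1 ... +cdecp7, matching the
   cde_bits table) sets the corresponding isa_bit_cdecpN, which makes
   arm_arch_cde nonzero and records that coprocessor in the
   arm_arch_cde_coproc mask.  */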
3893 /* And finally, set up some quirks. */
3894 arm_arch_no_volatile_ce
3895 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3896 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3897 isa_bit_quirk_armv6kz);
3899 /* Use the cp15 method if it is available. */
3900 if (target_thread_pointer == TP_AUTO)
3902 if (arm_arch6k && !TARGET_THUMB1)
3903 target_thread_pointer = TP_CP15;
3904 else
3905 target_thread_pointer = TP_SOFT;
3908 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3909 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3912 /* Perform some validation between the desired architecture and the rest of the
3913 options. */
3914 void
3915 arm_options_perform_arch_sanity_checks (void)
3917 /* V5T code we generate is completely interworking capable, so we turn off
3918 TARGET_INTERWORK here to avoid many tests later on. */
3920 /* XXX However, we must pass the right pre-processor defines to CPP
3921 or GLD can get confused. This is a hack. */
3922 if (TARGET_INTERWORK)
3923 arm_cpp_interwork = 1;
3925 if (arm_arch5t)
3926 target_flags &= ~MASK_INTERWORK;
3928 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3929 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3931 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3932 error ("iwmmxt abi requires an iwmmxt capable cpu");
3934 /* BPABI targets use linker tricks to allow interworking on cores
3935 without thumb support. */
3936 if (TARGET_INTERWORK
3937 && !TARGET_BPABI
3938 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3940 warning (0, "target CPU does not support interworking");
3941 target_flags &= ~MASK_INTERWORK;
3944 /* If soft-float is specified then don't use FPU. */
3945 if (TARGET_SOFT_FLOAT)
3946 arm_fpu_attr = FPU_NONE;
3947 else
3948 arm_fpu_attr = FPU_VFP;
3950 if (TARGET_AAPCS_BASED)
3952 if (TARGET_CALLER_INTERWORKING)
3953 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3954 else
3955 if (TARGET_CALLEE_INTERWORKING)
3956 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3959 /* __fp16 support currently assumes the core has ldrh. */
3960 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3961 sorry ("%<__fp16%> and no ldrh");
3963 if (use_cmse && !arm_arch_cmse)
3964 error ("target CPU does not support ARMv8-M Security Extensions");
3966 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3967 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3968 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3969 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3972 if (TARGET_AAPCS_BASED)
3974 if (arm_abi == ARM_ABI_IWMMXT)
3975 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3976 else if (TARGET_HARD_FLOAT_ABI)
3978 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3979 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3980 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3981 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3983 else
3984 arm_pcs_default = ARM_PCS_AAPCS;
3986 else
3988 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3989 sorry ("%<-mfloat-abi=hard%> and VFP");
3991 if (arm_abi == ARM_ABI_APCS)
3992 arm_pcs_default = ARM_PCS_APCS;
3993 else
3994 arm_pcs_default = ARM_PCS_ATPCS;
3998 /* Test whether a local function descriptor is canonical, i.e.,
3999 whether we can use GOTOFFFUNCDESC to compute the address of the
4000 function. */
4001 static bool
4002 arm_fdpic_local_funcdesc_p (rtx fnx)
4004 tree fn;
4005 enum symbol_visibility vis;
4006 bool ret;
4008 if (!TARGET_FDPIC)
4009 return true;
4011 if (! SYMBOL_REF_LOCAL_P (fnx))
4012 return false;
4014 fn = SYMBOL_REF_DECL (fnx);
4016 if (! fn)
4017 return false;
4019 vis = DECL_VISIBILITY (fn);
4021 if (vis == VISIBILITY_PROTECTED)
4022 /* Private function descriptors for protected functions are not
4023 canonical. Temporarily change the visibility to global so that
4024 we can ensure uniqueness of funcdesc pointers. */
4025 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4027 ret = default_binds_local_p_1 (fn, flag_pic);
4029 DECL_VISIBILITY (fn) = vis;
4031 return ret;
4034 static void
4035 arm_add_gc_roots (void)
4037 gcc_obstack_init(&minipool_obstack);
4038 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4041 /* A table of known ARM exception types.
4042 For use with the interrupt function attribute. */
4044 typedef struct
4046 const char *const arg;
4047 const unsigned long return_value;
4049 isr_attribute_arg;
4051 static const isr_attribute_arg isr_attribute_args [] =
4053 { "IRQ", ARM_FT_ISR },
4054 { "irq", ARM_FT_ISR },
4055 { "FIQ", ARM_FT_FIQ },
4056 { "fiq", ARM_FT_FIQ },
4057 { "ABORT", ARM_FT_ISR },
4058 { "abort", ARM_FT_ISR },
4059 { "UNDEF", ARM_FT_EXCEPTION },
4060 { "undef", ARM_FT_EXCEPTION },
4061 { "SWI", ARM_FT_EXCEPTION },
4062 { "swi", ARM_FT_EXCEPTION },
4063 { NULL, ARM_FT_NORMAL }
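/* Hedged example of how this table is reached from user code (the
   function name is made up):

     void __attribute__ ((interrupt ("IRQ"))) uart_isr (void);

   The string argument is compared with streq in arm_isr_value below, so
   the match is case-sensitive; that is why both "IRQ" and "irq" (and the
   other pairs) are listed.  */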
4066 /* Returns the (interrupt) function type of the current
4067 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4069 static unsigned long
4070 arm_isr_value (tree argument)
4072 const isr_attribute_arg * ptr;
4073 const char * arg;
4075 if (!arm_arch_notm)
4076 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4078 /* No argument - default to IRQ. */
4079 if (argument == NULL_TREE)
4080 return ARM_FT_ISR;
4082 /* Get the value of the argument. */
4083 if (TREE_VALUE (argument) == NULL_TREE
4084 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4085 return ARM_FT_UNKNOWN;
4087 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4089 /* Check it against the list of known arguments. */
4090 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4091 if (streq (arg, ptr->arg))
4092 return ptr->return_value;
4094 /* An unrecognized interrupt type. */
4095 return ARM_FT_UNKNOWN;
4098 /* Computes the type of the current function. */
4100 static unsigned long
4101 arm_compute_func_type (void)
4103 unsigned long type = ARM_FT_UNKNOWN;
4104 tree a;
4105 tree attr;
4107 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4109 /* Decide if the current function is volatile. Such functions
4110 never return, and many memory cycles can be saved by not storing
4111 register values that will never be needed again. This optimization
4112 was added to speed up context switching in a kernel application. */
4113 if (optimize > 0
4114 && (TREE_NOTHROW (current_function_decl)
4115 || !(flag_unwind_tables
4116 || (flag_exceptions
4117 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4118 && TREE_THIS_VOLATILE (current_function_decl))
4119 type |= ARM_FT_VOLATILE;
4121 if (cfun->static_chain_decl != NULL)
4122 type |= ARM_FT_NESTED;
4124 attr = DECL_ATTRIBUTES (current_function_decl);
4126 a = lookup_attribute ("naked", attr);
4127 if (a != NULL_TREE)
4128 type |= ARM_FT_NAKED;
4130 a = lookup_attribute ("isr", attr);
4131 if (a == NULL_TREE)
4132 a = lookup_attribute ("interrupt", attr);
4134 if (a == NULL_TREE)
4135 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4136 else
4137 type |= arm_isr_value (TREE_VALUE (a));
4139 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4140 type |= ARM_FT_CMSE_ENTRY;
4142 return type;
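/* Illustrative declarations (hypothetical names) for the attributes
   inspected above:

     void __attribute__ ((naked)) asm_stub (void);
     int  __attribute__ ((cmse_nonsecure_entry)) ns_entry (int x);

   The first adds ARM_FT_NAKED, so no prologue or epilogue is emitted for
   the function; the second adds ARM_FT_CMSE_ENTRY, marking an ARMv8-M
   Security Extensions entry function.  */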
4145 /* Returns the type of the current function. */
4147 unsigned long
4148 arm_current_func_type (void)
4150 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4151 cfun->machine->func_type = arm_compute_func_type ();
4153 return cfun->machine->func_type;
4156 bool
4157 arm_allocate_stack_slots_for_args (void)
4159 /* Naked functions should not allocate stack slots for arguments. */
4160 return !IS_NAKED (arm_current_func_type ());
4163 static bool
4164 arm_warn_func_return (tree decl)
4166 /* Naked functions are implemented entirely in assembly, including the
4167 return sequence, so suppress warnings about this. */
4168 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4172 /* Output assembler code for a block containing the constant parts
4173 of a trampoline, leaving space for the variable parts.
4175 On the ARM, (if r8 is the static chain regnum, and remembering that
4176 referencing pc adds an offset of 8) the trampoline looks like:
4177 ldr r8, [pc, #0]
4178 ldr pc, [pc]
4179 .word static chain value
4180 .word function's address
4181 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4183 In FDPIC mode, the trampoline looks like:
4184 .word trampoline address
4185 .word trampoline GOT address
4186 ldr r12, [pc, #8] ; #4 for Arm mode
4187 ldr r9, [pc, #8] ; #4 for Arm mode
4188 ldr pc, [pc, #8] ; #4 for Arm mode
4189 .word static chain value
4190 .word GOT address
4191 .word function's address
4194 static void
4195 arm_asm_trampoline_template (FILE *f)
4197 fprintf (f, "\t.syntax unified\n");
4199 if (TARGET_FDPIC)
4201 /* The first two words are a function descriptor pointing to the
4202 trampoline code just below. */
4203 if (TARGET_ARM)
4204 fprintf (f, "\t.arm\n");
4205 else if (TARGET_THUMB2)
4206 fprintf (f, "\t.thumb\n");
4207 else
4208 /* Only ARM and Thumb-2 are supported. */
4209 gcc_unreachable ();
4211 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4212 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4213 /* Trampoline code which sets the static chain register but also
4214 PIC register before jumping into real code. */
4215 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4216 STATIC_CHAIN_REGNUM, PC_REGNUM,
4217 TARGET_THUMB2 ? 8 : 4);
4218 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4219 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4220 TARGET_THUMB2 ? 8 : 4);
4221 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4222 PC_REGNUM, PC_REGNUM,
4223 TARGET_THUMB2 ? 8 : 4);
4224 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4226 else if (TARGET_ARM)
4228 fprintf (f, "\t.arm\n");
4229 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4230 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4232 else if (TARGET_THUMB2)
4234 fprintf (f, "\t.thumb\n");
4235 /* The Thumb-2 trampoline is similar to the arm implementation.
4236 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4237 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4238 STATIC_CHAIN_REGNUM, PC_REGNUM);
4239 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4241 else
4243 ASM_OUTPUT_ALIGN (f, 2);
4244 fprintf (f, "\t.code\t16\n");
4245 fprintf (f, ".Ltrampoline_start:\n");
4246 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4247 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4248 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4249 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4250 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4251 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4253 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4254 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4257 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4259 static void
4260 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4262 rtx fnaddr, mem, a_tramp;
4264 emit_block_move (m_tramp, assemble_trampoline_template (),
4265 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4267 if (TARGET_FDPIC)
4269 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4270 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4271 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4272 /* The function start address is at offset 8, but in Thumb mode
4273 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4274 below. */
4275 rtx trampoline_code_start
4276 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4278 /* Write initial funcdesc which points to the trampoline. */
4279 mem = adjust_address (m_tramp, SImode, 0);
4280 emit_move_insn (mem, trampoline_code_start);
4281 mem = adjust_address (m_tramp, SImode, 4);
4282 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4283 /* Set up the static chain. */
4284 mem = adjust_address (m_tramp, SImode, 20);
4285 emit_move_insn (mem, chain_value);
4286 /* GOT + real function entry point. */
4287 mem = adjust_address (m_tramp, SImode, 24);
4288 emit_move_insn (mem, gotaddr);
4289 mem = adjust_address (m_tramp, SImode, 28);
4290 emit_move_insn (mem, fnaddr);
4292 else
4294 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4295 emit_move_insn (mem, chain_value);
4297 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4298 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4299 emit_move_insn (mem, fnaddr);
4302 a_tramp = XEXP (m_tramp, 0);
4303 maybe_emit_call_builtin___clear_cache (a_tramp,
4304 plus_constant (ptr_mode,
4305 a_tramp,
4306 TRAMPOLINE_SIZE));
4309 /* Thumb trampolines should be entered in thumb mode, so set
4310 the bottom bit of the address. */
4312 static rtx
4313 arm_trampoline_adjust_address (rtx addr)
4315 /* For FDPIC don't fix trampoline address since it's a function
4316 descriptor and not a function address. */
4317 if (TARGET_THUMB && !TARGET_FDPIC)
4318 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4319 NULL, 0, OPTAB_LIB_WIDEN);
4320 return addr;
4323 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4324 includes call-clobbered registers too. If this is a leaf function
4325 we can just examine the registers used by the RTL, but otherwise we
4326 have to assume that whatever function is called might clobber
4327 anything, and so we have to save all the call-clobbered registers
4328 as well. */
4329 static inline bool reg_needs_saving_p (unsigned reg)
4331 unsigned long func_type = arm_current_func_type ();
4333 if (IS_INTERRUPT (func_type))
4334 if (df_regs_ever_live_p (reg)
4335 /* Save call-clobbered core registers. */
4336 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4337 return true;
4338 else
4339 return false;
4340 else
4341 if (!df_regs_ever_live_p (reg)
4342 || call_used_or_fixed_reg_p (reg))
4343 return false;
4344 else
4345 return true;
4348 /* Return 1 if it is possible to return using a single instruction.
4349 If SIBLING is non-null, this is a test for a return before a sibling
4350 call. SIBLING is the call insn, so we can examine its register usage. */
4353 use_return_insn (int iscond, rtx sibling)
4355 int regno;
4356 unsigned int func_type;
4357 unsigned long saved_int_regs;
4358 unsigned HOST_WIDE_INT stack_adjust;
4359 arm_stack_offsets *offsets;
4361 /* Never use a return instruction before reload has run. */
4362 if (!reload_completed)
4363 return 0;
4365 /* Never use a return instruction when return address signing
4366 mechanism is enabled as it requires more than one
4367 instruction. */
4368 if (arm_current_function_pac_enabled_p ())
4369 return 0;
4371 func_type = arm_current_func_type ();
4373 /* Naked, volatile and stack alignment functions need special
4374 consideration. */
4375 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4376 return 0;
4378 /* So do interrupt functions that use the frame pointer and Thumb
4379 interrupt functions. */
4380 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4381 return 0;
4383 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4384 && !optimize_function_for_size_p (cfun))
4385 return 0;
4387 offsets = arm_get_frame_offsets ();
4388 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4390 /* As do variadic functions. */
4391 if (crtl->args.pretend_args_size
4392 || cfun->machine->uses_anonymous_args
4393 /* Or if the function calls __builtin_eh_return () */
4394 || crtl->calls_eh_return
4395 /* Or if the function calls alloca */
4396 || cfun->calls_alloca
4397 /* Or if there is a stack adjustment. However, if the stack pointer
4398 is saved on the stack, we can use a pre-incrementing stack load. */
4399 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4400 && stack_adjust == 4))
4401 /* Or if the static chain register was saved above the frame, under the
4402 assumption that the stack pointer isn't saved on the stack. */
4403 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4404 && arm_compute_static_chain_stack_bytes() != 0))
4405 return 0;
4407 saved_int_regs = offsets->saved_regs_mask;
4409 /* Unfortunately, the insn
4411 ldmib sp, {..., sp, ...}
4413 triggers a bug on most SA-110 based devices, such that the stack
4414 pointer won't be correctly restored if the instruction takes a
4415 page fault. We work around this problem by popping r3 along with
4416 the other registers, since that is never slower than executing
4417 another instruction.
4419 We test for !arm_arch5t here, because code for any architecture
4420 less than this could potentially be run on one of the buggy
4421 chips. */
4422 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4424 /* Validate that r3 is a call-clobbered register (always true in
4425 the default abi) ... */
4426 if (!call_used_or_fixed_reg_p (3))
4427 return 0;
4429 /* ... that it isn't being used for a return value ... */
4430 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4431 return 0;
4433 /* ... or for a tail-call argument ... */
4434 if (sibling)
4436 gcc_assert (CALL_P (sibling));
4438 if (find_regno_fusage (sibling, USE, 3))
4439 return 0;
4442 /* ... and that there are no call-saved registers in r0-r2
4443 (always true in the default ABI). */
4444 if (saved_int_regs & 0x7)
4445 return 0;
4448 /* Can't be done if interworking with Thumb, and any registers have been
4449 stacked. */
4450 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4451 return 0;
4453 /* On StrongARM, conditional returns are expensive if they aren't
4454 taken and multiple registers have been stacked. */
4455 if (iscond && arm_tune_strongarm)
4457 /* Conditional return when just the LR is stored is a simple
4458 conditional-load instruction, that's not expensive. */
4459 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4460 return 0;
4462 if (flag_pic
4463 && arm_pic_register != INVALID_REGNUM
4464 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4465 return 0;
4468 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4469 several instructions if anything needs to be popped. Armv8.1-M Mainline
4470 also needs several instructions to save and restore FP context. */
4471 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4472 return 0;
4474 /* If there are saved registers but the LR isn't saved, then we need
4475 two instructions for the return. */
4476 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4477 return 0;
4479 /* Can't be done if any of the VFP regs are pushed,
4480 since this also requires an insn. */
4481 if (TARGET_VFP_BASE)
4482 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4483 if (reg_needs_saving_p (regno))
4484 return 0;
4486 if (TARGET_REALLY_IWMMXT)
4487 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4488 if (reg_needs_saving_p (regno))
4489 return 0;
4491 return 1;
4494 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4495 shrink-wrapping if possible. This is the case if we need to emit a
4496 prologue, which we can test by looking at the offsets. */
4497 bool
4498 use_simple_return_p (void)
4500 arm_stack_offsets *offsets;
4502 /* Note this function can be called before or after reload. */
4503 if (!reload_completed)
4504 arm_compute_frame_layout ();
4506 offsets = arm_get_frame_offsets ();
4507 return offsets->outgoing_args != 0;
4510 /* Return TRUE if int I is a valid immediate ARM constant. */
4513 const_ok_for_arm (HOST_WIDE_INT i)
4515 int lowbit;
4517 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4518 be all zero, or all one. */
4519 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4520 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4521 != ((~(unsigned HOST_WIDE_INT) 0)
4522 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4523 return FALSE;
4525 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4527 /* Fast return for 0 and small values. We must do this for zero, since
4528 the code below can't handle that one case. */
4529 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4530 return TRUE;
4532 /* Get the number of trailing zeros. */
4533 lowbit = ffs((int) i) - 1;
4535 /* Only even shifts are allowed in ARM mode so round down to the
4536 nearest even number. */
4537 if (TARGET_ARM)
4538 lowbit &= ~1;
4540 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4541 return TRUE;
4543 if (TARGET_ARM)
4545 /* Allow rotated constants in ARM mode. */
4546 if (lowbit <= 4
4547 && ((i & ~0xc000003f) == 0
4548 || (i & ~0xf000000f) == 0
4549 || (i & ~0xfc000003) == 0))
4550 return TRUE;
4552 else if (TARGET_THUMB2)
4554 HOST_WIDE_INT v;
4556 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4557 v = i & 0xff;
4558 v |= v << 16;
4559 if (i == v || i == (v | (v << 8)))
4560 return TRUE;
4562 /* Allow repeated pattern 0xXY00XY00. */
4563 v = i & 0xff00;
4564 v |= v << 16;
4565 if (i == v)
4566 return TRUE;
4568 else if (TARGET_HAVE_MOVT)
4570 /* Thumb-1 Targets with MOVT. */
4571 if (i > 0xffff)
4572 return FALSE;
4573 else
4574 return TRUE;
4577 return FALSE;
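/* A few hedged examples derived from the tests above (not exhaustive):
   0x000000ff and 0x00ff0000 pass the plain shifted-8-bit check for both
   ARM and Thumb-2; 0xfc000003 is accepted only in ARM mode, via the
   rotated-constant cases; 0x0001fe00 (0xff shifted by an odd amount)
   fails ARM's even-rotation rule but passes for Thumb-2; 0x00ff00ff and
   0xaaaaaaaa match only the Thumb-2 replicated patterns.  */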
4580 /* Return true if I is a valid constant for the operation CODE. */
4582 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4584 if (const_ok_for_arm (i))
4585 return 1;
4587 switch (code)
4589 case SET:
4590 /* See if we can use movw. */
4591 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4592 return 1;
4593 else
4594 /* Otherwise, try mvn. */
4595 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4597 case PLUS:
4598 /* See if we can use addw or subw. */
4599 if (TARGET_THUMB2
4600 && ((i & 0xfffff000) == 0
4601 || ((-i) & 0xfffff000) == 0))
4602 return 1;
4603 /* Fall through. */
4604 case COMPARE:
4605 case EQ:
4606 case NE:
4607 case GT:
4608 case LE:
4609 case LT:
4610 case GE:
4611 case GEU:
4612 case LTU:
4613 case GTU:
4614 case LEU:
4615 case UNORDERED:
4616 case ORDERED:
4617 case UNEQ:
4618 case UNGE:
4619 case UNLT:
4620 case UNGT:
4621 case UNLE:
4622 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4624 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4625 case XOR:
4626 return 0;
4628 case IOR:
4629 if (TARGET_THUMB2)
4630 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4631 return 0;
4633 case AND:
4634 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4636 default:
4637 gcc_unreachable ();
4641 /* Return true if I is a valid di mode constant for the operation CODE. */
4643 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4645 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4646 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4647 rtx hi = GEN_INT (hi_val);
4648 rtx lo = GEN_INT (lo_val);
4650 if (TARGET_THUMB1)
4651 return 0;
4653 switch (code)
4655 case AND:
4656 case IOR:
4657 case XOR:
4658 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4659 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4660 case PLUS:
4661 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4663 default:
4664 return 0;
4668 /* Emit a sequence of insns to handle a large constant.
4669 CODE is the code of the operation required, it can be any of SET, PLUS,
4670 IOR, AND, XOR, MINUS;
4671 MODE is the mode in which the operation is being performed;
4672 VAL is the integer to operate on;
4673 SOURCE is the other operand (a register, or a null-pointer for SET);
4674 SUBTARGETS means it is safe to create scratch registers if that will
4675 either produce a simpler sequence, or we will want to cse the values.
4676 Return value is the number of insns emitted. */
4678 /* ??? Tweak this for thumb2. */
4680 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4681 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4683 rtx cond;
4685 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4686 cond = COND_EXEC_TEST (PATTERN (insn));
4687 else
4688 cond = NULL_RTX;
4690 if (subtargets || code == SET
4691 || (REG_P (target) && REG_P (source)
4692 && REGNO (target) != REGNO (source)))
4694 /* After arm_reorg has been called, we can't fix up expensive
4695 constants by pushing them into memory so we must synthesize
4696 them in-line, regardless of the cost. This is only likely to
4697 be more costly on chips that have load delay slots and we are
4698 compiling without running the scheduler (so no splitting
4699 occurred before the final instruction emission).
4701 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4703 if (!cfun->machine->after_arm_reorg
4704 && !cond
4705 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4706 1, 0)
4707 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4708 + (code != SET))))
4710 if (code == SET)
4712 /* Currently SET is the only monadic value for CODE, all
4713 the rest are dyadic. */
4714 if (TARGET_USE_MOVT)
4715 arm_emit_movpair (target, GEN_INT (val));
4716 else
4717 emit_set_insn (target, GEN_INT (val));
4719 return 1;
4721 else
4723 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4725 if (TARGET_USE_MOVT)
4726 arm_emit_movpair (temp, GEN_INT (val));
4727 else
4728 emit_set_insn (temp, GEN_INT (val));
4730 /* For MINUS, the value is subtracted from, since we never
4731 have subtraction of a constant. */
4732 if (code == MINUS)
4733 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4734 else
4735 emit_set_insn (target,
4736 gen_rtx_fmt_ee (code, mode, source, temp));
4737 return 2;
4742 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4746 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4747 ARM/THUMB2 immediates, and add up to VAL.
4748 The function's return value gives the number of insns required. */
4749 static int
4750 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4751 struct four_ints *return_sequence)
4753 int best_consecutive_zeros = 0;
4754 int i;
4755 int best_start = 0;
4756 int insns1, insns2;
4757 struct four_ints tmp_sequence;
4759 /* If we aren't targeting ARM, the best place to start is always at
4760 the bottom, otherwise look more closely. */
4761 if (TARGET_ARM)
4763 for (i = 0; i < 32; i += 2)
4765 int consecutive_zeros = 0;
4767 if (!(val & (3 << i)))
4769 while ((i < 32) && !(val & (3 << i)))
4771 consecutive_zeros += 2;
4772 i += 2;
4774 if (consecutive_zeros > best_consecutive_zeros)
4776 best_consecutive_zeros = consecutive_zeros;
4777 best_start = i - consecutive_zeros;
4779 i -= 2;
4784 /* So long as it won't require any more insns to do so, it's
4785 desirable to emit a small constant (in bits 0...9) in the last
4786 insn. This way there is more chance that it can be combined with
4787 a later addressing insn to form a pre-indexed load or store
4788 operation. Consider:
4790 *((volatile int *)0xe0000100) = 1;
4791 *((volatile int *)0xe0000110) = 2;
4793 We want this to wind up as:
4795 mov rA, #0xe0000000
4796 mov rB, #1
4797 str rB, [rA, #0x100]
4798 mov rB, #2
4799 str rB, [rA, #0x110]
4801 rather than having to synthesize both large constants from scratch.
4803 Therefore, we calculate how many insns would be required to emit
4804 the constant starting from `best_start', and also starting from
4805 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4806 yield a shorter sequence, we may as well use zero. */
4807 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4808 if (best_start != 0
4809 && ((HOST_WIDE_INT_1U << best_start) < val))
4811 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4812 if (insns2 <= insns1)
4814 *return_sequence = tmp_sequence;
4815 insns1 = insns2;
4819 return insns1;
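/* A hedged worked example (ARM mode, code == SET): 0x12340000 is not a
   single valid immediate, but splits into 0x12000000 and 0x00340000,
   both 8-bit values at even rotations, so the returned sequence
   corresponds to something like

     mov   rN, #0x12000000
     add   rN, rN, #0x340000

   i.e. two insns, with those two values in return_sequence.  */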
4822 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4823 static int
4824 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4825 struct four_ints *return_sequence, int i)
4827 int remainder = val & 0xffffffff;
4828 int insns = 0;
4830 /* Try and find a way of doing the job in either two or three
4831 instructions.
4833 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4834 location. We start at position I. This may be the MSB, or
4835 optimal_immediate_sequence may have positioned it at the largest block
4836 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4837 wrapping around to the top of the word when we drop off the bottom.
4838 In the worst case this code should produce no more than four insns.
4840 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4841 constants, shifted to any arbitrary location. We should always start
4842 at the MSB. */
4845 int end;
4846 unsigned int b1, b2, b3, b4;
4847 unsigned HOST_WIDE_INT result;
4848 int loc;
4850 gcc_assert (insns < 4);
4852 if (i <= 0)
4853 i += 32;
4855 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4856 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4858 loc = i;
4859 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4860 /* We can use addw/subw for the last 12 bits. */
4861 result = remainder;
4862 else
4864 /* Use an 8-bit shifted/rotated immediate. */
4865 end = i - 8;
4866 if (end < 0)
4867 end += 32;
4868 result = remainder & ((0x0ff << end)
4869 | ((i < end) ? (0xff >> (32 - end))
4870 : 0));
4871 i -= 8;
4874 else
4876 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4877 arbitrary shifts. */
4878 i -= TARGET_ARM ? 2 : 1;
4879 continue;
4882 /* Next, see if we can do a better job with a thumb2 replicated
4883 constant.
4885 We do it this way around to catch the cases like 0x01F001E0 where
4886 two 8-bit immediates would work, but a replicated constant would
4887 make it worse.
4889 TODO: 16-bit constants that don't clear all the bits, but still win.
4890 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4891 if (TARGET_THUMB2)
4893 b1 = (remainder & 0xff000000) >> 24;
4894 b2 = (remainder & 0x00ff0000) >> 16;
4895 b3 = (remainder & 0x0000ff00) >> 8;
4896 b4 = remainder & 0xff;
4898 if (loc > 24)
4900 /* The 8-bit immediate already found clears b1 (and maybe b2),
4901 but must leave b3 and b4 alone. */
4903 /* First try to find a 32-bit replicated constant that clears
4904 almost everything. We can assume that we can't do it in one,
4905 or else we wouldn't be here. */
4906 unsigned int tmp = b1 & b2 & b3 & b4;
4907 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4908 + (tmp << 24);
4909 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4910 + (tmp == b3) + (tmp == b4);
4911 if (tmp
4912 && (matching_bytes >= 3
4913 || (matching_bytes == 2
4914 && const_ok_for_op (remainder & ~tmp2, code))))
4916 /* At least 3 of the bytes match, and the fourth has at
4917 least as many bits set, or two of the bytes match
4918 and it will only require one more insn to finish. */
4919 result = tmp2;
4920 i = tmp != b1 ? 32
4921 : tmp != b2 ? 24
4922 : tmp != b3 ? 16
4923 : 8;
4926 /* Second, try to find a 16-bit replicated constant that can
4927 leave three of the bytes clear. If b2 or b4 is already
4928 zero, then we can. If the 8-bit from above would not
4929 clear b2 anyway, then we still win. */
4930 else if (b1 == b3 && (!b2 || !b4
4931 || (remainder & 0x00ff0000 & ~result)))
4933 result = remainder & 0xff00ff00;
4934 i = 24;
4937 else if (loc > 16)
4939 /* The 8-bit immediate already found clears b2 (and maybe b3)
4940 and we don't get here unless b1 is already clear, but it will
4941 leave b4 unchanged. */
4943 /* If we can clear b2 and b4 at once, then we win, since the
4944 8-bits couldn't possibly reach that far. */
4945 if (b2 == b4)
4947 result = remainder & 0x00ff00ff;
4948 i = 16;
4953 return_sequence->i[insns++] = result;
4954 remainder &= ~result;
4956 if (code == SET || code == MINUS)
4957 code = PLUS;
4959 while (remainder);
4961 return insns;
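/* Hedged note on the Thumb-2 replicated-constant logic above, reusing the
   example named in the comment: 0x01f001e0 already splits into the two
   ordinary shifted immediates 0x01f00000 and 0x000001e0, so the
   replicated paths deliberately stand aside there; they only take over
   when a 0x00XY00XY or 0xXYXYXYXY value clears more bytes per insn than
   a plain 8-bit immediate could.  */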
4964 /* Emit an instruction with the indicated PATTERN. If COND is
4965 non-NULL, conditionalize the execution of the instruction on COND
4966 being true. */
4968 static void
4969 emit_constant_insn (rtx cond, rtx pattern)
4971 if (cond)
4972 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4973 emit_insn (pattern);
4976 /* As above, but extra parameter GENERATE which, if clear, suppresses
4977 RTL generation. */
4979 static int
4980 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4981 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4982 int subtargets, int generate)
4984 int can_invert = 0;
4985 int can_negate = 0;
4986 int final_invert = 0;
4987 int i;
4988 int set_sign_bit_copies = 0;
4989 int clear_sign_bit_copies = 0;
4990 int clear_zero_bit_copies = 0;
4991 int set_zero_bit_copies = 0;
4992 int insns = 0, neg_insns, inv_insns;
4993 unsigned HOST_WIDE_INT temp1, temp2;
4994 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4995 struct four_ints *immediates;
4996 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4998 /* Find out which operations are safe for a given CODE. Also do a quick
4999 check for degenerate cases; these can occur when DImode operations
5000 are split. */
5001 switch (code)
5003 case SET:
5004 can_invert = 1;
5005 break;
5007 case PLUS:
5008 can_negate = 1;
5009 break;
5011 case IOR:
5012 if (remainder == 0xffffffff)
5014 if (generate)
5015 emit_constant_insn (cond,
5016 gen_rtx_SET (target,
5017 GEN_INT (ARM_SIGN_EXTEND (val))));
5018 return 1;
5021 if (remainder == 0)
5023 if (reload_completed && rtx_equal_p (target, source))
5024 return 0;
5026 if (generate)
5027 emit_constant_insn (cond, gen_rtx_SET (target, source));
5028 return 1;
5030 break;
5032 case AND:
5033 if (remainder == 0)
5035 if (generate)
5036 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5037 return 1;
5039 if (remainder == 0xffffffff)
5041 if (reload_completed && rtx_equal_p (target, source))
5042 return 0;
5043 if (generate)
5044 emit_constant_insn (cond, gen_rtx_SET (target, source));
5045 return 1;
5047 can_invert = 1;
5048 break;
5050 case XOR:
5051 if (remainder == 0)
5053 if (reload_completed && rtx_equal_p (target, source))
5054 return 0;
5055 if (generate)
5056 emit_constant_insn (cond, gen_rtx_SET (target, source));
5057 return 1;
5060 if (remainder == 0xffffffff)
5062 if (generate)
5063 emit_constant_insn (cond,
5064 gen_rtx_SET (target,
5065 gen_rtx_NOT (mode, source)));
5066 return 1;
5068 final_invert = 1;
5069 break;
5071 case MINUS:
5072 /* We treat MINUS as (val - source), since (source - val) is always
5073 passed as (source + (-val)). */
5074 if (remainder == 0)
5076 if (generate)
5077 emit_constant_insn (cond,
5078 gen_rtx_SET (target,
5079 gen_rtx_NEG (mode, source)));
5080 return 1;
5082 if (const_ok_for_arm (val))
5084 if (generate)
5085 emit_constant_insn (cond,
5086 gen_rtx_SET (target,
5087 gen_rtx_MINUS (mode, GEN_INT (val),
5088 source)));
5089 return 1;
5092 break;
5094 default:
5095 gcc_unreachable ();
5098 /* If we can do it in one insn get out quickly. */
5099 if (const_ok_for_op (val, code))
5101 if (generate)
5102 emit_constant_insn (cond,
5103 gen_rtx_SET (target,
5104 (source
5105 ? gen_rtx_fmt_ee (code, mode, source,
5106 GEN_INT (val))
5107 : GEN_INT (val))));
5108 return 1;
5111 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5112 insn. */
5113 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5114 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5116 if (generate)
5118 if (mode == SImode && i == 16)
5119 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5120 smaller insn. */
5121 emit_constant_insn (cond,
5122 gen_zero_extendhisi2
5123 (target, gen_lowpart (HImode, source)));
5124 else
5125 /* Extz only supports SImode, but we can coerce the operands
5126 into that mode. */
5127 emit_constant_insn (cond,
5128 gen_extzv_t2 (gen_lowpart (SImode, target),
5129 gen_lowpart (SImode, source),
5130 GEN_INT (i), const0_rtx));
5133 return 1;
5136 /* Calculate a few attributes that may be useful for specific
5137 optimizations. */
5138 /* Count number of leading zeros. */
5139 for (i = 31; i >= 0; i--)
5141 if ((remainder & (1 << i)) == 0)
5142 clear_sign_bit_copies++;
5143 else
5144 break;
5147 /* Count number of leading 1's. */
5148 for (i = 31; i >= 0; i--)
5150 if ((remainder & (1 << i)) != 0)
5151 set_sign_bit_copies++;
5152 else
5153 break;
5156 /* Count number of trailing zeros. */
5157 for (i = 0; i <= 31; i++)
5159 if ((remainder & (1 << i)) == 0)
5160 clear_zero_bit_copies++;
5161 else
5162 break;
5165 /* Count number of trailing 1's. */
5166 for (i = 0; i <= 31; i++)
5168 if ((remainder & (1 << i)) != 0)
5169 set_zero_bit_copies++;
5170 else
5171 break;
5174 switch (code)
5176 case SET:
5177 /* See if we can do this by sign_extending a constant that is known
5178 to be negative. This is a good way of doing it, since the shift
5179 may well merge into a subsequent insn. */
5180 if (set_sign_bit_copies > 1)
5182 if (const_ok_for_arm
5183 (temp1 = ARM_SIGN_EXTEND (remainder
5184 << (set_sign_bit_copies - 1))))
5186 if (generate)
5188 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5189 emit_constant_insn (cond,
5190 gen_rtx_SET (new_src, GEN_INT (temp1)));
5191 emit_constant_insn (cond,
5192 gen_ashrsi3 (target, new_src,
5193 GEN_INT (set_sign_bit_copies - 1)));
5195 return 2;
5197 /* For an inverted constant, we will need to set the low bits;
5198 these will be shifted out of harm's way. */
5199 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5200 if (const_ok_for_arm (~temp1))
5202 if (generate)
5204 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5205 emit_constant_insn (cond,
5206 gen_rtx_SET (new_src, GEN_INT (temp1)));
5207 emit_constant_insn (cond,
5208 gen_ashrsi3 (target, new_src,
5209 GEN_INT (set_sign_bit_copies - 1)));
5211 return 2;
5215 /* See if we can calculate the value as the difference between two
5216 valid immediates. */
5217 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5219 int topshift = clear_sign_bit_copies & ~1;
5221 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5222 & (0xff000000 >> topshift));
5224 /* If temp1 is zero, then that means the 9 most significant
5225 bits of remainder were 1 and we've caused it to overflow.
5226 When topshift is 0 we don't need to do anything since we
5227 can borrow from 'bit 32'. */
5228 if (temp1 == 0 && topshift != 0)
5229 temp1 = 0x80000000 >> (topshift - 1);
5231 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5233 if (const_ok_for_arm (temp2))
5235 if (generate)
5237 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5238 emit_constant_insn (cond,
5239 gen_rtx_SET (new_src, GEN_INT (temp1)));
5240 emit_constant_insn (cond,
5241 gen_addsi3 (target, new_src,
5242 GEN_INT (-temp2)));
5245 return 2;
5249 /* See if we can generate this by setting the bottom (or the top)
5250 16 bits, and then shifting these into the other half of the
5251 word. We only look for the simplest cases, to do more would cost
5252 too much. Be careful, however, not to generate this when the
5253 alternative would take fewer insns. */
5254 if (val & 0xffff0000)
5256 temp1 = remainder & 0xffff0000;
5257 temp2 = remainder & 0x0000ffff;
5259 /* Overlaps outside this range are best done using other methods. */
5260 for (i = 9; i < 24; i++)
5262 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5263 && !const_ok_for_arm (temp2))
5265 rtx new_src = (subtargets
5266 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5267 : target);
5268 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5269 source, subtargets, generate);
5270 source = new_src;
5271 if (generate)
5272 emit_constant_insn
5273 (cond,
5274 gen_rtx_SET
5275 (target,
5276 gen_rtx_IOR (mode,
5277 gen_rtx_ASHIFT (mode, source,
5278 GEN_INT (i)),
5279 source)));
5280 return insns + 1;
5284 /* Don't duplicate cases already considered. */
5285 for (i = 17; i < 24; i++)
5287 if (((temp1 | (temp1 >> i)) == remainder)
5288 && !const_ok_for_arm (temp1))
5290 rtx new_src = (subtargets
5291 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5292 : target);
5293 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5294 source, subtargets, generate);
5295 source = new_src;
5296 if (generate)
5297 emit_constant_insn
5298 (cond,
5299 gen_rtx_SET (target,
5300 gen_rtx_IOR
5301 (mode,
5302 gen_rtx_LSHIFTRT (mode, source,
5303 GEN_INT (i)),
5304 source)));
5305 return insns + 1;
5309 break;
5311 case IOR:
5312 case XOR:
5313 /* If we have IOR or XOR, and the constant can be loaded in a
5314 single instruction, and we can find a temporary to put it in,
5315 then this can be done in two instructions instead of 3-4. */
5316 if (subtargets
5317 /* TARGET can't be NULL if SUBTARGETS is 0. */
5318 || (reload_completed && !reg_mentioned_p (target, source)))
5320 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5322 if (generate)
5324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5326 emit_constant_insn (cond,
5327 gen_rtx_SET (sub, GEN_INT (val)));
5328 emit_constant_insn (cond,
5329 gen_rtx_SET (target,
5330 gen_rtx_fmt_ee (code, mode,
5331 source, sub)));
5333 return 2;
5337 if (code == XOR)
5338 break;
5340 /* Convert:
5341 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5342 and the remainder 0s, e.g. 0xfff00000)
5343 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5345 This can be done in 2 instructions by using shifts with mov or mvn.
5346 e.g. for
5347 x = x | 0xfff00000;
5348 we generate:
5349 mvn r0, r0, asl #12
5350 mvn r0, r0, lsr #12 */
5351 if (set_sign_bit_copies > 8
5352 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5354 if (generate)
5356 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5357 rtx shift = GEN_INT (set_sign_bit_copies);
5359 emit_constant_insn
5360 (cond,
5361 gen_rtx_SET (sub,
5362 gen_rtx_NOT (mode,
5363 gen_rtx_ASHIFT (mode,
5364 source,
5365 shift))));
5366 emit_constant_insn
5367 (cond,
5368 gen_rtx_SET (target,
5369 gen_rtx_NOT (mode,
5370 gen_rtx_LSHIFTRT (mode, sub,
5371 shift))));
5373 return 2;
5376 /* Convert
5377 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5379 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5381 E.g. for r0 = r0 | 0xfff we generate:
5382 mvn r0, r0, lsr #12
5383 mvn r0, r0, asl #12
5386 if (set_zero_bit_copies > 8
5387 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5389 if (generate)
5391 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5392 rtx shift = GEN_INT (set_zero_bit_copies);
5394 emit_constant_insn
5395 (cond,
5396 gen_rtx_SET (sub,
5397 gen_rtx_NOT (mode,
5398 gen_rtx_LSHIFTRT (mode,
5399 source,
5400 shift))));
5401 emit_constant_insn
5402 (cond,
5403 gen_rtx_SET (target,
5404 gen_rtx_NOT (mode,
5405 gen_rtx_ASHIFT (mode, sub,
5406 shift))));
5408 return 2;
5411 /* This will never be reached for Thumb2 because orn is a valid
5412 instruction. This is for Thumb1 and the ARM 32 bit cases.
5414 x = y | constant (such that ~constant is a valid constant)
5415 Transform this to
5416 x = ~(~y & ~constant).
5418 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5420 if (generate)
5422 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5423 emit_constant_insn (cond,
5424 gen_rtx_SET (sub,
5425 gen_rtx_NOT (mode, source)));
5426 source = sub;
5427 if (subtargets)
5428 sub = gen_reg_rtx (mode);
5429 emit_constant_insn (cond,
5430 gen_rtx_SET (sub,
5431 gen_rtx_AND (mode, source,
5432 GEN_INT (temp1))));
5433 emit_constant_insn (cond,
5434 gen_rtx_SET (target,
5435 gen_rtx_NOT (mode, sub)));
5437 return 3;
5439 break;
5441 case AND:
5442 /* See if two shifts will do 2 or more insns' worth of work. */
5443 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5445 HOST_WIDE_INT shift_mask = ((0xffffffff
5446 << (32 - clear_sign_bit_copies))
5447 & 0xffffffff);
5449 if ((remainder | shift_mask) != 0xffffffff)
5451 HOST_WIDE_INT new_val
5452 = ARM_SIGN_EXTEND (remainder | shift_mask);
5454 if (generate)
5456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5457 insns = arm_gen_constant (AND, SImode, cond, new_val,
5458 new_src, source, subtargets, 1);
5459 source = new_src;
5461 else
5463 rtx targ = subtargets ? NULL_RTX : target;
5464 insns = arm_gen_constant (AND, mode, cond, new_val,
5465 targ, source, subtargets, 0);
5469 if (generate)
5471 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5472 rtx shift = GEN_INT (clear_sign_bit_copies);
5474 emit_insn (gen_ashlsi3 (new_src, source, shift));
5475 emit_insn (gen_lshrsi3 (target, new_src, shift));
5478 return insns + 2;
5481 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5483 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5485 if ((remainder | shift_mask) != 0xffffffff)
5487 HOST_WIDE_INT new_val
5488 = ARM_SIGN_EXTEND (remainder | shift_mask);
5489 if (generate)
5491 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5493 insns = arm_gen_constant (AND, mode, cond, new_val,
5494 new_src, source, subtargets, 1);
5495 source = new_src;
5497 else
5499 rtx targ = subtargets ? NULL_RTX : target;
5501 insns = arm_gen_constant (AND, mode, cond, new_val,
5502 targ, source, subtargets, 0);
5506 if (generate)
5508 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5509 rtx shift = GEN_INT (clear_zero_bit_copies);
5511 emit_insn (gen_lshrsi3 (new_src, source, shift));
5512 emit_insn (gen_ashlsi3 (target, new_src, shift));
5515 return insns + 2;
5518 break;
5520 default:
5521 break;
5524 /* Calculate what the instruction sequences would be if we generated it
5525 normally, negated, or inverted. */
5526 if (code == AND)
5527 /* AND cannot be split into multiple insns, so invert and use BIC. */
5528 insns = 99;
5529 else
5530 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5532 if (can_negate)
5533 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5534 &neg_immediates);
5535 else
5536 neg_insns = 99;
5538 if (can_invert || final_invert)
5539 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5540 &inv_immediates);
5541 else
5542 inv_insns = 99;
5544 immediates = &pos_immediates;
5546 /* Is the negated immediate sequence more efficient? */
5547 if (neg_insns < insns && neg_insns <= inv_insns)
5549 insns = neg_insns;
5550 immediates = &neg_immediates;
5552 else
5553 can_negate = 0;
5555 /* Is the inverted immediate sequence more efficient?
5556 We must allow for an extra NOT instruction for XOR operations, although
5557 there is some chance that the final 'mvn' will get optimized later. */
5558 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5560 insns = inv_insns;
5561 immediates = &inv_immediates;
5563 else
5565 can_invert = 0;
5566 final_invert = 0;
5569 /* Now output the chosen sequence as instructions. */
5570 if (generate)
5572 for (i = 0; i < insns; i++)
5574 rtx new_src, temp1_rtx;
5576 temp1 = immediates->i[i];
5578 if (code == SET || code == MINUS)
5579 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5580 else if ((final_invert || i < (insns - 1)) && subtargets)
5581 new_src = gen_reg_rtx (mode);
5582 else
5583 new_src = target;
5585 if (can_invert)
5586 temp1 = ~temp1;
5587 else if (can_negate)
5588 temp1 = -temp1;
5590 temp1 = trunc_int_for_mode (temp1, mode);
5591 temp1_rtx = GEN_INT (temp1);
5593 if (code == SET)
5595 else if (code == MINUS)
5596 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5597 else
5598 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5600 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5601 source = new_src;
5603 if (code == SET)
5605 can_negate = can_invert;
5606 can_invert = 0;
5607 code = PLUS;
5609 else if (code == MINUS)
5610 code = PLUS;
5614 if (final_invert)
5616 if (generate)
5617 emit_constant_insn (cond, gen_rtx_SET (target,
5618 gen_rtx_NOT (mode, source)));
5619 insns++;
5622 return insns;
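
For illustration, here is a small standalone sketch (not part of arm.cc; all names are invented) of the kind of decomposition the code above performs: a 32-bit constant is broken into at most four chunks, each representable as an ARM-state immediate, and those chunks become a MOV followed by ORR/ADD instructions. The sketch only takes contiguous byte-sized fields at even bit positions, so it can need more chunks than the real optimal_immediate_sequence, which also considers rotations that wrap around bit 31.

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch only: greedily split VAL into up to four
   fields of at most 8 bits, each starting at an even bit position.
   Each field is a plausible operand for one MOV/ORR/ADD in the
   sequences built above; wrapping rotations are ignored here.  */
static int
split_into_chunks (uint32_t val, uint32_t out[4])
{
  int n = 0;
  while (val && n < 4)
    {
      int low = __builtin_ctz (val) & ~1;      /* Even starting bit.  */
      uint32_t chunk = val & (0xffu << low);   /* Grab up to 8 bits.  */
      out[n++] = chunk;
      val &= ~chunk;
    }
  return val ? -1 : n;   /* -1: needs more than four chunks here.  */
}

int
main (void)
{
  uint32_t parts[4];
  int n = split_into_chunks (0x12345678, parts);
  for (int i = 0; i < n; i++)
    printf ("chunk %d: 0x%08x\n", i, (unsigned) parts[i]);
  return 0;
}

Running it on 0x12345678 yields four chunks, which matches the fact that a general 32-bit constant can require up to four data-processing instructions on classic ARM.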
5625 /* Return TRUE if op is a constant where both the low and top words are
5626 suitable for RSB/RSC instructions. This is never true for Thumb, since
5627 we do not have RSC in that case. */
5628 static bool
5629 arm_const_double_prefer_rsbs_rsc (rtx op)
5631 /* Thumb lacks RSC, so we never prefer that sequence. */
5632 if (TARGET_THUMB || !CONST_INT_P (op))
5633 return false;
5634 HOST_WIDE_INT hi, lo;
5635 lo = UINTVAL (op) & 0xffffffffULL;
5636 hi = UINTVAL (op) >> 32;
5637 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
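
For illustration, a standalone sketch (invented names, not the real implementation) of the check this function relies on: a value is a classic ARM-state immediate when it is an 8-bit quantity rotated right by an even amount, and the function above only prefers RSB/RSC when both 32-bit halves of the constant pass that test. The real const_ok_for_arm, defined earlier in this file, also handles Thumb-2 modified immediates.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for const_ok_for_arm: true if VAL is an
   8-bit value rotated right by an even amount.  */
static bool
arm_immediate_p (uint32_t val)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      uint32_t r = rot ? ((val << rot) | (val >> (32 - rot))) : val;
      if (r <= 0xff)
        return true;
    }
  return false;
}

/* Mirror of the test above: prefer an RSB/RSC sequence only when
   both halves of the 64-bit constant are individually encodable.  */
static bool
prefer_rsbs_rsc_p (uint64_t op)
{
  return arm_immediate_p ((uint32_t) op)
	 && arm_immediate_p ((uint32_t) (op >> 32));
}

int
main (void)
{
  printf ("0x00ff0000_000000ff: %d\n", prefer_rsbs_rsc_p (0x00ff0000000000ffULL));
  printf ("0x12345678_00000001: %d\n", prefer_rsbs_rsc_p (0x1234567800000001ULL));
  return 0;
}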
5640 /* Canonicalize a comparison so that we are more likely to recognize it.
5641 This can be done for a few constant compares, where we can make the
5642 immediate value easier to load. */
5644 static void
5645 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5646 bool op0_preserve_value)
5648 machine_mode mode;
5649 unsigned HOST_WIDE_INT i, maxval;
5651 mode = GET_MODE (*op0);
5652 if (mode == VOIDmode)
5653 mode = GET_MODE (*op1);
5655 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5657 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5658 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5659 either reversed or (for constant OP1) adjusted to GE/LT.
5660 Similarly for GTU/LEU in Thumb mode. */
5661 if (mode == DImode)
5664 if (*code == GT || *code == LE
5665 || *code == GTU || *code == LEU)
5667 /* Missing comparison. First try to use an available
5668 comparison. */
5669 if (CONST_INT_P (*op1))
5671 i = INTVAL (*op1);
5672 switch (*code)
5674 case GT:
5675 case LE:
5676 if (i != maxval)
5678 /* Try to convert to GE/LT, unless that would be more
5679 expensive. */
5680 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5681 && arm_const_double_prefer_rsbs_rsc (*op1))
5682 return;
5683 *op1 = GEN_INT (i + 1);
5684 *code = *code == GT ? GE : LT;
5686 else
5688 /* GT maxval is always false, LE maxval is always true.
5689 We can't fold that away here as we must make a
5690 comparison, but we can fold them to comparisons
5691 with the same result that can be handled:
5692 op0 GT maxval -> op0 LT minval
5693 op0 LE maxval -> op0 GE minval
5694 where minval = (-maxval - 1). */
5695 *op1 = GEN_INT (-maxval - 1);
5696 *code = *code == GT ? LT : GE;
5698 return;
5700 case GTU:
5701 case LEU:
5702 if (i != ~((unsigned HOST_WIDE_INT) 0))
5704 /* Try to convert to GEU/LTU, unless that would
5705 be more expensive. */
5706 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5707 && arm_const_double_prefer_rsbs_rsc (*op1))
5708 return;
5709 *op1 = GEN_INT (i + 1);
5710 *code = *code == GTU ? GEU : LTU;
5712 else
5714 /* GTU ~0 is always false, LEU ~0 is always true.
5715 We can't fold that away here as we must make a
5716 comparison, but we can fold them to comparisons
5717 with the same result that can be handled:
5718 op0 GTU ~0 -> op0 LTU 0
5719 op0 LEU ~0 -> op0 GEU 0. */
5720 *op1 = const0_rtx;
5721 *code = *code == GTU ? LTU : GEU;
5723 return;
5725 default:
5726 gcc_unreachable ();
5730 if (!op0_preserve_value)
5732 std::swap (*op0, *op1);
5733 *code = (int)swap_condition ((enum rtx_code)*code);
5736 return;
5739 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5740 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5741 to facilitate possible combining with a cmp into 'ands'. */
5742 if (mode == SImode
5743 && GET_CODE (*op0) == ZERO_EXTEND
5744 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5745 && GET_MODE (XEXP (*op0, 0)) == QImode
5746 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5747 && subreg_lowpart_p (XEXP (*op0, 0))
5748 && *op1 == const0_rtx)
5749 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5750 GEN_INT (255));
5752 /* Comparisons smaller than DImode. Only adjust comparisons against
5753 an out-of-range constant. */
5754 if (!CONST_INT_P (*op1)
5755 || const_ok_for_arm (INTVAL (*op1))
5756 || const_ok_for_arm (- INTVAL (*op1)))
5757 return;
5759 i = INTVAL (*op1);
5761 switch (*code)
5763 case EQ:
5764 case NE:
5765 return;
5767 case GT:
5768 case LE:
5769 if (i != maxval
5770 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5772 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5773 *code = *code == GT ? GE : LT;
5774 return;
5776 break;
5778 case GE:
5779 case LT:
5780 if (i != ~maxval
5781 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5783 *op1 = GEN_INT (i - 1);
5784 *code = *code == GE ? GT : LE;
5785 return;
5787 break;
5789 case GTU:
5790 case LEU:
5791 if (i != ~((unsigned HOST_WIDE_INT) 0)
5792 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5794 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5795 *code = *code == GTU ? GEU : LTU;
5796 return;
5798 break;
5800 case GEU:
5801 case LTU:
5802 if (i != 0
5803 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5805 *op1 = GEN_INT (i - 1);
5806 *code = *code == GEU ? GTU : LEU;
5807 return;
5809 break;
5811 default:
5812 gcc_unreachable ();
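
As a concrete illustration of the constant adjustment above: the comparison x > 0xfff cannot use an immediate CMP, because 0xfff is not an encodable ARM immediate, but the equivalent x >= 0x1000 can. The sketch below is standalone with invented names; valid_immediate_p is the same simplified stand-in for const_ok_for_arm used in the earlier sketch.

#include <stdint.h>
#include <stdbool.h>

/* Simplified stand-in for const_ok_for_arm: an 8-bit value rotated
   right by an even amount.  */
static bool
valid_immediate_p (uint32_t v)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      uint32_t r = rot ? ((v << rot) | (v >> (32 - rot))) : v;
      if (r <= 0xff)
        return true;
    }
  return false;
}

enum cmp_code { CMP_GT, CMP_GE, CMP_LT, CMP_LE };

/* Sketch of the GT/LE adjustment: rewrite  x > i  as  x >= i + 1
   (and  x <= i  as  x < i + 1) when I itself is not encodable but
   I + 1 is, e.g. i = 0xfff becomes 0x1000.  The real code above also
   accepts a negated immediate (for CMN), guards the maxval boundary,
   and handles the unsigned and DImode variants.  */
static bool
adjust_signed_cmp (enum cmp_code *code, int32_t *i)
{
  if ((*code == CMP_GT || *code == CMP_LE)
      && *i != INT32_MAX
      && !valid_immediate_p ((uint32_t) *i)
      && valid_immediate_p ((uint32_t) (*i + 1)))
    {
      *i += 1;
      *code = (*code == CMP_GT ? CMP_GE : CMP_LT);
      return true;
    }
  return false;
}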
5817 /* Define how to find the value returned by a function. */
5819 static rtx
5820 arm_function_value(const_tree type, const_tree func,
5821 bool outgoing ATTRIBUTE_UNUSED)
5823 machine_mode mode;
5824 int unsignedp ATTRIBUTE_UNUSED;
5825 rtx r ATTRIBUTE_UNUSED;
5827 mode = TYPE_MODE (type);
5829 if (TARGET_AAPCS_BASED)
5830 return aapcs_allocate_return_reg (mode, type, func);
5832 /* Promote integer types. */
5833 if (INTEGRAL_TYPE_P (type))
5834 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5836 /* Promotes small structs returned in a register to full-word size
5837 for big-endian AAPCS. */
5838 if (arm_return_in_msb (type))
5840 HOST_WIDE_INT size = int_size_in_bytes (type);
5841 if (size % UNITS_PER_WORD != 0)
5843 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5844 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5848 return arm_libcall_value_1 (mode);
5851 /* libcall hashtable helpers. */
5853 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5855 static inline hashval_t hash (const rtx_def *);
5856 static inline bool equal (const rtx_def *, const rtx_def *);
5857 static inline void remove (rtx_def *);
5860 inline bool
5861 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5863 return rtx_equal_p (p1, p2);
5866 inline hashval_t
5867 libcall_hasher::hash (const rtx_def *p1)
5869 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5872 typedef hash_table<libcall_hasher> libcall_table_type;
5874 static void
5875 add_libcall (libcall_table_type *htab, rtx libcall)
5877 *htab->find_slot (libcall, INSERT) = libcall;
5880 static bool
5881 arm_libcall_uses_aapcs_base (const_rtx libcall)
5883 static bool init_done = false;
5884 static libcall_table_type *libcall_htab = NULL;
5886 if (!init_done)
5888 init_done = true;
5890 libcall_htab = new libcall_table_type (31);
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5893 add_libcall (libcall_htab,
5894 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5895 add_libcall (libcall_htab,
5896 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5897 add_libcall (libcall_htab,
5898 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5900 add_libcall (libcall_htab,
5901 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5902 add_libcall (libcall_htab,
5903 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5904 add_libcall (libcall_htab,
5905 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5906 add_libcall (libcall_htab,
5907 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5909 add_libcall (libcall_htab,
5910 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5911 add_libcall (libcall_htab,
5912 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5913 add_libcall (libcall_htab,
5914 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5915 add_libcall (libcall_htab,
5916 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5917 add_libcall (libcall_htab,
5918 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5919 add_libcall (libcall_htab,
5920 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5921 add_libcall (libcall_htab,
5922 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5923 add_libcall (libcall_htab,
5924 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5925 add_libcall (libcall_htab,
5926 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5927 add_libcall (libcall_htab,
5928 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5930 /* Values from double-precision helper functions are returned in core
5931 registers if the selected core only supports single-precision
5932 arithmetic, even if we are using the hard-float ABI. The same is
5933 true for single-precision helpers except in case of MVE, because in
5934 MVE we will be using the hard-float ABI on a CPU which doesn't support
5935 single-precision operations in hardware. In MVE the following check
5936 enables use of emulation for the single-precision arithmetic
5937 operations. */
5938 if (TARGET_HAVE_MVE)
5940 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5941 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5942 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5943 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5944 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5945 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5946 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5947 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5948 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5949 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5950 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5952 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5953 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5954 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5955 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5956 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5957 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5958 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5959 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5960 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5961 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5962 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5963 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5964 SFmode));
5965 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5966 DFmode));
5967 add_libcall (libcall_htab,
5968 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5971 return libcall && libcall_htab->find (libcall) != NULL;
5974 static rtx
5975 arm_libcall_value_1 (machine_mode mode)
5977 if (TARGET_AAPCS_BASED)
5978 return aapcs_libcall_value (mode);
5979 else if (TARGET_IWMMXT_ABI
5980 && arm_vector_mode_supported_p (mode))
5981 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5982 else
5983 return gen_rtx_REG (mode, ARG_REGISTER (1));
5986 /* Define how to find the value returned by a library function
5987 assuming the value has mode MODE. */
5989 static rtx
5990 arm_libcall_value (machine_mode mode, const_rtx libcall)
5992 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5993 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5995 /* The following libcalls return their result in integer registers,
5996 even though they return a floating point value. */
5997 if (arm_libcall_uses_aapcs_base (libcall))
5998 return gen_rtx_REG (mode, ARG_REGISTER(1));
6002 return arm_libcall_value_1 (mode);
6005 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6007 static bool
6008 arm_function_value_regno_p (const unsigned int regno)
6010 if (regno == ARG_REGISTER (1)
6011 || (TARGET_32BIT
6012 && TARGET_AAPCS_BASED
6013 && TARGET_HARD_FLOAT
6014 && regno == FIRST_VFP_REGNUM)
6015 || (TARGET_IWMMXT_ABI
6016 && regno == FIRST_IWMMXT_REGNUM))
6017 return true;
6019 return false;
6022 /* Determine the amount of memory needed to store the possible return
6023 registers of an untyped call. */
6025 arm_apply_result_size (void)
6027 int size = 16;
6029 if (TARGET_32BIT)
6031 if (TARGET_HARD_FLOAT_ABI)
6032 size += 32;
6033 if (TARGET_IWMMXT_ABI)
6034 size += 8;
6037 return size;
6040 /* Decide whether TYPE should be returned in memory (true)
6041 or in a register (false). FNTYPE is the type of the function making
6042 the call. */
6043 static bool
6044 arm_return_in_memory (const_tree type, const_tree fntype)
6046 HOST_WIDE_INT size;
6048 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6050 if (TARGET_AAPCS_BASED)
6052 /* Simple, non-aggregate types (i.e. not including vectors and
6053 complex) are always returned in a register (or registers).
6054 We don't care about which register here, so we can short-cut
6055 some of the detail. */
6056 if (!AGGREGATE_TYPE_P (type)
6057 && TREE_CODE (type) != VECTOR_TYPE
6058 && TREE_CODE (type) != COMPLEX_TYPE)
6059 return false;
6061 /* Any return value that is no larger than one word can be
6062 returned in r0. */
6063 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6064 return false;
6066 /* Check any available co-processors to see if they accept the
6067 type as a register candidate (VFP, for example, can return
6068 some aggregates in consecutive registers). These aren't
6069 available if the call is variadic. */
6070 if (aapcs_select_return_coproc (type, fntype) >= 0)
6071 return false;
6073 /* Vector values should be returned using ARM registers, not
6074 memory (unless they're over 16 bytes, which will break since
6075 we only have four call-clobbered registers to play with). */
6076 if (TREE_CODE (type) == VECTOR_TYPE)
6077 return (size < 0 || size > (4 * UNITS_PER_WORD));
6079 /* The rest go in memory. */
6080 return true;
6083 if (TREE_CODE (type) == VECTOR_TYPE)
6084 return (size < 0 || size > (4 * UNITS_PER_WORD));
6086 if (!AGGREGATE_TYPE_P (type) &&
6087 (TREE_CODE (type) != VECTOR_TYPE))
6088 /* All simple types are returned in registers. */
6089 return false;
6091 if (arm_abi != ARM_ABI_APCS)
6093 /* ATPCS and later return aggregate types in memory only if they are
6094 larger than a word (or are variable size). */
6095 return (size < 0 || size > UNITS_PER_WORD);
6098 /* For the arm-wince targets we choose to be compatible with Microsoft's
6099 ARM and Thumb compilers, which always return aggregates in memory. */
6100 #ifndef ARM_WINCE
6101 /* All structures/unions bigger than one word are returned in memory.
6102 Also catch the case where int_size_in_bytes returns -1. In this case
6103 the aggregate is either huge or of variable size, and in either case
6104 we will want to return it via memory and not in a register. */
6105 if (size < 0 || size > UNITS_PER_WORD)
6106 return true;
6108 if (TREE_CODE (type) == RECORD_TYPE)
6110 tree field;
6112 /* For a struct the APCS says that we only return in a register
6113 if the type is 'integer like' and every addressable element
6114 has an offset of zero. For practical purposes this means
6115 that the structure can have at most one non bit-field element
6116 and that this element must be the first one in the structure. */
6118 /* Find the first field, ignoring non FIELD_DECL things which will
6119 have been created by C++. */
6120 /* NOTE: This code is deprecated and has not been updated to handle
6121 DECL_FIELD_ABI_IGNORED. */
6122 for (field = TYPE_FIELDS (type);
6123 field && TREE_CODE (field) != FIELD_DECL;
6124 field = DECL_CHAIN (field))
6125 continue;
6127 if (field == NULL)
6128 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6130 /* Check that the first field is valid for returning in a register. */
6132 /* ... Floats are not allowed */
6133 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6134 return true;
6136 /* ... Aggregates that are not themselves valid for returning in
6137 a register are not allowed. */
6138 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6139 return true;
6141 /* Now check the remaining fields, if any. Only bitfields are allowed,
6142 since they are not addressable. */
6143 for (field = DECL_CHAIN (field);
6144 field;
6145 field = DECL_CHAIN (field))
6147 if (TREE_CODE (field) != FIELD_DECL)
6148 continue;
6150 if (!DECL_BIT_FIELD_TYPE (field))
6151 return true;
6154 return false;
6157 if (TREE_CODE (type) == UNION_TYPE)
6159 tree field;
6161 /* Unions can be returned in registers if every element is
6162 integral, or can be returned in an integer register. */
6163 for (field = TYPE_FIELDS (type);
6164 field;
6165 field = DECL_CHAIN (field))
6167 if (TREE_CODE (field) != FIELD_DECL)
6168 continue;
6170 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6171 return true;
6173 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6174 return true;
6177 return false;
6179 #endif /* not ARM_WINCE */
6181 /* Return all other types in memory. */
6182 return true;
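
To make the AAPCS branch above concrete, here is an illustrative set of return types and the classification the rules imply (a sketch only; the hfa case additionally depends on the VFP return-candidate check and on using the hard-float variant):

/* Illustrative examples for the AAPCS path of arm_return_in_memory.  */
struct one_word { int x; };        /* <= 1 word: returned in r0.  */
struct two_words { int x, y; };    /* > 1 word, no co-processor
				      candidate: returned in memory via
				      an implicit result pointer.  */
struct hfa { float a, b, c, d; };  /* May come back in s0-s3 when
				      aapcs_select_return_coproc
				      accepts it (hard-float ABI).  */

struct one_word ret_small (void);  /* Value in a register.  */
struct two_words ret_big (void);   /* Caller passes the address.  */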
6185 const struct pcs_attribute_arg
6187 const char *arg;
6188 enum arm_pcs value;
6189 } pcs_attribute_args[] =
6191 {"aapcs", ARM_PCS_AAPCS},
6192 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6193 #if 0
6194 /* We could recognize these, but changes would be needed elsewhere
6195 * to implement them. */
6196 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6197 {"atpcs", ARM_PCS_ATPCS},
6198 {"apcs", ARM_PCS_APCS},
6199 #endif
6200 {NULL, ARM_PCS_UNKNOWN}
6203 static enum arm_pcs
6204 arm_pcs_from_attribute (tree attr)
6206 const struct pcs_attribute_arg *ptr;
6207 const char *arg;
6209 /* Get the value of the argument. */
6210 if (TREE_VALUE (attr) == NULL_TREE
6211 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6212 return ARM_PCS_UNKNOWN;
6214 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6216 /* Check it against the list of known arguments. */
6217 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6218 if (streq (arg, ptr->arg))
6219 return ptr->value;
6221 /* An unrecognized PCS variant. */
6222 return ARM_PCS_UNKNOWN;
6225 /* Get the PCS variant to use for this call. TYPE is the function's type
6226 specification, DECL is the specific declaration. DECL may be null if
6227 the call could be indirect or if this is a library call. */
6228 static enum arm_pcs
6229 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6231 bool user_convention = false;
6232 enum arm_pcs user_pcs = arm_pcs_default;
6233 tree attr;
6235 gcc_assert (type);
6237 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6238 if (attr)
6240 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6241 user_convention = true;
6244 if (TARGET_AAPCS_BASED)
6246 /* Detect varargs functions. These always use the base rules
6247 (no argument is ever a candidate for a co-processor
6248 register). */
6249 bool base_rules = stdarg_p (type);
6251 if (user_convention)
6253 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6254 sorry ("non-AAPCS derived PCS variant");
6255 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6256 error ("variadic functions must use the base AAPCS variant");
6259 if (base_rules)
6260 return ARM_PCS_AAPCS;
6261 else if (user_convention)
6262 return user_pcs;
6263 #if 0
6264 /* Unfortunately, this is not safe and can lead to wrong code
6265 being generated (PR96882). Not all calls into the back-end
6266 pass the DECL, so it is unsafe to make any PCS-changing
6267 decisions based on it. In particular the RETURN_IN_MEMORY
6268 hook is only ever passed a TYPE. This needs revisiting to
6269 see if there are any partial improvements that can be
6270 re-enabled. */
6271 else if (decl && flag_unit_at_a_time)
6273 /* Local functions never leak outside this compilation unit,
6274 so we are free to use whatever conventions are
6275 appropriate. */
6276 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6277 cgraph_node *local_info_node
6278 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6279 if (local_info_node && local_info_node->local)
6280 return ARM_PCS_AAPCS_LOCAL;
6282 #endif
6284 else if (user_convention && user_pcs != arm_pcs_default)
6285 sorry ("PCS variant");
6287 /* For everything else we use the target's default. */
6288 return arm_pcs_default;
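
For reference, the attribute parsed here is one that users attach to function types; of the table above only "aapcs" and "aapcs-vfp" are accepted, and the checks reject variadic functions that ask for a non-base variant. A minimal usage example:

/* Force the VFP variant of AAPCS for one function type, independent
   of the command-line default float ABI.  */
double __attribute__ ((pcs ("aapcs-vfp"))) scale_vfp (double x);

/* Explicitly request the base variant (arguments and result in core
   registers).  */
double __attribute__ ((pcs ("aapcs"))) scale_base (double x);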
6292 static void
6293 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6294 const_tree fntype ATTRIBUTE_UNUSED,
6295 rtx libcall ATTRIBUTE_UNUSED,
6296 const_tree fndecl ATTRIBUTE_UNUSED)
6298 /* Record the unallocated VFP registers. */
6299 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6300 pcum->aapcs_vfp_reg_alloc = 0;
6303 /* Bitmasks that indicate whether earlier versions of GCC would have
6304 taken a different path through the ABI logic. This should result in
6305 a -Wpsabi warning if the earlier path led to a different ABI decision.
6307 WARN_PSABI_EMPTY_CXX17_BASE
6308 Indicates that the type includes an artificial empty C++17 base field
6309 that, prior to GCC 10.1, would prevent the type from being treated as
6310 a HFA or HVA. See PR94711 for details.
6312 WARN_PSABI_NO_UNIQUE_ADDRESS
6313 Indicates that the type includes an empty [[no_unique_address]] field
6314 that, prior to GCC 10.1, would prevent the type from being treated as
6315 a HFA or HVA. */
6316 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6317 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6318 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6320 /* Walk down the type tree of TYPE counting consecutive base elements.
6321 If *MODEP is VOIDmode, then set it to the first valid floating point
6322 type. If a non-floating point type is found, or if a floating point
6323 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6324 otherwise return the count in the sub-tree.
6326 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6327 function has changed its behavior relative to earlier versions of GCC.
6328 Normally the argument should be nonnull and point to a zero-initialized
6329 variable. The function then records whether the ABI decision might
6330 be affected by a known fix to the ABI logic, setting the associated
6331 WARN_PSABI_* bits if so.
6333 When the argument is instead a null pointer, the function tries to
6334 simulate the behavior of GCC before all such ABI fixes were made.
6335 This is useful to check whether the function returns something
6336 different after the ABI fixes. */
6337 static int
6338 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6339 unsigned int *warn_psabi_flags)
6341 machine_mode mode;
6342 HOST_WIDE_INT size;
6344 switch (TREE_CODE (type))
6346 case REAL_TYPE:
6347 mode = TYPE_MODE (type);
6348 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6349 return -1;
6351 if (*modep == VOIDmode)
6352 *modep = mode;
6354 if (*modep == mode)
6355 return 1;
6357 break;
6359 case COMPLEX_TYPE:
6360 mode = TYPE_MODE (TREE_TYPE (type));
6361 if (mode != DFmode && mode != SFmode)
6362 return -1;
6364 if (*modep == VOIDmode)
6365 *modep = mode;
6367 if (*modep == mode)
6368 return 2;
6370 break;
6372 case VECTOR_TYPE:
6373 /* Use V2SImode and V4SImode as representatives of all 64-bit
6374 and 128-bit vector types, whether or not those modes are
6375 supported with the present options. */
6376 size = int_size_in_bytes (type);
6377 switch (size)
6379 case 8:
6380 mode = V2SImode;
6381 break;
6382 case 16:
6383 mode = V4SImode;
6384 break;
6385 default:
6386 return -1;
6389 if (*modep == VOIDmode)
6390 *modep = mode;
6392 /* Vector modes are considered to be opaque: two vectors are
6393 equivalent for the purposes of being homogeneous aggregates
6394 if they are the same size. */
6395 if (*modep == mode)
6396 return 1;
6398 break;
6400 case ARRAY_TYPE:
6402 int count;
6403 tree index = TYPE_DOMAIN (type);
6405 /* Can't handle incomplete types nor sizes that are not
6406 fixed. */
6407 if (!COMPLETE_TYPE_P (type)
6408 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6409 return -1;
6411 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6412 warn_psabi_flags);
6413 if (count == -1
6414 || !index
6415 || !TYPE_MAX_VALUE (index)
6416 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6417 || !TYPE_MIN_VALUE (index)
6418 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6419 || count < 0)
6420 return -1;
6422 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6423 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6425 /* There must be no padding. */
6426 if (wi::to_wide (TYPE_SIZE (type))
6427 != count * GET_MODE_BITSIZE (*modep))
6428 return -1;
6430 return count;
6433 case RECORD_TYPE:
6435 int count = 0;
6436 int sub_count;
6437 tree field;
6439 /* Can't handle incomplete types nor sizes that are not
6440 fixed. */
6441 if (!COMPLETE_TYPE_P (type)
6442 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6443 return -1;
6445 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6447 if (TREE_CODE (field) != FIELD_DECL)
6448 continue;
6450 if (DECL_FIELD_ABI_IGNORED (field))
6452 /* See whether this is something that earlier versions of
6453 GCC failed to ignore. */
6454 unsigned int flag;
6455 if (lookup_attribute ("no_unique_address",
6456 DECL_ATTRIBUTES (field)))
6457 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6458 else if (cxx17_empty_base_field_p (field))
6459 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6460 else
6461 /* No compatibility problem. */
6462 continue;
6464 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6465 if (warn_psabi_flags)
6467 *warn_psabi_flags |= flag;
6468 continue;
6471 /* A zero-width bitfield may affect layout in some
6472 circumstances, but adds no members. The determination
6473 of whether or not a type is an HFA is performed after
6474 layout is complete, so if the type still looks like an
6475 HFA afterwards, it is still classed as one. This is
6476 potentially an ABI break for the hard-float ABI. */
6477 else if (DECL_BIT_FIELD (field)
6478 && integer_zerop (DECL_SIZE (field)))
6480 /* Prior to GCC-12 these fields were stripped early,
6481 hiding them from the back-end entirely and
6482 resulting in the correct behaviour for argument
6483 passing. Simulate that old behaviour without
6484 generating a warning. */
6485 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6486 continue;
6487 if (warn_psabi_flags)
6489 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6490 continue;
6494 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6495 warn_psabi_flags);
6496 if (sub_count < 0)
6497 return -1;
6498 count += sub_count;
6501 /* There must be no padding. */
6502 if (wi::to_wide (TYPE_SIZE (type))
6503 != count * GET_MODE_BITSIZE (*modep))
6504 return -1;
6506 return count;
6509 case UNION_TYPE:
6510 case QUAL_UNION_TYPE:
6512 /* These aren't very interesting except in a degenerate case. */
6513 int count = 0;
6514 int sub_count;
6515 tree field;
6517 /* Can't handle incomplete types nor sizes that are not
6518 fixed. */
6519 if (!COMPLETE_TYPE_P (type)
6520 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6521 return -1;
6523 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6525 if (TREE_CODE (field) != FIELD_DECL)
6526 continue;
6528 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6529 warn_psabi_flags);
6530 if (sub_count < 0)
6531 return -1;
6532 count = count > sub_count ? count : sub_count;
6535 /* There must be no padding. */
6536 if (wi::to_wide (TYPE_SIZE (type))
6537 != count * GET_MODE_BITSIZE (*modep))
6538 return -1;
6540 return count;
6543 default:
6544 break;
6547 return -1;
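
Some illustrative types and how the walk above would plausibly classify them as homogeneous aggregate candidates (a sketch, assuming the AAPCS-VFP rules are in use):

/* Candidates: one to four elements of a single floating-point or
   vector base mode, with no padding.  */
struct hfa_f4 { float a, b, c, d; };   /* 4 x SFmode: accepted.  */
struct hfa_d2 { double d[2]; };        /* 2 x DFmode: accepted.  */
struct hfa_cd { _Complex double z; };  /* Counts as 2 x DFmode.  */

/* Rejected: too many elements, or mixed base modes.  */
struct too_many { float f[5]; };       /* 5 elements: ag_count > 4.  */
struct mixed { float f; int i; };      /* int member: walk returns -1.  */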
6550 /* Return true if PCS_VARIANT should use VFP registers. */
6551 static bool
6552 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6554 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6556 static bool seen_thumb1_vfp = false;
6558 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6560 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6561 /* sorry() is not immediately fatal, so only display this once. */
6562 seen_thumb1_vfp = true;
6565 return true;
6568 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6569 return false;
6571 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6572 (TARGET_VFP_DOUBLE || !is_double));
6575 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6576 suitable for passing or returning in VFP registers for the PCS
6577 variant selected. If it is, then *BASE_MODE is updated to contain
6578 a machine mode describing each element of the argument's type and
6579 *COUNT to hold the number of such elements. */
6580 static bool
6581 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6582 machine_mode mode, const_tree type,
6583 machine_mode *base_mode, int *count)
6585 machine_mode new_mode = VOIDmode;
6587 /* If we have the type information, prefer that to working things
6588 out from the mode. */
6589 if (type)
6591 unsigned int warn_psabi_flags = 0;
6592 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6593 &warn_psabi_flags);
6594 if (ag_count > 0 && ag_count <= 4)
6596 static unsigned last_reported_type_uid;
6597 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6598 int alt;
6599 if (warn_psabi
6600 && warn_psabi_flags
6601 && uid != last_reported_type_uid
6602 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6603 != ag_count))
6605 const char *url10
6606 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6607 const char *url12
6608 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6609 gcc_assert (alt == -1);
6610 last_reported_type_uid = uid;
6611 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6612 qualification. */
6613 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6614 inform (input_location, "parameter passing for argument of "
6615 "type %qT with %<[[no_unique_address]]%> members "
6616 "changed %{in GCC 10.1%}",
6617 TYPE_MAIN_VARIANT (type), url10);
6618 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6619 inform (input_location, "parameter passing for argument of "
6620 "type %qT when C++17 is enabled changed to match "
6621 "C++14 %{in GCC 10.1%}",
6622 TYPE_MAIN_VARIANT (type), url10);
6623 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6624 inform (input_location, "parameter passing for argument of "
6625 "type %qT changed %{in GCC 12.1%}",
6626 TYPE_MAIN_VARIANT (type), url12);
6628 *count = ag_count;
6630 else
6631 return false;
6633 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6634 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6635 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6637 *count = 1;
6638 new_mode = mode;
6640 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6642 *count = 2;
6643 new_mode = (mode == DCmode ? DFmode : SFmode);
6645 else
6646 return false;
6649 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6650 return false;
6652 *base_mode = new_mode;
6654 if (TARGET_GENERAL_REGS_ONLY)
6655 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6656 type);
6658 return true;
6661 static bool
6662 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6663 machine_mode mode, const_tree type)
6665 int count ATTRIBUTE_UNUSED;
6666 machine_mode ag_mode ATTRIBUTE_UNUSED;
6668 if (!use_vfp_abi (pcs_variant, false))
6669 return false;
6670 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6671 &ag_mode, &count);
6674 static bool
6675 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6676 const_tree type)
6678 if (!use_vfp_abi (pcum->pcs_variant, false))
6679 return false;
6681 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6682 &pcum->aapcs_vfp_rmode,
6683 &pcum->aapcs_vfp_rcount);
6686 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6687 for the behaviour of this function. */
6689 static bool
6690 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6691 const_tree type ATTRIBUTE_UNUSED)
6693 int rmode_size
6694 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6695 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6696 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6697 int regno;
6699 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6700 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6702 pcum->aapcs_vfp_reg_alloc = mask << regno;
6703 if (mode == BLKmode
6704 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6705 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6707 int i;
6708 int rcount = pcum->aapcs_vfp_rcount;
6709 int rshift = shift;
6710 machine_mode rmode = pcum->aapcs_vfp_rmode;
6711 rtx par;
6712 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6714 /* Avoid using unsupported vector modes. */
6715 if (rmode == V2SImode)
6716 rmode = DImode;
6717 else if (rmode == V4SImode)
6719 rmode = DImode;
6720 rcount *= 2;
6721 rshift /= 2;
6724 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6725 for (i = 0; i < rcount; i++)
6727 rtx tmp = gen_rtx_REG (rmode,
6728 FIRST_VFP_REGNUM + regno + i * rshift);
6729 tmp = gen_rtx_EXPR_LIST
6730 (VOIDmode, tmp,
6731 GEN_INT (i * GET_MODE_SIZE (rmode)));
6732 XVECEXP (par, 0, i) = tmp;
6735 pcum->aapcs_reg = par;
6737 else
6738 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6739 return true;
6741 return false;
6744 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6745 comment there for the behaviour of this function. */
6747 static rtx
6748 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6749 machine_mode mode,
6750 const_tree type ATTRIBUTE_UNUSED)
6752 if (!use_vfp_abi (pcs_variant, false))
6753 return NULL;
6755 if (mode == BLKmode
6756 || (GET_MODE_CLASS (mode) == MODE_INT
6757 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6758 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6760 int count;
6761 machine_mode ag_mode;
6762 int i;
6763 rtx par;
6764 int shift;
6766 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6767 &ag_mode, &count);
6769 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6771 if (ag_mode == V2SImode)
6772 ag_mode = DImode;
6773 else if (ag_mode == V4SImode)
6775 ag_mode = DImode;
6776 count *= 2;
6779 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6780 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6781 for (i = 0; i < count; i++)
6783 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6784 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6785 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6786 XVECEXP (par, 0, i) = tmp;
6789 return par;
6792 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6795 static void
6796 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6797 machine_mode mode ATTRIBUTE_UNUSED,
6798 const_tree type ATTRIBUTE_UNUSED)
6800 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6801 pcum->aapcs_vfp_reg_alloc = 0;
6802 return;
6805 #define AAPCS_CP(X) \
6807 aapcs_ ## X ## _cum_init, \
6808 aapcs_ ## X ## _is_call_candidate, \
6809 aapcs_ ## X ## _allocate, \
6810 aapcs_ ## X ## _is_return_candidate, \
6811 aapcs_ ## X ## _allocate_return_reg, \
6812 aapcs_ ## X ## _advance \
6815 /* Table of co-processors that can be used to pass arguments in
6816 registers. Ideally no argument should be a candidate for more than
6817 one co-processor table entry, but the table is processed in order
6818 and stops after the first match. If that entry then fails to put
6819 the argument into a co-processor register, the argument will go on
6820 the stack. */
6821 static struct
6823 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6824 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6826 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6827 BLKmode) is a candidate for this co-processor's registers; this
6828 function should ignore any position-dependent state in
6829 CUMULATIVE_ARGS and only use call-type dependent information. */
6830 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6832 /* Return true if the argument does get a co-processor register; it
6833 should set aapcs_reg to an RTX of the register allocated as is
6834 required for a return from FUNCTION_ARG. */
6835 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6837 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6838 be returned in this co-processor's registers. */
6839 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6841 /* Allocate and return an RTX element to hold the return type of a call. This
6842 routine must not fail and will only be called if is_return_candidate
6843 returned true with the same parameters. */
6844 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6846 /* Finish processing this argument and prepare to start processing
6847 the next one. */
6848 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6849 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6851 AAPCS_CP(vfp)
6854 #undef AAPCS_CP
6856 static int
6857 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6858 const_tree type)
6860 int i;
6862 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6863 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6864 return i;
6866 return -1;
6869 static int
6870 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6872 /* We aren't passed a decl, so we can't check that a call is local.
6873 However, it isn't clear that that would be a win anyway, since it
6874 might limit some tail-calling opportunities. */
6875 enum arm_pcs pcs_variant;
6877 if (fntype)
6879 const_tree fndecl = NULL_TREE;
6881 if (TREE_CODE (fntype) == FUNCTION_DECL)
6883 fndecl = fntype;
6884 fntype = TREE_TYPE (fntype);
6887 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6889 else
6890 pcs_variant = arm_pcs_default;
6892 if (pcs_variant != ARM_PCS_AAPCS)
6894 int i;
6896 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6897 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6898 TYPE_MODE (type),
6899 type))
6900 return i;
6902 return -1;
6905 static rtx
6906 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6907 const_tree fntype)
6909 /* We aren't passed a decl, so we can't check that a call is local.
6910 However, it isn't clear that that would be a win anyway, since it
6911 might limit some tail-calling opportunities. */
6912 enum arm_pcs pcs_variant;
6913 int unsignedp ATTRIBUTE_UNUSED;
6915 if (fntype)
6917 const_tree fndecl = NULL_TREE;
6919 if (TREE_CODE (fntype) == FUNCTION_DECL)
6921 fndecl = fntype;
6922 fntype = TREE_TYPE (fntype);
6925 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6927 else
6928 pcs_variant = arm_pcs_default;
6930 /* Promote integer types. */
6931 if (type && INTEGRAL_TYPE_P (type))
6932 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6934 if (pcs_variant != ARM_PCS_AAPCS)
6936 int i;
6938 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6939 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6940 type))
6941 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6942 mode, type);
6945 /* Promotes small structs returned in a register to full-word size
6946 for big-endian AAPCS. */
6947 if (type && arm_return_in_msb (type))
6949 HOST_WIDE_INT size = int_size_in_bytes (type);
6950 if (size % UNITS_PER_WORD != 0)
6952 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6953 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6957 return gen_rtx_REG (mode, R0_REGNUM);
6960 static rtx
6961 aapcs_libcall_value (machine_mode mode)
6963 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6964 && GET_MODE_SIZE (mode) <= 4)
6965 mode = SImode;
6967 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6970 /* Lay out a function argument using the AAPCS rules. The rule
6971 numbers referred to here are those in the AAPCS. */
6972 static void
6973 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6974 const_tree type, bool named)
6976 int nregs, nregs2;
6977 int ncrn;
6979 /* We only need to do this once per argument. */
6980 if (pcum->aapcs_arg_processed)
6981 return;
6983 pcum->aapcs_arg_processed = true;
6985 /* Special case: if named is false then we are handling an incoming
6986 anonymous argument which is on the stack. */
6987 if (!named)
6988 return;
6990 /* Is this a potential co-processor register candidate? */
6991 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6993 int slot = aapcs_select_call_coproc (pcum, mode, type);
6994 pcum->aapcs_cprc_slot = slot;
6996 /* We don't have to apply any of the rules from part B of the
6997 preparation phase, these are handled elsewhere in the
6998 compiler. */
7000 if (slot >= 0)
7002 /* A Co-processor register candidate goes either in its own
7003 class of registers or on the stack. */
7004 if (!pcum->aapcs_cprc_failed[slot])
7006 /* C1.cp - Try to allocate the argument to co-processor
7007 registers. */
7008 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7009 return;
7011 /* C2.cp - Put the argument on the stack and note that we
7012 can't assign any more candidates in this slot. We also
7013 need to note that we have allocated stack space, so that
7014 we won't later try to split a non-cprc candidate between
7015 core registers and the stack. */
7016 pcum->aapcs_cprc_failed[slot] = true;
7017 pcum->can_split = false;
7020 /* We didn't get a register, so this argument goes on the
7021 stack. */
7022 gcc_assert (pcum->can_split == false);
7023 return;
7027 /* C3 - For double-word aligned arguments, round the NCRN up to the
7028 next even number. */
7029 ncrn = pcum->aapcs_ncrn;
7030 if (ncrn & 1)
7032 int res = arm_needs_doubleword_align (mode, type);
7033 /* Only warn during RTL expansion of call stmts, otherwise we would
7034 warn e.g. during gimplification even on functions that will
7035 always be inlined, and we'd warn multiple times. Don't warn when
7036 called in expand_function_start either, as we warn instead in
7037 arm_function_arg_boundary in that case. */
7038 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7039 inform (input_location, "parameter passing for argument of type "
7040 "%qT changed in GCC 7.1", type);
7041 else if (res > 0)
7042 ncrn++;
7045 nregs = ARM_NUM_REGS2(mode, type);
7047 /* Sigh, this test should really assert that nregs > 0, but a GCC
7048 extension allows empty structs and then gives them empty size; it
7049 then allows such a structure to be passed by value. For some of
7050 the code below we have to pretend that such an argument has
7051 non-zero size so that we 'locate' it correctly either in
7052 registers or on the stack. */
7053 gcc_assert (nregs >= 0);
7055 nregs2 = nregs ? nregs : 1;
7057 /* C4 - Argument fits entirely in core registers. */
7058 if (ncrn + nregs2 <= NUM_ARG_REGS)
7060 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7061 pcum->aapcs_next_ncrn = ncrn + nregs;
7062 return;
7065 /* C5 - Some core registers left and there are no arguments already
7066 on the stack: split this argument between the remaining core
7067 registers and the stack. */
7068 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7070 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7071 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7072 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7073 return;
7076 /* C6 - NCRN is set to 4. */
7077 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7079 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7080 return;
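
The core-register rules C3-C5 above are easiest to see on an example (illustrative only, assuming the base AAPCS with no co-processor candidates):

/* How the arguments land for a call to f.  */
void f (int a,        /* C4: fits in the next core register, r0.  */
	long long b,  /* C3: NCRN is rounded up to an even number, so
			 r1 is skipped and b occupies r2/r3.  */
	int c);       /* C6/C7: no core registers are left, so c goes
			 on the stack.  */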
7083 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7084 for a call to a function whose data type is FNTYPE.
7085 For a library call, FNTYPE is NULL. */
7086 void
7087 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7088 rtx libname,
7089 tree fndecl ATTRIBUTE_UNUSED)
7091 /* Long call handling. */
7092 if (fntype)
7093 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7094 else
7095 pcum->pcs_variant = arm_pcs_default;
7097 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7099 if (arm_libcall_uses_aapcs_base (libname))
7100 pcum->pcs_variant = ARM_PCS_AAPCS;
7102 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7103 pcum->aapcs_reg = NULL_RTX;
7104 pcum->aapcs_partial = 0;
7105 pcum->aapcs_arg_processed = false;
7106 pcum->aapcs_cprc_slot = -1;
7107 pcum->can_split = true;
7109 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7111 int i;
7113 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7115 pcum->aapcs_cprc_failed[i] = false;
7116 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7119 return;
7122 /* Legacy ABIs */
7124 /* On the ARM, the offset starts at 0. */
7125 pcum->nregs = 0;
7126 pcum->iwmmxt_nregs = 0;
7127 pcum->can_split = true;
7129 /* Varargs vectors are treated the same as long long.
7130 named_count avoids having to change the way arm handles 'named' */
7131 pcum->named_count = 0;
7132 pcum->nargs = 0;
7134 if (TARGET_REALLY_IWMMXT && fntype)
7136 tree fn_arg;
7138 for (fn_arg = TYPE_ARG_TYPES (fntype);
7139 fn_arg;
7140 fn_arg = TREE_CHAIN (fn_arg))
7141 pcum->named_count += 1;
7143 if (! pcum->named_count)
7144 pcum->named_count = INT_MAX;
7148 /* Return 2 if double word alignment is required for argument passing,
7149 but wasn't required before the fix for PR88469.
7150 Return 1 if double word alignment is required for argument passing.
7151 Return -1 if double word alignment used to be required for argument
7152 passing before PR77728 ABI fix, but is not required anymore.
7153 Return 0 if double word alignment is not required and wasn't required
7154 before either. */
7155 static int
7156 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7158 if (!type)
7159 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7161 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7162 if (!AGGREGATE_TYPE_P (type))
7163 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7165 /* Array types: Use member alignment of element type. */
7166 if (TREE_CODE (type) == ARRAY_TYPE)
7167 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7169 int ret = 0;
7170 int ret2 = 0;
7171 /* Record/aggregate types: Use greatest member alignment of any member.
7173 Note that we explicitly consider zero-sized fields here, even though
7174 they don't map to AAPCS machine types. For example, in:
7176 struct __attribute__((aligned(8))) empty {};
7178 struct s {
7179 [[no_unique_address]] empty e;
7180 int x;
7183 "s" contains only one Fundamental Data Type (the int field)
7184 but gains 8-byte alignment and size thanks to "e". */
7185 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7186 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7188 if (TREE_CODE (field) == FIELD_DECL)
7189 return 1;
7190 else
7191 /* Before PR77728 fix, we were incorrectly considering also
7192 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7193 Make sure we can warn about that with -Wpsabi. */
7194 ret = -1;
7196 else if (TREE_CODE (field) == FIELD_DECL
7197 && DECL_BIT_FIELD_TYPE (field)
7198 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7199 ret2 = 1;
7201 if (ret2)
7202 return 2;
7204 return ret;
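/* For illustration (a sketch, not an exhaustive list, under AAPCS
   alignment rules): a plain "int" or a struct containing only "int"
   members has 4-byte alignment and yields 0; "long long", "double" or a
   struct with a "long long" member has 8-byte alignment and yields 1; a
   struct whose only over-aligned member is a bit-field declared with a
   64-bit type is the PR88469 case and yields 2, so that -Wpsabi can note
   the GCC 9.1 change; and a struct whose over-alignment comes only from a
   non-FIELD_DECL member is the PR77728 case and yields -1, so that
   -Wpsabi can note the GCC 7.1 change.  */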
7208 /* Determine where to put an argument to a function.
7209 Value is zero to push the argument on the stack,
7210 or a hard register in which to store the argument.
7212 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7213 the preceding args and about the function being called.
7214 ARG is a description of the argument.
7216 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7217 other arguments are passed on the stack. If (NAMED == 0) (which happens
7218 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7219 defined), say it is passed on the stack (function_prologue will
7220 indeed make it be passed on the stack if necessary). */
7222 static rtx
7223 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7225 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7226 int nregs;
7228 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7229 a call insn (op3 of a call_value insn). */
7230 if (arg.end_marker_p ())
7231 return const0_rtx;
7233 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7235 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7236 return pcum->aapcs_reg;
7239 /* Varargs vectors are treated the same as long long.
7240 named_count avoids having to change the way arm handles 'named' */
7241 if (TARGET_IWMMXT_ABI
7242 && arm_vector_mode_supported_p (arg.mode)
7243 && pcum->named_count > pcum->nargs + 1)
7245 if (pcum->iwmmxt_nregs <= 9)
7246 return gen_rtx_REG (arg.mode,
7247 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7248 else
7250 pcum->can_split = false;
7251 return NULL_RTX;
7255 /* Put doubleword aligned quantities in even register pairs. */
7256 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7258 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7259 if (res < 0 && warn_psabi)
7260 inform (input_location, "parameter passing for argument of type "
7261 "%qT changed in GCC 7.1", arg.type);
7262 else if (res > 0)
7264 pcum->nregs++;
7265 if (res > 1 && warn_psabi)
7266 inform (input_location, "parameter passing for argument of type "
7267 "%qT changed in GCC 9.1", arg.type);
7271 /* Only allow splitting an arg between regs and memory if all preceding
7272 args were allocated to regs. For args passed by reference we only count
7273 the reference pointer. */
7274 if (pcum->can_split)
7275 nregs = 1;
7276 else
7277 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7279 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7280 return NULL_RTX;
7282 return gen_rtx_REG (arg.mode, pcum->nregs);
7285 static unsigned int
7286 arm_function_arg_boundary (machine_mode mode, const_tree type)
7288 if (!ARM_DOUBLEWORD_ALIGN)
7289 return PARM_BOUNDARY;
7291 int res = arm_needs_doubleword_align (mode, type);
7292 if (res < 0 && warn_psabi)
7293 inform (input_location, "parameter passing for argument of type %qT "
7294 "changed in GCC 7.1", type);
7295 if (res > 1 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type "
7297 "%qT changed in GCC 9.1", type);
7299 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7302 static int
7303 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7305 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7306 int nregs = pcum->nregs;
7308 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7310 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7311 return pcum->aapcs_partial;
7314 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7315 return 0;
7317 if (NUM_ARG_REGS > nregs
7318 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7319 && pcum->can_split)
7320 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7322 return 0;
7325 /* Update the data in PCUM to advance over argument ARG. */
7327 static void
7328 arm_function_arg_advance (cumulative_args_t pcum_v,
7329 const function_arg_info &arg)
7331 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7333 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7335 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7337 if (pcum->aapcs_cprc_slot >= 0)
7339 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7340 arg.type);
7341 pcum->aapcs_cprc_slot = -1;
7344 /* Generic stuff. */
7345 pcum->aapcs_arg_processed = false;
7346 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7347 pcum->aapcs_reg = NULL_RTX;
7348 pcum->aapcs_partial = 0;
7350 else
7352 pcum->nargs += 1;
7353 if (arm_vector_mode_supported_p (arg.mode)
7354 && pcum->named_count > pcum->nargs
7355 && TARGET_IWMMXT_ABI)
7356 pcum->iwmmxt_nregs += 1;
7357 else
7358 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7362 /* Variable sized types are passed by reference. This is a GCC
7363 extension to the ARM ABI. */
7365 static bool
7366 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7368 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7371 /* Encode the current state of the #pragma [no_]long_calls. */
7372 typedef enum
7374 OFF, /* No #pragma [no_]long_calls is in effect. */
7375 LONG, /* #pragma long_calls is in effect. */
7376 SHORT /* #pragma no_long_calls is in effect. */
7377 } arm_pragma_enum;
7379 static arm_pragma_enum arm_pragma_long_calls = OFF;
7381 void
7382 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7384 arm_pragma_long_calls = LONG;
7387 void
7388 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7390 arm_pragma_long_calls = SHORT;
7393 void
7394 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7396 arm_pragma_long_calls = OFF;
7399 /* Handle an attribute requiring a FUNCTION_DECL;
7400 arguments as in struct attribute_spec.handler. */
7401 static tree
7402 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7403 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7405 if (TREE_CODE (*node) != FUNCTION_DECL)
7407 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7408 name);
7409 *no_add_attrs = true;
7412 return NULL_TREE;
7415 /* Handle an "interrupt" or "isr" attribute;
7416 arguments as in struct attribute_spec.handler. */
7417 static tree
7418 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7419 bool *no_add_attrs)
7421 if (DECL_P (*node))
7423 if (TREE_CODE (*node) != FUNCTION_DECL)
7425 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7426 name);
7427 *no_add_attrs = true;
7429 else if (TARGET_VFP_BASE)
7431 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7432 name);
7434 /* FIXME: the argument if any is checked for type attributes;
7435 should it be checked for decl ones? */
7437 else
7439 if (TREE_CODE (*node) == FUNCTION_TYPE
7440 || TREE_CODE (*node) == METHOD_TYPE)
7442 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7444 warning (OPT_Wattributes, "%qE attribute ignored",
7445 name);
7446 *no_add_attrs = true;
7449 else if (TREE_CODE (*node) == POINTER_TYPE
7450 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7451 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7452 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7454 *node = build_variant_type_copy (*node);
7455 TREE_TYPE (*node) = build_type_attribute_variant
7456 (TREE_TYPE (*node),
7457 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7458 *no_add_attrs = true;
7460 else
7462 /* Possibly pass this attribute on from the type to a decl. */
7463 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7464 | (int) ATTR_FLAG_FUNCTION_NEXT
7465 | (int) ATTR_FLAG_ARRAY_NEXT))
7467 *no_add_attrs = true;
7468 return tree_cons (name, args, NULL_TREE);
7470 else
7472 warning (OPT_Wattributes, "%qE attribute ignored",
7473 name);
7478 return NULL_TREE;
7481 /* Handle a "pcs" attribute; arguments as in struct
7482 attribute_spec.handler. */
7483 static tree
7484 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7485 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7487 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7489 warning (OPT_Wattributes, "%qE attribute ignored", name);
7490 *no_add_attrs = true;
7492 return NULL_TREE;
7495 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7496 /* Handle the "notshared" attribute. This attribute is another way of
7497 requesting hidden visibility. ARM's compiler supports
7498 "__declspec(notshared)"; we support the same thing via an
7499 attribute. */
7501 static tree
7502 arm_handle_notshared_attribute (tree *node,
7503 tree name ATTRIBUTE_UNUSED,
7504 tree args ATTRIBUTE_UNUSED,
7505 int flags ATTRIBUTE_UNUSED,
7506 bool *no_add_attrs)
7508 tree decl = TYPE_NAME (*node);
7510 if (decl)
7512 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7513 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7514 *no_add_attrs = false;
7516 return NULL_TREE;
7518 #endif
7520 /* This function returns true if a function with declaration FNDECL and type
7521 FNTYPE uses the stack to pass arguments or return variables and false
7522 otherwise. This is used for functions with the attributes
7523 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7524 diagnostic messages if the stack is used. NAME is the name of the attribute
7525 used. */
7527 static bool
7528 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7530 function_args_iterator args_iter;
7531 CUMULATIVE_ARGS args_so_far_v;
7532 cumulative_args_t args_so_far;
7533 bool first_param = true;
7534 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7536 /* Error out if any argument is passed on the stack. */
7537 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7538 args_so_far = pack_cumulative_args (&args_so_far_v);
7539 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7541 rtx arg_rtx;
7543 prev_arg_type = arg_type;
7544 if (VOID_TYPE_P (arg_type))
7545 continue;
7547 function_arg_info arg (arg_type, /*named=*/true);
7548 if (!first_param)
7549 /* ??? We should advance after processing the argument and pass
7550 the argument we're advancing past. */
7551 arm_function_arg_advance (args_so_far, arg);
7552 arg_rtx = arm_function_arg (args_so_far, arg);
7553 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7555 error ("%qE attribute not available to functions with arguments "
7556 "passed on the stack", name);
7557 return true;
7559 first_param = false;
7562 /* Error out for variadic functions since we cannot control how many
7563 arguments will be passed and thus the stack could be used. stdarg_p () is
7564 not used for the check, to avoid walking the argument list twice. */
7565 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7567 error ("%qE attribute not available to functions with variable number "
7568 "of arguments", name);
7569 return true;
7572 /* Error out if return value is passed on the stack. */
7573 ret_type = TREE_TYPE (fntype);
7574 if (arm_return_in_memory (ret_type, fntype))
7576 error ("%qE attribute not available to functions that return value on "
7577 "the stack", name);
7578 return true;
7580 return false;
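/* An illustrative sketch of what this rejects (AAPCS places each
   "long long" argument in a pair of core registers):

     void __attribute__ ((cmse_nonsecure_entry))
     f (long long a, long long b, int c);

   "a" takes r0-r1 and "b" takes r2-r3, so "c" would have to be passed on
   the stack and the attribute is rejected with the error above.  A
   variadic prototype such as "int g (int, ...)" is likewise rejected,
   since the number of arguments actually passed cannot be controlled.  */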
7583 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7584 function will check whether the attribute is allowed here and will add the
7585 attribute to the function declaration tree or otherwise issue a warning. */
7587 static tree
7588 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7589 tree /* args */,
7590 int /* flags */,
7591 bool *no_add_attrs)
7593 tree fndecl;
7595 if (!use_cmse)
7597 *no_add_attrs = true;
7598 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7599 "option", name);
7600 return NULL_TREE;
7603 /* Ignore attribute for function types. */
7604 if (TREE_CODE (*node) != FUNCTION_DECL)
7606 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7607 name);
7608 *no_add_attrs = true;
7609 return NULL_TREE;
7612 fndecl = *node;
7614 /* Warn for static linkage functions. */
7615 if (!TREE_PUBLIC (fndecl))
7617 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7618 "with static linkage", name);
7619 *no_add_attrs = true;
7620 return NULL_TREE;
7623 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7624 TREE_TYPE (fndecl));
7625 return NULL_TREE;
7629 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7630 function will check whether the attribute is allowed here and will add the
7631 attribute to the function type tree or otherwise issue a diagnostic. The
7632 reason we check this at declaration time is to only allow the use of the
7633 attribute with declarations of function pointers and not function
7634 declarations. This function checks NODE is of the expected type and issues
7635 diagnostics otherwise using NAME. If it is not of the expected type
7636 *NO_ADD_ATTRS will be set to true. */
7638 static tree
7639 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7640 tree /* args */,
7641 int /* flags */,
7642 bool *no_add_attrs)
7644 tree decl = NULL_TREE;
7645 tree fntype, type;
7647 if (!use_cmse)
7649 *no_add_attrs = true;
7650 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7651 "option", name);
7652 return NULL_TREE;
7655 if (DECL_P (*node))
7657 fntype = TREE_TYPE (*node);
7659 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7660 decl = *node;
7662 else
7663 fntype = *node;
7665 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7666 fntype = TREE_TYPE (fntype);
7668 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7670 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7671 "function pointer", name);
7672 *no_add_attrs = true;
7673 return NULL_TREE;
7676 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7678 if (*no_add_attrs)
7679 return NULL_TREE;
7681 /* Prevent trees being shared among function types with and without
7682 cmse_nonsecure_call attribute. */
7683 if (decl)
7685 type = build_distinct_type_copy (TREE_TYPE (decl));
7686 TREE_TYPE (decl) = type;
7688 else
7690 type = build_distinct_type_copy (*node);
7691 *node = type;
7694 fntype = type;
7696 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7698 type = fntype;
7699 fntype = TREE_TYPE (fntype);
7700 fntype = build_distinct_type_copy (fntype);
7701 TREE_TYPE (type) = fntype;
7704 /* Construct a type attribute and add it to the function type. */
7705 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7706 TYPE_ATTRIBUTES (fntype));
7707 TYPE_ATTRIBUTES (fntype) = attrs;
7708 return NULL_TREE;
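/* A minimal usage sketch: the attribute is accepted on the base type of a
   function pointer, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (void);
     ns_call_t *callback;

   whereas applying it directly to a function declaration triggers the
   "only applies to base type of a function pointer" warning above.  */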
7711 /* Return 0 if the attributes for two types are incompatible, 1 if they
7712 are compatible, and 2 if they are nearly compatible (which causes a
7713 warning to be generated). */
7714 static int
7715 arm_comp_type_attributes (const_tree type1, const_tree type2)
7717 int l1, l2, s1, s2;
7719 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7720 TYPE_ATTRIBUTES (type1));
7721 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7722 TYPE_ATTRIBUTES (type2));
7723 if (bool (attrs1) != bool (attrs2))
7724 return 0;
7725 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7726 return 0;
7728 /* Check for mismatch of non-default calling convention. */
7729 if (TREE_CODE (type1) != FUNCTION_TYPE)
7730 return 1;
7732 /* Check for mismatched call attributes. */
7733 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7734 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7735 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7736 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7738 /* Only bother to check if an attribute is defined. */
7739 if (l1 | l2 | s1 | s2)
7741 /* If one type has an attribute, the other must have the same attribute. */
7742 if ((l1 != l2) || (s1 != s2))
7743 return 0;
7745 /* Disallow mixed attributes. */
7746 if ((l1 & s2) || (l2 & s1))
7747 return 0;
7750 /* Check for mismatched ISR attribute. */
7751 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7752 if (! l1)
7753 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7754 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7755 if (! l2)
7756 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7757 if (l1 != l2)
7758 return 0;
7760 l1 = lookup_attribute ("cmse_nonsecure_call",
7761 TYPE_ATTRIBUTES (type1)) != NULL;
7762 l2 = lookup_attribute ("cmse_nonsecure_call",
7763 TYPE_ATTRIBUTES (type2)) != NULL;
7765 if (l1 != l2)
7766 return 0;
7768 return 1;
7771 /* Assigns default attributes to newly defined type. This is used to
7772 set short_call/long_call attributes for function types of
7773 functions defined inside corresponding #pragma scopes. */
7774 static void
7775 arm_set_default_type_attributes (tree type)
7777 /* Add __attribute__ ((long_call)) to all functions, when
7778 inside #pragma long_calls or __attribute__ ((short_call)),
7779 when inside #pragma no_long_calls. */
7780 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7782 tree type_attr_list, attr_name;
7783 type_attr_list = TYPE_ATTRIBUTES (type);
7785 if (arm_pragma_long_calls == LONG)
7786 attr_name = get_identifier ("long_call");
7787 else if (arm_pragma_long_calls == SHORT)
7788 attr_name = get_identifier ("short_call");
7789 else
7790 return;
7792 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7793 TYPE_ATTRIBUTES (type) = type_attr_list;
7797 /* Return true if DECL is known to be linked into section SECTION. */
7799 static bool
7800 arm_function_in_section_p (tree decl, section *section)
7802 /* We can only be certain about the prevailing symbol definition. */
7803 if (!decl_binds_to_current_def_p (decl))
7804 return false;
7806 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7807 if (!DECL_SECTION_NAME (decl))
7809 /* Make sure that we will not create a unique section for DECL. */
7810 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7811 return false;
7814 return function_section (decl) == section;
7817 /* Return nonzero if a 32-bit "long_call" should be generated for
7818 a call from the current function to DECL. We generate a long_call
7819 if the function:
7821 a. has an __attribute__ ((long_call))
7822 or b. is within the scope of a #pragma long_calls
7823 or c. the -mlong-calls command line switch has been specified
7825 However we do not generate a long call if the function:
7827 d. has an __attribute__ ((short_call))
7828 or e. is inside the scope of a #pragma no_long_calls
7829 or f. is defined in the same section as the current function. */
7831 bool
7832 arm_is_long_call_p (tree decl)
7834 tree attrs;
7836 if (!decl)
7837 return TARGET_LONG_CALLS;
7839 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7840 if (lookup_attribute ("short_call", attrs))
7841 return false;
7843 /* For "f", be conservative, and only cater for cases in which the
7844 whole of the current function is placed in the same section. */
7845 if (!flag_reorder_blocks_and_partition
7846 && TREE_CODE (decl) == FUNCTION_DECL
7847 && arm_function_in_section_p (decl, current_function_section ()))
7848 return false;
7850 if (lookup_attribute ("long_call", attrs))
7851 return true;
7853 return TARGET_LONG_CALLS;
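/* A short sketch of how rules a-e above interact in source code:

     #pragma long_calls
     extern void far_func (void);                              // rule b
     #pragma long_calls_off

     extern void helper (void) __attribute__ ((long_call));    // rule a
     extern void nearby (void) __attribute__ ((short_call));   // rule d

   With -mlong-calls (rule c), every call not covered by d, e or f gets
   the long-call sequence.  */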
7856 /* Return nonzero if it is ok to make a tail-call to DECL. */
7857 static bool
7858 arm_function_ok_for_sibcall (tree decl, tree exp)
7860 unsigned long func_type;
7862 if (cfun->machine->sibcall_blocked)
7863 return false;
7865 if (TARGET_FDPIC)
7867 /* In FDPIC, never tailcall something for which we have no decl:
7868 the target function could be in a different module, requiring
7869 a different FDPIC register value. */
7870 if (decl == NULL)
7871 return false;
7874 /* Never tailcall something if we are generating code for Thumb-1. */
7875 if (TARGET_THUMB1)
7876 return false;
7878 /* The PIC register is live on entry to VxWorks PLT entries, so we
7879 must make the call before restoring the PIC register. */
7880 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7881 return false;
7883 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7884 may be used both as target of the call and base register for restoring
7885 the VFP registers */
7886 if (TARGET_APCS_FRAME && TARGET_ARM
7887 && TARGET_HARD_FLOAT
7888 && decl && arm_is_long_call_p (decl))
7889 return false;
7891 /* If we are interworking and the function is not declared static
7892 then we can't tail-call it unless we know that it exists in this
7893 compilation unit (since it might be a Thumb routine). */
7894 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7895 && !TREE_ASM_WRITTEN (decl))
7896 return false;
7898 func_type = arm_current_func_type ();
7899 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7900 if (IS_INTERRUPT (func_type))
7901 return false;
7903 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7904 generated for entry functions themselves. */
7905 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7906 return false;
7908 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7909 this would complicate matters for later code generation. */
7910 if (TREE_CODE (exp) == CALL_EXPR)
7912 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7913 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7914 return false;
7917 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7919 /* Check that the return value locations are the same. For
7920 example that we aren't returning a value from the sibling in
7921 a VFP register but then need to transfer it to a core
7922 register. */
7923 rtx a, b;
7924 tree decl_or_type = decl;
7926 /* If it is an indirect function pointer, get the function type. */
7927 if (!decl)
7928 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7930 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7931 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7932 cfun->decl, false);
7933 if (!rtx_equal_p (a, b))
7934 return false;
7937 /* Never tailcall if function may be called with a misaligned SP. */
7938 if (IS_STACKALIGN (func_type))
7939 return false;
7941 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7942 references should become a NOP. Don't convert such calls into
7943 sibling calls. */
7944 if (TARGET_AAPCS_BASED
7945 && arm_abi == ARM_ABI_AAPCS
7946 && decl
7947 && DECL_WEAK (decl))
7948 return false;
7950 /* We cannot do a tailcall for an indirect call by descriptor if all the
7951 argument registers are used because the only register left to load the
7952 address is IP and it will already contain the static chain. */
7953 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7955 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7956 CUMULATIVE_ARGS cum;
7957 cumulative_args_t cum_v;
7959 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7960 cum_v = pack_cumulative_args (&cum);
7962 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7964 tree type = TREE_VALUE (t);
7965 if (!VOID_TYPE_P (type))
7967 function_arg_info arg (type, /*named=*/true);
7968 arm_function_arg_advance (cum_v, arg);
7972 function_arg_info arg (integer_type_node, /*named=*/true);
7973 if (!arm_function_arg (cum_v, arg))
7974 return false;
7977 /* Everything else is ok. */
7978 return true;
7982 /* Addressing mode support functions. */
7984 /* Return nonzero if X is a legitimate immediate operand when compiling
7985 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7987 legitimate_pic_operand_p (rtx x)
7989 if (SYMBOL_REF_P (x)
7990 || (GET_CODE (x) == CONST
7991 && GET_CODE (XEXP (x, 0)) == PLUS
7992 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7993 return 0;
7995 return 1;
7998 /* Record that the current function needs a PIC register. If PIC_REG is null,
7999 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8000 both cases cfun->machine->pic_reg is initialized if we have not already done
8001 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8002 the PIC register is reloaded at the current position in the instruction stream
8003 regardless of whether it was loaded before. Otherwise, it is only loaded
8004 if that has not already been done (crtl->uses_pic_offset_table is null). Note that
8005 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8006 is only supported iff COMPUTE_NOW is false. */
8008 static void
8009 require_pic_register (rtx pic_reg, bool compute_now)
8011 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8013 /* A lot of the logic here is made obscure by the fact that this
8014 routine gets called as part of the rtx cost estimation process.
8015 We don't want those calls to affect any assumptions about the real
8016 function; and further, we can't call entry_of_function() until we
8017 start the real expansion process. */
8018 if (!crtl->uses_pic_offset_table || compute_now)
8020 gcc_assert (can_create_pseudo_p ()
8021 || (pic_reg != NULL_RTX
8022 && REG_P (pic_reg)
8023 && GET_MODE (pic_reg) == Pmode));
8024 if (arm_pic_register != INVALID_REGNUM
8025 && !compute_now
8026 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8028 if (!cfun->machine->pic_reg)
8029 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8031 /* Play games to avoid marking the function as needing pic
8032 if we are being called as part of the cost-estimation
8033 process. */
8034 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8035 crtl->uses_pic_offset_table = 1;
8037 else
8039 rtx_insn *seq, *insn;
8041 if (pic_reg == NULL_RTX)
8042 pic_reg = gen_reg_rtx (Pmode);
8043 if (!cfun->machine->pic_reg)
8044 cfun->machine->pic_reg = pic_reg;
8046 /* Play games to avoid marking the function as needing pic
8047 if we are being called as part of the cost-estimation
8048 process. */
8049 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8051 crtl->uses_pic_offset_table = 1;
8052 start_sequence ();
8054 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8055 && arm_pic_register > LAST_LO_REGNUM
8056 && !compute_now)
8057 emit_move_insn (cfun->machine->pic_reg,
8058 gen_rtx_REG (Pmode, arm_pic_register));
8059 else
8060 arm_load_pic_register (0UL, pic_reg);
8062 seq = get_insns ();
8063 end_sequence ();
8065 for (insn = seq; insn; insn = NEXT_INSN (insn))
8066 if (INSN_P (insn))
8067 INSN_LOCATION (insn) = prologue_location;
8069 /* We can be called during expansion of PHI nodes, where
8070 we can't yet emit instructions directly in the final
8071 insn stream. Queue the insns on the entry edge, they will
8072 be committed after everything else is expanded. */
8073 if (currently_expanding_to_rtl)
8074 insert_insn_on_edge (seq,
8075 single_succ_edge
8076 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8077 else
8078 emit_insn (seq);
8084 /* Generate insns to calculate the address of ORIG in pic mode. */
8085 static rtx_insn *
8086 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8088 rtx pat;
8089 rtx mem;
8091 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8093 /* Make the MEM as close to a constant as possible. */
8094 mem = SET_SRC (pat);
8095 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8096 MEM_READONLY_P (mem) = 1;
8097 MEM_NOTRAP_P (mem) = 1;
8099 return emit_insn (pat);
8102 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8103 created to hold the result of the load. If not NULL, PIC_REG indicates
8104 which register to use as PIC register, otherwise it is decided by register
8105 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8106 location in the instruction stream, regardless of whether it was loaded
8107 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8108 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8110 Returns the register REG into which the PIC load is performed. */
8113 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8114 bool compute_now)
8116 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8118 if (SYMBOL_REF_P (orig)
8119 || LABEL_REF_P (orig))
8121 if (reg == 0)
8123 gcc_assert (can_create_pseudo_p ());
8124 reg = gen_reg_rtx (Pmode);
8127 /* VxWorks does not impose a fixed gap between segments; the run-time
8128 gap can be different from the object-file gap. We therefore can't
8129 use GOTOFF unless we are absolutely sure that the symbol is in the
8130 same segment as the GOT. Unfortunately, the flexibility of linker
8131 scripts means that we can't be sure of that in general, so assume
8132 that GOTOFF is never valid on VxWorks. */
8133 /* References to weak symbols cannot be resolved locally: they
8134 may be overridden by a non-weak definition at link time. */
8135 rtx_insn *insn;
8136 if ((LABEL_REF_P (orig)
8137 || (SYMBOL_REF_P (orig)
8138 && SYMBOL_REF_LOCAL_P (orig)
8139 && (SYMBOL_REF_DECL (orig)
8140 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8141 && (!SYMBOL_REF_FUNCTION_P (orig)
8142 || arm_fdpic_local_funcdesc_p (orig))))
8143 && NEED_GOT_RELOC
8144 && arm_pic_data_is_text_relative)
8145 insn = arm_pic_static_addr (orig, reg);
8146 else
8148 /* If this function doesn't have a pic register, create one now. */
8149 require_pic_register (pic_reg, compute_now);
8151 if (pic_reg == NULL_RTX)
8152 pic_reg = cfun->machine->pic_reg;
8154 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8157 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8158 by loop. */
8159 set_unique_reg_note (insn, REG_EQUAL, orig);
8161 return reg;
8163 else if (GET_CODE (orig) == CONST)
8165 rtx base, offset;
8167 if (GET_CODE (XEXP (orig, 0)) == PLUS
8168 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8169 return orig;
8171 /* Handle the case where we have: const (UNSPEC_TLS). */
8172 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8173 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8174 return orig;
8176 /* Handle the case where we have:
8177 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8178 CONST_INT. */
8179 if (GET_CODE (XEXP (orig, 0)) == PLUS
8180 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8181 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8183 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8184 return orig;
8187 if (reg == 0)
8189 gcc_assert (can_create_pseudo_p ());
8190 reg = gen_reg_rtx (Pmode);
8193 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8195 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8196 pic_reg, compute_now);
8197 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8198 base == reg ? 0 : reg, pic_reg,
8199 compute_now);
8201 if (CONST_INT_P (offset))
8203 /* The base register doesn't really matter; we only want to
8204 test the index for the appropriate mode. */
8205 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8207 gcc_assert (can_create_pseudo_p ());
8208 offset = force_reg (Pmode, offset);
8211 if (CONST_INT_P (offset))
8212 return plus_constant (Pmode, base, INTVAL (offset));
8215 if (GET_MODE_SIZE (mode) > 4
8216 && (GET_MODE_CLASS (mode) == MODE_INT
8217 || TARGET_SOFT_FLOAT))
8219 emit_insn (gen_addsi3 (reg, base, offset));
8220 return reg;
8223 return gen_rtx_PLUS (Pmode, base, offset);
8226 return orig;
8230 /* Generate insns that produce the address of the stack canary. */
8232 arm_stack_protect_tls_canary_mem (bool reload)
8234 rtx tp = gen_reg_rtx (SImode);
8235 if (reload)
8236 emit_insn (gen_reload_tp_hard (tp));
8237 else
8238 emit_insn (gen_load_tp_hard (tp));
8240 rtx reg = gen_reg_rtx (SImode);
8241 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8242 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8243 return gen_rtx_MEM (SImode, reg);
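/* A rough sketch of what the sequence above computes (option names as
   assumed here: -mstack-protector-guard=tls together with
   -mstack-protector-guard-offset=<bytes>):

     canary = *(unsigned long *) ((char *) <thread pointer>
                                  + arm_stack_protector_guard_offset);

   i.e. the guard value lives at a fixed offset from the thread pointer
   rather than in the __stack_chk_guard global.  */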
8247 /* Whether a register is callee saved or not. This is necessary because high
8248 registers are marked as caller saved when optimizing for size on Thumb-1
8249 targets, despite being callee saved, in order to avoid using them. */
8250 #define callee_saved_reg_p(reg) \
8251 (!call_used_or_fixed_reg_p (reg) \
8252 || (TARGET_THUMB1 && optimize_size \
8253 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8255 /* Return a mask for the call-clobbered low registers that are unused
8256 at the end of the prologue. */
8257 static unsigned long
8258 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8260 unsigned long mask = 0;
8261 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8263 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8264 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8265 mask |= 1 << (reg - FIRST_LO_REGNUM);
8266 return mask;
8269 /* Similarly for the start of the epilogue. */
8270 static unsigned long
8271 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8273 unsigned long mask = 0;
8274 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8276 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8277 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8278 mask |= 1 << (reg - FIRST_LO_REGNUM);
8279 return mask;
8282 /* Find a spare register to use during the prolog of a function. */
8284 static int
8285 thumb_find_work_register (unsigned long pushed_regs_mask)
8287 int reg;
8289 unsigned long unused_regs
8290 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8292 /* Check the argument registers first as these are call-used. The
8293 register allocation order means that sometimes r3 might be used
8294 but earlier argument registers might not, so check them all. */
8295 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8296 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8297 return reg;
8299 /* Otherwise look for a call-saved register that is going to be pushed. */
8300 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8301 if (pushed_regs_mask & (1 << reg))
8302 return reg;
8304 if (TARGET_THUMB2)
8306 /* Thumb-2 can use high regs. */
8307 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8308 if (pushed_regs_mask & (1 << reg))
8309 return reg;
8311 /* Something went wrong - thumb_compute_save_reg_mask()
8312 should have arranged for a suitable register to be pushed. */
8313 gcc_unreachable ();
8316 static GTY(()) int pic_labelno;
8318 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8319 low register. */
8321 void
8322 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8324 rtx l1, labelno, pic_tmp, pic_rtx;
8326 if (crtl->uses_pic_offset_table == 0
8327 || TARGET_SINGLE_PIC_BASE
8328 || TARGET_FDPIC)
8329 return;
8331 gcc_assert (flag_pic);
8333 if (pic_reg == NULL_RTX)
8334 pic_reg = cfun->machine->pic_reg;
8335 if (TARGET_VXWORKS_RTP)
8337 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8338 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8339 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8341 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8343 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8344 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8346 else
8348 /* We use an UNSPEC rather than a LABEL_REF because this label
8349 never appears in the code stream. */
8351 labelno = GEN_INT (pic_labelno++);
8352 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8353 l1 = gen_rtx_CONST (VOIDmode, l1);
8355 /* On the ARM the PC register contains 'dot + 8' at the time of the
8356 addition; on the Thumb it is 'dot + 4'. */
8357 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8358 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8359 UNSPEC_GOTSYM_OFF);
8360 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8362 if (TARGET_32BIT)
8364 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8366 else /* TARGET_THUMB1 */
8368 if (arm_pic_register != INVALID_REGNUM
8369 && REGNO (pic_reg) > LAST_LO_REGNUM)
8371 /* We will have pushed the pic register, so we should always be
8372 able to find a work register. */
8373 pic_tmp = gen_rtx_REG (SImode,
8374 thumb_find_work_register (saved_regs));
8375 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8376 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8377 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8379 else if (arm_pic_register != INVALID_REGNUM
8380 && arm_pic_register > LAST_LO_REGNUM
8381 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8383 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8384 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8385 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8387 else
8388 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8392 /* Need to emit this whether or not we obey regdecls,
8393 since setjmp/longjmp can cause life info to screw up. */
8394 emit_use (pic_reg);
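/* For the common non-VxWorks, non-FDPIC case, the sequence emitted above
   boils down to something like (a rough sketch of the ARM-state output):

       ldr     rN, .LCx
     .LPIC0:
       add     rN, pc, rN
       ...
     .LCx:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   where the "+ 8" matches the 'dot + 8' PC bias mentioned above (it would
   be "+ 4" for Thumb).  */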
8397 /* Try to determine whether an object, referenced via ORIG, will be
8398 placed in the text or data segment. This is used in FDPIC mode, to
8399 decide which relocations to use when accessing ORIG. *IS_READONLY
8400 is set to true if ORIG is a read-only location, false otherwise.
8401 Return true if we could determine the location of ORIG, false
8402 otherwise. *IS_READONLY is valid only when we return true. */
8403 static bool
8404 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8406 *is_readonly = false;
8408 if (LABEL_REF_P (orig))
8410 *is_readonly = true;
8411 return true;
8414 if (SYMBOL_REF_P (orig))
8416 if (CONSTANT_POOL_ADDRESS_P (orig))
8418 *is_readonly = true;
8419 return true;
8421 if (SYMBOL_REF_LOCAL_P (orig)
8422 && !SYMBOL_REF_EXTERNAL_P (orig)
8423 && SYMBOL_REF_DECL (orig)
8424 && (!DECL_P (SYMBOL_REF_DECL (orig))
8425 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8427 tree decl = SYMBOL_REF_DECL (orig);
8428 tree init = (TREE_CODE (decl) == VAR_DECL)
8429 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8430 ? decl : 0;
8431 int reloc = 0;
8432 bool named_section, readonly;
8434 if (init && init != error_mark_node)
8435 reloc = compute_reloc_for_constant (init);
8437 named_section = TREE_CODE (decl) == VAR_DECL
8438 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8439 readonly = decl_readonly_section (decl, reloc);
8441 /* We don't know where the link script will put a named
8442 section, so return false in such a case. */
8443 if (named_section)
8444 return false;
8446 *is_readonly = readonly;
8447 return true;
8450 /* We don't know. */
8451 return false;
8454 gcc_unreachable ();
8457 /* Generate code to load the address of a static var when flag_pic is set. */
8458 static rtx_insn *
8459 arm_pic_static_addr (rtx orig, rtx reg)
8461 rtx l1, labelno, offset_rtx;
8462 rtx_insn *insn;
8464 gcc_assert (flag_pic);
8466 bool is_readonly = false;
8467 bool info_known = false;
8469 if (TARGET_FDPIC
8470 && SYMBOL_REF_P (orig)
8471 && !SYMBOL_REF_FUNCTION_P (orig))
8472 info_known = arm_is_segment_info_known (orig, &is_readonly);
8474 if (TARGET_FDPIC
8475 && SYMBOL_REF_P (orig)
8476 && !SYMBOL_REF_FUNCTION_P (orig)
8477 && !info_known)
8479 /* We don't know where orig is stored, so we have to be
8480 pessimistic and use a GOT relocation. */
8481 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8483 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8485 else if (TARGET_FDPIC
8486 && SYMBOL_REF_P (orig)
8487 && (SYMBOL_REF_FUNCTION_P (orig)
8488 || !is_readonly))
8490 /* We use the GOTOFF relocation. */
8491 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8493 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8494 emit_insn (gen_movsi (reg, l1));
8495 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8497 else
8499 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8500 PC-relative access. */
8501 /* We use an UNSPEC rather than a LABEL_REF because this label
8502 never appears in the code stream. */
8503 labelno = GEN_INT (pic_labelno++);
8504 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8505 l1 = gen_rtx_CONST (VOIDmode, l1);
8507 /* On the ARM the PC register contains 'dot + 8' at the time of the
8508 addition; on the Thumb it is 'dot + 4'. */
8509 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8510 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8511 UNSPEC_SYMBOL_OFFSET);
8512 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8514 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8515 labelno));
8518 return insn;
8521 /* Return nonzero if X is valid as an ARM state addressing register. */
8522 static int
8523 arm_address_register_rtx_p (rtx x, int strict_p)
8525 int regno;
8527 if (!REG_P (x))
8528 return 0;
8530 regno = REGNO (x);
8532 if (strict_p)
8533 return ARM_REGNO_OK_FOR_BASE_P (regno);
8535 return (regno <= LAST_ARM_REGNUM
8536 || regno >= FIRST_PSEUDO_REGISTER
8537 || regno == FRAME_POINTER_REGNUM
8538 || regno == ARG_POINTER_REGNUM);
8541 /* Return TRUE if this rtx is the difference of a symbol and a label,
8542 and will reduce to a PC-relative relocation in the object file.
8543 Expressions like this can be left alone when generating PIC, rather
8544 than forced through the GOT. */
8545 static int
8546 pcrel_constant_p (rtx x)
8548 if (GET_CODE (x) == MINUS)
8549 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8551 return FALSE;
8554 /* Return true if X will surely end up in an index register after next
8555 splitting pass. */
8556 static bool
8557 will_be_in_index_register (const_rtx x)
8559 /* arm.md: calculate_pic_address will split this into a register. */
8560 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8563 /* Return nonzero if X is a valid ARM state address operand. */
8565 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8566 int strict_p)
8568 bool use_ldrd;
8569 enum rtx_code code = GET_CODE (x);
8571 if (arm_address_register_rtx_p (x, strict_p))
8572 return 1;
8574 use_ldrd = (TARGET_LDRD
8575 && (mode == DImode || mode == DFmode));
8577 if (code == POST_INC || code == PRE_DEC
8578 || ((code == PRE_INC || code == POST_DEC)
8579 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8580 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8582 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8583 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8584 && GET_CODE (XEXP (x, 1)) == PLUS
8585 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8587 rtx addend = XEXP (XEXP (x, 1), 1);
8589 /* Don't allow ldrd post increment by register because it's hard
8590 to fixup invalid register choices. */
8591 if (use_ldrd
8592 && GET_CODE (x) == POST_MODIFY
8593 && REG_P (addend))
8594 return 0;
8596 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8597 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8600 /* After reload constants split into minipools will have addresses
8601 from a LABEL_REF. */
8602 else if (reload_completed
8603 && (code == LABEL_REF
8604 || (code == CONST
8605 && GET_CODE (XEXP (x, 0)) == PLUS
8606 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8607 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8608 return 1;
8610 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8611 return 0;
8613 else if (code == PLUS)
8615 rtx xop0 = XEXP (x, 0);
8616 rtx xop1 = XEXP (x, 1);
8618 return ((arm_address_register_rtx_p (xop0, strict_p)
8619 && ((CONST_INT_P (xop1)
8620 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8621 || (!strict_p && will_be_in_index_register (xop1))))
8622 || (arm_address_register_rtx_p (xop1, strict_p)
8623 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8626 #if 0
8627 /* Reload currently can't handle MINUS, so disable this for now */
8628 else if (GET_CODE (x) == MINUS)
8630 rtx xop0 = XEXP (x, 0);
8631 rtx xop1 = XEXP (x, 1);
8633 return (arm_address_register_rtx_p (xop0, strict_p)
8634 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8636 #endif
8638 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8639 && code == SYMBOL_REF
8640 && CONSTANT_POOL_ADDRESS_P (x)
8641 && ! (flag_pic
8642 && symbol_mentioned_p (get_pool_constant (x))
8643 && ! pcrel_constant_p (get_pool_constant (x))))
8644 return 1;
8646 return 0;
8649 /* Return true if we can avoid creating a constant pool entry for x. */
8650 static bool
8651 can_avoid_literal_pool_for_label_p (rtx x)
8653 /* Normally we can assign constant values to target registers without
8654 the help of the constant pool. But there are cases where we have to use
8655 the constant pool, for example:
8656 1) assigning a label to a register;
8657 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8659 A constant pool access of the form:
8660 (set (reg r0) (mem (symbol_ref (".LC0"))))
8661 will cause the use of the literal pool (later, in arm_reorg).
8662 So here we mark such a form as invalid, and the compiler
8663 will adjust it into:
8664 (set (reg r0) (symbol_ref (".LC0")))
8665 (set (reg r0) (mem (reg r0))).
8666 No extra register is required, and (mem (reg r0)) won't cause the use
8667 of literal pools. */
8668 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8669 && CONSTANT_POOL_ADDRESS_P (x))
8670 return 1;
8671 return 0;
8675 /* Return nonzero if X is a valid Thumb-2 address operand. */
8676 static int
8677 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8679 bool use_ldrd;
8680 enum rtx_code code = GET_CODE (x);
8682 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8683 we can store and load it like any other 16-bit value. */
8684 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8685 mode = HImode;
8687 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8688 return mve_vector_mem_operand (mode, x, strict_p);
8690 if (arm_address_register_rtx_p (x, strict_p))
8691 return 1;
8693 use_ldrd = (TARGET_LDRD
8694 && (mode == DImode || mode == DFmode));
8696 if (code == POST_INC || code == PRE_DEC
8697 || ((code == PRE_INC || code == POST_DEC)
8698 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8699 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8701 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8702 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8703 && GET_CODE (XEXP (x, 1)) == PLUS
8704 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8706 /* Thumb-2 only has autoincrement by constant. */
8707 rtx addend = XEXP (XEXP (x, 1), 1);
8708 HOST_WIDE_INT offset;
8710 if (!CONST_INT_P (addend))
8711 return 0;
8713 offset = INTVAL(addend);
8714 if (GET_MODE_SIZE (mode) <= 4)
8715 return (offset > -256 && offset < 256);
8717 return (use_ldrd && offset > -1024 && offset < 1024
8718 && (offset & 3) == 0);
8721 /* After reload constants split into minipools will have addresses
8722 from a LABEL_REF. */
8723 else if (reload_completed
8724 && (code == LABEL_REF
8725 || (code == CONST
8726 && GET_CODE (XEXP (x, 0)) == PLUS
8727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8728 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8729 return 1;
8731 else if (mode == TImode
8732 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8733 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8734 return 0;
8736 else if (code == PLUS)
8738 rtx xop0 = XEXP (x, 0);
8739 rtx xop1 = XEXP (x, 1);
8741 return ((arm_address_register_rtx_p (xop0, strict_p)
8742 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8743 || (!strict_p && will_be_in_index_register (xop1))))
8744 || (arm_address_register_rtx_p (xop1, strict_p)
8745 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8748 else if (can_avoid_literal_pool_for_label_p (x))
8749 return 0;
8751 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8752 && code == SYMBOL_REF
8753 && CONSTANT_POOL_ADDRESS_P (x)
8754 && ! (flag_pic
8755 && symbol_mentioned_p (get_pool_constant (x))
8756 && ! pcrel_constant_p (get_pool_constant (x))))
8757 return 1;
8759 return 0;
8762 /* Return nonzero if INDEX is valid for an address index operand in
8763 ARM state. */
8764 static int
8765 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8766 int strict_p)
8768 HOST_WIDE_INT range;
8769 enum rtx_code code = GET_CODE (index);
8771 /* Standard coprocessor addressing modes. */
8772 if (TARGET_HARD_FLOAT
8773 && (mode == SFmode || mode == DFmode))
8774 return (code == CONST_INT && INTVAL (index) < 1024
8775 && INTVAL (index) > -1024
8776 && (INTVAL (index) & 3) == 0);
8778 /* For quad modes, we restrict the constant offset to be slightly less
8779 than what the instruction format permits. We do this because for
8780 quad mode moves, we will actually decompose them into two separate
8781 double-mode reads or writes. INDEX must therefore be a valid
8782 (double-mode) offset and so should INDEX+8. */
8783 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8784 return (code == CONST_INT
8785 && INTVAL (index) < 1016
8786 && INTVAL (index) > -1024
8787 && (INTVAL (index) & 3) == 0);
8789 /* We have no such constraint on double mode offsets, so we permit the
8790 full range of the instruction format. */
8791 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8792 return (code == CONST_INT
8793 && INTVAL (index) < 1024
8794 && INTVAL (index) > -1024
8795 && (INTVAL (index) & 3) == 0);
8797 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8798 return (code == CONST_INT
8799 && INTVAL (index) < 1024
8800 && INTVAL (index) > -1024
8801 && (INTVAL (index) & 3) == 0);
8803 if (arm_address_register_rtx_p (index, strict_p)
8804 && (GET_MODE_SIZE (mode) <= 4))
8805 return 1;
8807 if (mode == DImode || mode == DFmode)
8809 if (code == CONST_INT)
8811 HOST_WIDE_INT val = INTVAL (index);
8813 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8814 If vldr is selected it uses arm_coproc_mem_operand. */
8815 if (TARGET_LDRD)
8816 return val > -256 && val < 256;
8817 else
8818 return val > -4096 && val < 4092;
8821 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8824 if (GET_MODE_SIZE (mode) <= 4
8825 && ! (arm_arch4
8826 && (mode == HImode
8827 || mode == HFmode
8828 || (mode == QImode && outer == SIGN_EXTEND))))
8830 if (code == MULT)
8832 rtx xiop0 = XEXP (index, 0);
8833 rtx xiop1 = XEXP (index, 1);
8835 return ((arm_address_register_rtx_p (xiop0, strict_p)
8836 && power_of_two_operand (xiop1, SImode))
8837 || (arm_address_register_rtx_p (xiop1, strict_p)
8838 && power_of_two_operand (xiop0, SImode)));
8840 else if (code == LSHIFTRT || code == ASHIFTRT
8841 || code == ASHIFT || code == ROTATERT)
8843 rtx op = XEXP (index, 1);
8845 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8846 && CONST_INT_P (op)
8847 && INTVAL (op) > 0
8848 && INTVAL (op) <= 31);
8852 /* For ARM v4 we may be doing a sign-extend operation during the
8853 load. */
8854 if (arm_arch4)
8856 if (mode == HImode
8857 || mode == HFmode
8858 || (outer == SIGN_EXTEND && mode == QImode))
8859 range = 256;
8860 else
8861 range = 4096;
8863 else
8864 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8866 return (code == CONST_INT
8867 && INTVAL (index) < range
8868 && INTVAL (index) > -range);
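/* A few concrete examples of what the ARM-state rules above accept or
   reject (a sketch, assuming arm_arch4 and ignoring the coprocessor/NEON
   paths):

     ldr   r0, [r1, #4095]        @ SImode immediate within +/-4095: OK
     ldr   r0, [r1, r2, lsl #3]   @ index scaled by a power of two:  OK
     ldrh  r0, [r1, #300]         @ HImode offset outside +/-255: rejected  */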
8871 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8872 index operand. i.e. 1, 2, 4 or 8. */
8873 static bool
8874 thumb2_index_mul_operand (rtx op)
8876 HOST_WIDE_INT val;
8878 if (!CONST_INT_P (op))
8879 return false;
8881 val = INTVAL(op);
8882 return (val == 1 || val == 2 || val == 4 || val == 8);
8885 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8886 static int
8887 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8889 enum rtx_code code = GET_CODE (index);
8891 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8892 /* Standard coprocessor addressing modes. */
8893 if (TARGET_VFP_BASE
8894 && (mode == SFmode || mode == DFmode))
8895 return (code == CONST_INT && INTVAL (index) < 1024
8896 /* Thumb-2 allows only > -256 index range for its core register
8897 load/stores. Since we allow SF/DF in core registers, we have
8898 to use the intersection between -256~4096 (core) and -1024~1024
8899 (coprocessor). */
8900 && INTVAL (index) > -256
8901 && (INTVAL (index) & 3) == 0);
8903 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8905 /* For DImode assume values will usually live in core regs
8906 and only allow LDRD addressing modes. */
8907 if (!TARGET_LDRD || mode != DImode)
8908 return (code == CONST_INT
8909 && INTVAL (index) < 1024
8910 && INTVAL (index) > -1024
8911 && (INTVAL (index) & 3) == 0);
8914 /* For quad modes, we restrict the constant offset to be slightly less
8915 than what the instruction format permits. We do this because for
8916 quad mode moves, we will actually decompose them into two separate
8917 double-mode reads or writes. INDEX must therefore be a valid
8918 (double-mode) offset and so should INDEX+8. */
8919 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8920 return (code == CONST_INT
8921 && INTVAL (index) < 1016
8922 && INTVAL (index) > -1024
8923 && (INTVAL (index) & 3) == 0);
8925 /* We have no such constraint on double mode offsets, so we permit the
8926 full range of the instruction format. */
8927 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8928 return (code == CONST_INT
8929 && INTVAL (index) < 1024
8930 && INTVAL (index) > -1024
8931 && (INTVAL (index) & 3) == 0);
8933 if (arm_address_register_rtx_p (index, strict_p)
8934 && (GET_MODE_SIZE (mode) <= 4))
8935 return 1;
8937 if (mode == DImode || mode == DFmode)
8939 if (code == CONST_INT)
8941 HOST_WIDE_INT val = INTVAL (index);
8942 /* Thumb-2 ldrd only has reg+const addressing modes.
8943 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8944 If vldr is selected it uses arm_coproc_mem_operand. */
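/* LDRD encodes an 8-bit offset scaled by 4, giving the +/-1020 range
   below; the LDR pair used otherwise must keep the second word (val + 4)
   within the 12-bit positive offset range, hence the 4095 - 4 bound.  */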
8945 if (TARGET_LDRD)
8946 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8947 else
8948 return IN_RANGE (val, -255, 4095 - 4);
8950 else
8951 return 0;
8954 if (code == MULT)
8956 rtx xiop0 = XEXP (index, 0);
8957 rtx xiop1 = XEXP (index, 1);
8959 return ((arm_address_register_rtx_p (xiop0, strict_p)
8960 && thumb2_index_mul_operand (xiop1))
8961 || (arm_address_register_rtx_p (xiop1, strict_p)
8962 && thumb2_index_mul_operand (xiop0)));
8964 else if (code == ASHIFT)
8966 rtx op = XEXP (index, 1);
8968 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8969 && CONST_INT_P (op)
8970 && INTVAL (op) > 0
8971 && INTVAL (op) <= 3);
8974 return (code == CONST_INT
8975 && INTVAL (index) < 4096
8976 && INTVAL (index) > -256);
8979 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8980 static int
8981 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8983 int regno;
8985 if (!REG_P (x))
8986 return 0;
8988 regno = REGNO (x);
8990 if (strict_p)
8991 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8993 return (regno <= LAST_LO_REGNUM
8994 || regno > LAST_VIRTUAL_REGISTER
8995 || regno == FRAME_POINTER_REGNUM
8996 || (GET_MODE_SIZE (mode) >= 4
8997 && (regno == STACK_POINTER_REGNUM
8998 || regno >= FIRST_PSEUDO_REGISTER
8999 || x == hard_frame_pointer_rtx
9000 || x == arg_pointer_rtx)));
9003 /* Return nonzero if x is a legitimate index register. This is the case
9004 for any base register that can access a QImode object. */
9005 inline static int
9006 thumb1_index_register_rtx_p (rtx x, int strict_p)
9008 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9011 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9013 The AP may be eliminated to either the SP or the FP, so we use the
9014 least common denominator, e.g. SImode, and offsets from 0 to 64.
9016 ??? Verify whether the above is the right approach.
9018 ??? Also, the FP may be eliminated to the SP, so perhaps that
9019 needs special handling also.
9021 ??? Look at how the mips16 port solves this problem. It probably uses
9022 better ways to solve some of these problems.
9024 Although it is not incorrect, we don't accept QImode and HImode
9025 addresses based on the frame pointer or arg pointer until the
9026 reload pass starts. This is so that eliminating such addresses
9027 into stack based ones won't produce impossible code. */
9028 int
9029 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9031 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9032 return 0;
9034 /* ??? Not clear if this is right. Experiment. */
9035 if (GET_MODE_SIZE (mode) < 4
9036 && !(reload_in_progress || reload_completed)
9037 && (reg_mentioned_p (frame_pointer_rtx, x)
9038 || reg_mentioned_p (arg_pointer_rtx, x)
9039 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9040 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9041 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9042 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9043 return 0;
9045 /* Accept any base register. SP only in SImode or larger. */
9046 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9047 return 1;
9049 /* This is PC relative data before arm_reorg runs. */
9050 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9051 && SYMBOL_REF_P (x)
9052 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9053 && !arm_disable_literal_pool)
9054 return 1;
9056 /* This is PC relative data after arm_reorg runs. */
9057 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9058 && reload_completed
9059 && (LABEL_REF_P (x)
9060 || (GET_CODE (x) == CONST
9061 && GET_CODE (XEXP (x, 0)) == PLUS
9062 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9063 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9064 return 1;
9066 /* Post-inc indexing only supported for SImode and larger. */
9067 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9068 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9069 return 1;
9071 else if (GET_CODE (x) == PLUS)
9073 /* REG+REG address can be any two index registers. */
9074 /* We disallow FRAME+REG addressing since we know that FRAME
9075 will be replaced with STACK, and SP relative addressing only
9076 permits SP+OFFSET. */
9077 if (GET_MODE_SIZE (mode) <= 4
9078 && XEXP (x, 0) != frame_pointer_rtx
9079 && XEXP (x, 1) != frame_pointer_rtx
9080 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9081 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9082 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9083 return 1;
9085 /* REG+const has 5-7 bit offset for non-SP registers. */
9086 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9087 || XEXP (x, 0) == arg_pointer_rtx)
9088 && CONST_INT_P (XEXP (x, 1))
9089 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9090 return 1;
9092 /* REG+const has 10-bit offset for SP, but only SImode and
9093 larger are supported. */
9094 /* ??? Should probably check for DI/DFmode overflow here
9095 just like GO_IF_LEGITIMATE_OFFSET does. */
9096 else if (REG_P (XEXP (x, 0))
9097 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9098 && GET_MODE_SIZE (mode) >= 4
9099 && CONST_INT_P (XEXP (x, 1))
9100 && INTVAL (XEXP (x, 1)) >= 0
9101 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9102 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9103 return 1;
9105 else if (REG_P (XEXP (x, 0))
9106 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9107 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9108 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9109 && REGNO (XEXP (x, 0))
9110 <= LAST_VIRTUAL_POINTER_REGISTER))
9111 && GET_MODE_SIZE (mode) >= 4
9112 && CONST_INT_P (XEXP (x, 1))
9113 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9114 return 1;
9117 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9118 && GET_MODE_SIZE (mode) == 4
9119 && SYMBOL_REF_P (x)
9120 && CONSTANT_POOL_ADDRESS_P (x)
9121 && !arm_disable_literal_pool
9122 && ! (flag_pic
9123 && symbol_mentioned_p (get_pool_constant (x))
9124 && ! pcrel_constant_p (get_pool_constant (x))))
9125 return 1;
9127 return 0;
9130 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9131 instruction of mode MODE. */
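/* These limits mirror the Thumb-1 load/store immediate encodings, which
   provide a 5-bit offset scaled by the access size: 0-31 for bytes, 0-62
   for halfwords and 0-124 for words; larger modes must also leave room
   for their final word.  */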
9132 int
9133 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9135 switch (GET_MODE_SIZE (mode))
9137 case 1:
9138 return val >= 0 && val < 32;
9140 case 2:
9141 return val >= 0 && val < 64 && (val & 1) == 0;
9143 default:
9144 return (val >= 0
9145 && (val + GET_MODE_SIZE (mode)) <= 128
9146 && (val & 3) == 0);
9150 bool
9151 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9153 if (TARGET_ARM)
9154 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9155 else if (TARGET_THUMB2)
9156 return thumb2_legitimate_address_p (mode, x, strict_p);
9157 else /* if (TARGET_THUMB1) */
9158 return thumb1_legitimate_address_p (mode, x, strict_p);
9161 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9163 Given an rtx X being reloaded into a reg required to be
9164 in class CLASS, return the class of reg to actually use.
9165 In general this is just CLASS, but for the Thumb core registers and
9166 immediate constants we prefer a LO_REGS class or a subset. */
9168 static reg_class_t
9169 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9171 if (TARGET_32BIT)
9172 return rclass;
9173 else
9175 if (rclass == GENERAL_REGS)
9176 return LO_REGS;
9177 else
9178 return rclass;
9182 /* Build the SYMBOL_REF for __tls_get_addr. */
9184 static GTY(()) rtx tls_get_addr_libfunc;
9186 static rtx
9187 get_tls_get_addr (void)
9189 if (!tls_get_addr_libfunc)
9190 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9191 return tls_get_addr_libfunc;
9194 rtx
9195 arm_load_tp (rtx target)
9197 if (!target)
9198 target = gen_reg_rtx (SImode);
9200 if (TARGET_HARD_TP)
9202 /* Can return in any reg. */
9203 emit_insn (gen_load_tp_hard (target));
9205 else
9207 /* Always returned in r0. Immediately copy the result into a pseudo,
9208 otherwise other uses of r0 (e.g. setting up function arguments) may
9209 clobber the value. */
9211 rtx tmp;
9213 if (TARGET_FDPIC)
9215 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9216 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9218 emit_insn (gen_load_tp_soft_fdpic ());
9220 /* Restore r9. */
9221 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9223 else
9224 emit_insn (gen_load_tp_soft ());
9226 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9227 emit_move_insn (target, tmp);
9229 return target;
9232 static rtx
9233 load_tls_operand (rtx x, rtx reg)
9235 rtx tmp;
9237 if (reg == NULL_RTX)
9238 reg = gen_reg_rtx (SImode);
9240 tmp = gen_rtx_CONST (SImode, x);
9242 emit_move_insn (reg, tmp);
9244 return reg;
9247 static rtx_insn *
9248 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9250 rtx label, labelno = NULL_RTX, sum;
9252 gcc_assert (reloc != TLS_DESCSEQ);
9253 start_sequence ();
9255 if (TARGET_FDPIC)
9257 sum = gen_rtx_UNSPEC (Pmode,
9258 gen_rtvec (2, x, GEN_INT (reloc)),
9259 UNSPEC_TLS);
9261 else
9263 labelno = GEN_INT (pic_labelno++);
9264 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9265 label = gen_rtx_CONST (VOIDmode, label);
9267 sum = gen_rtx_UNSPEC (Pmode,
9268 gen_rtvec (4, x, GEN_INT (reloc), label,
9269 GEN_INT (TARGET_ARM ? 8 : 4)),
9270 UNSPEC_TLS);
9272 reg = load_tls_operand (sum, reg);
9274 if (TARGET_FDPIC)
9275 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9276 else if (TARGET_ARM)
9277 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9278 else
9279 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9281 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9282 LCT_PURE, /* LCT_CONST? */
9283 Pmode, reg, Pmode);
9285 rtx_insn *insns = get_insns ();
9286 end_sequence ();
9288 return insns;
9291 static rtx
9292 arm_tls_descseq_addr (rtx x, rtx reg)
9294 rtx labelno = GEN_INT (pic_labelno++);
9295 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9296 rtx sum = gen_rtx_UNSPEC (Pmode,
9297 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9298 gen_rtx_CONST (VOIDmode, label),
9299 GEN_INT (!TARGET_ARM)),
9300 UNSPEC_TLS);
9301 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9303 emit_insn (gen_tlscall (x, labelno));
9304 if (!reg)
9305 reg = gen_reg_rtx (SImode);
9306 else
9307 gcc_assert (REGNO (reg) != R0_REGNUM);
9309 emit_move_insn (reg, reg0);
9311 return reg;
9315 rtx
9316 legitimize_tls_address (rtx x, rtx reg)
9318 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9319 rtx_insn *insns;
9320 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9322 switch (model)
9324 case TLS_MODEL_GLOBAL_DYNAMIC:
9325 if (TARGET_GNU2_TLS)
9327 gcc_assert (!TARGET_FDPIC);
9329 reg = arm_tls_descseq_addr (x, reg);
9331 tp = arm_load_tp (NULL_RTX);
9333 dest = gen_rtx_PLUS (Pmode, tp, reg);
9335 else
9337 /* Original scheme */
9338 if (TARGET_FDPIC)
9339 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9340 else
9341 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9342 dest = gen_reg_rtx (Pmode);
9343 emit_libcall_block (insns, dest, ret, x);
9345 return dest;
9347 case TLS_MODEL_LOCAL_DYNAMIC:
9348 if (TARGET_GNU2_TLS)
9350 gcc_assert (!TARGET_FDPIC);
9352 reg = arm_tls_descseq_addr (x, reg);
9354 tp = arm_load_tp (NULL_RTX);
9356 dest = gen_rtx_PLUS (Pmode, tp, reg);
9358 else
9360 if (TARGET_FDPIC)
9361 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9362 else
9363 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9365 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9366 share the LDM result with other LD model accesses. */
9367 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9368 UNSPEC_TLS);
9369 dest = gen_reg_rtx (Pmode);
9370 emit_libcall_block (insns, dest, ret, eqv);
9372 /* Load the addend. */
9373 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9374 GEN_INT (TLS_LDO32)),
9375 UNSPEC_TLS);
9376 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9377 dest = gen_rtx_PLUS (Pmode, dest, addend);
9379 return dest;
9381 case TLS_MODEL_INITIAL_EXEC:
9382 if (TARGET_FDPIC)
9384 sum = gen_rtx_UNSPEC (Pmode,
9385 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9386 UNSPEC_TLS);
9387 reg = load_tls_operand (sum, reg);
9388 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9389 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9391 else
9393 labelno = GEN_INT (pic_labelno++);
9394 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9395 label = gen_rtx_CONST (VOIDmode, label);
9396 sum = gen_rtx_UNSPEC (Pmode,
9397 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9398 GEN_INT (TARGET_ARM ? 8 : 4)),
9399 UNSPEC_TLS);
9400 reg = load_tls_operand (sum, reg);
9402 if (TARGET_ARM)
9403 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9404 else if (TARGET_THUMB2)
9405 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9406 else
9408 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9409 emit_move_insn (reg, gen_const_mem (SImode, reg));
9413 tp = arm_load_tp (NULL_RTX);
9415 return gen_rtx_PLUS (Pmode, tp, reg);
9417 case TLS_MODEL_LOCAL_EXEC:
9418 tp = arm_load_tp (NULL_RTX);
9420 reg = gen_rtx_UNSPEC (Pmode,
9421 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9422 UNSPEC_TLS);
9423 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9425 return gen_rtx_PLUS (Pmode, tp, reg);
9427 default:
9428 abort ();
9432 /* Try machine-dependent ways of modifying an illegitimate address
9433 to be legitimate. If we find one, return the new, valid address. */
9434 rtx
9435 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9437 if (arm_tls_referenced_p (x))
9439 rtx addend = NULL;
9441 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9443 addend = XEXP (XEXP (x, 0), 1);
9444 x = XEXP (XEXP (x, 0), 0);
9447 if (!SYMBOL_REF_P (x))
9448 return x;
9450 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9452 x = legitimize_tls_address (x, NULL_RTX);
9454 if (addend)
9456 x = gen_rtx_PLUS (SImode, x, addend);
9457 orig_x = x;
9459 else
9460 return x;
9463 if (TARGET_THUMB1)
9464 return thumb_legitimize_address (x, orig_x, mode);
9466 if (GET_CODE (x) == PLUS)
9468 rtx xop0 = XEXP (x, 0);
9469 rtx xop1 = XEXP (x, 1);
9471 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9472 xop0 = force_reg (SImode, xop0);
9474 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9475 && !symbol_mentioned_p (xop1))
9476 xop1 = force_reg (SImode, xop1);
9478 if (ARM_BASE_REGISTER_RTX_P (xop0)
9479 && CONST_INT_P (xop1))
9481 HOST_WIDE_INT n, low_n;
9482 rtx base_reg, val;
9483 n = INTVAL (xop1);
9485 /* VFP addressing modes actually allow greater offsets, but for
9486 now we just stick with the lowest common denominator. */
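/* For example, a DImode offset of 267 is split below into a base
   adjustment of 272 and a residual offset of -5, keeping the residual
   well within every doubleword addressing mode.  */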
9487 if (mode == DImode || mode == DFmode)
9489 low_n = n & 0x0f;
9490 n &= ~0x0f;
9491 if (low_n > 4)
9493 n += 16;
9494 low_n -= 16;
9497 else
9499 low_n = ((mode) == TImode ? 0
9500 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9501 n -= low_n;
9504 base_reg = gen_reg_rtx (SImode);
9505 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9506 emit_move_insn (base_reg, val);
9507 x = plus_constant (Pmode, base_reg, low_n);
9509 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9510 x = gen_rtx_PLUS (SImode, xop0, xop1);
9513 /* XXX We don't allow MINUS any more -- see comment in
9514 arm_legitimate_address_outer_p (). */
9515 else if (GET_CODE (x) == MINUS)
9517 rtx xop0 = XEXP (x, 0);
9518 rtx xop1 = XEXP (x, 1);
9520 if (CONSTANT_P (xop0))
9521 xop0 = force_reg (SImode, xop0);
9523 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9524 xop1 = force_reg (SImode, xop1);
9526 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9527 x = gen_rtx_MINUS (SImode, xop0, xop1);
9530 /* Make sure to take full advantage of the pre-indexed addressing mode
9531 with absolute addresses which often allows for the base register to
9532 be factorized for multiple adjacent memory references, and it might
9533 even allow for the mini pool to be avoided entirely. */
9534 else if (CONST_INT_P (x) && optimize > 0)
9536 unsigned int bits;
9537 HOST_WIDE_INT mask, base, index;
9538 rtx base_reg;
9540 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9541 only use an 8-bit index. So let's use a 12-bit index for
9542 SImode only and hope that arm_gen_constant will enable LDRB
9543 to use more bits. */
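/* For example, an SImode access to the absolute address 0x12345 becomes
   a base of 0x12000 loaded into a register plus a 12-bit index of
   0x345.  */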
9544 bits = (mode == SImode) ? 12 : 8;
9545 mask = (1 << bits) - 1;
9546 base = INTVAL (x) & ~mask;
9547 index = INTVAL (x) & mask;
9548 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9550 /* It'll most probably be more efficient to generate the
9551 base with more bits set and use a negative index instead.
9552 Don't do this for Thumb as negative offsets are much more
9553 limited. */
9554 base |= mask;
9555 index -= mask;
9557 base_reg = force_reg (SImode, GEN_INT (base));
9558 x = plus_constant (Pmode, base_reg, index);
9561 if (flag_pic)
9563 /* We need to find and carefully transform any SYMBOL and LABEL
9564 references; so go back to the original address expression. */
9565 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9566 false /*compute_now*/);
9568 if (new_x != orig_x)
9569 x = new_x;
9572 return x;
9576 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9577 to be legitimate. If we find one, return the new, valid address. */
9578 rtx
9579 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9581 if (GET_CODE (x) == PLUS
9582 && CONST_INT_P (XEXP (x, 1))
9583 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9584 || INTVAL (XEXP (x, 1)) < 0))
9586 rtx xop0 = XEXP (x, 0);
9587 rtx xop1 = XEXP (x, 1);
9588 HOST_WIDE_INT offset = INTVAL (xop1);
9590 /* Try and fold the offset into a biasing of the base register and
9591 then offsetting that. Don't do this when optimizing for space
9592 since it can cause too many CSEs. */
9593 if (optimize_size && offset >= 0
9594 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9596 HOST_WIDE_INT delta;
9598 if (offset >= 256)
9599 delta = offset - (256 - GET_MODE_SIZE (mode));
9600 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9601 delta = 31 * GET_MODE_SIZE (mode);
9602 else
9603 delta = offset & (~31 * GET_MODE_SIZE (mode));
9605 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9606 NULL_RTX);
9607 x = plus_constant (Pmode, xop0, delta);
9609 else if (offset < 0 && offset > -256)
9610 /* Small negative offsets are best done with a subtract before the
9611 dereference; forcing these into a register normally takes two
9612 instructions. */
9613 x = force_operand (x, NULL_RTX);
9614 else
9616 /* For the remaining cases, force the constant into a register. */
9617 xop1 = force_reg (SImode, xop1);
9618 x = gen_rtx_PLUS (SImode, xop0, xop1);
9621 else if (GET_CODE (x) == PLUS
9622 && s_register_operand (XEXP (x, 1), SImode)
9623 && !s_register_operand (XEXP (x, 0), SImode))
9625 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9627 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9630 if (flag_pic)
9632 /* We need to find and carefully transform any SYMBOL and LABEL
9633 references; so go back to the original address expression. */
9634 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9635 false /*compute_now*/);
9637 if (new_x != orig_x)
9638 x = new_x;
9641 return x;
9644 /* Return TRUE if X contains any TLS symbol references. */
9646 bool
9647 arm_tls_referenced_p (rtx x)
9649 if (! TARGET_HAVE_TLS)
9650 return false;
9652 subrtx_iterator::array_type array;
9653 FOR_EACH_SUBRTX (iter, array, x, ALL)
9655 const_rtx x = *iter;
9656 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9658 /* ARM currently does not provide relocations to encode TLS variables
9659 into AArch32 instructions, only data, so there is currently no
9660 way to implement these if a literal pool is disabled. */
9661 if (arm_disable_literal_pool)
9662 sorry ("accessing thread-local storage is not currently supported "
9663 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9665 return true;
9668 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9669 TLS offsets, not real symbol references. */
9670 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9671 iter.skip_subrtxes ();
9673 return false;
9676 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9678 On the ARM, allow any integer (invalid ones are removed later by insn
9679 patterns), nice doubles and symbol_refs which refer to the function's
9680 constant pool XXX.
9682 When generating pic allow anything. */
9684 static bool
9685 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9687 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9688 return false;
9690 return flag_pic || !label_mentioned_p (x);
9693 static bool
9694 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9696 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9697 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9698 for ARMv8-M Baseline or later the result is valid. */
9699 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9700 x = XEXP (x, 0);
9702 return (CONST_INT_P (x)
9703 || CONST_DOUBLE_P (x)
9704 || CONSTANT_ADDRESS_P (x)
9705 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9706 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9707 we build the symbol address with upper/lower
9708 relocations. */
9709 || (TARGET_THUMB1
9710 && !label_mentioned_p (x)
9711 && arm_valid_symbolic_address_p (x)
9712 && arm_disable_literal_pool)
9713 || flag_pic);
9716 static bool
9717 arm_legitimate_constant_p (machine_mode mode, rtx x)
9719 return (!arm_cannot_force_const_mem (mode, x)
9720 && (TARGET_32BIT
9721 ? arm_legitimate_constant_p_1 (mode, x)
9722 : thumb_legitimate_constant_p (mode, x)));
9725 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9727 static bool
9728 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9730 rtx base, offset;
9731 split_const (x, &base, &offset);
9733 if (SYMBOL_REF_P (base))
9735 /* Function symbols cannot have an offset due to the Thumb bit. */
9736 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9737 && INTVAL (offset) != 0)
9738 return true;
9740 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9741 && !offset_within_block_p (base, INTVAL (offset)))
9742 return true;
9744 return arm_tls_referenced_p (x);
9747 #define REG_OR_SUBREG_REG(X) \
9748 (REG_P (X) \
9749 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9751 #define REG_OR_SUBREG_RTX(X) \
9752 (REG_P (X) ? (X) : SUBREG_REG (X))
9754 static inline int
9755 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9757 machine_mode mode = GET_MODE (x);
9758 int total, words;
9760 switch (code)
9762 case ASHIFT:
9763 case ASHIFTRT:
9764 case LSHIFTRT:
9765 case ROTATERT:
9766 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9768 case PLUS:
9769 case MINUS:
9770 case COMPARE:
9771 case NEG:
9772 case NOT:
9773 return COSTS_N_INSNS (1);
9775 case MULT:
9776 if (arm_arch6m && arm_m_profile_small_mul)
9777 return COSTS_N_INSNS (32);
9779 if (CONST_INT_P (XEXP (x, 1)))
9781 int cycles = 0;
9782 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9784 while (i)
9786 i >>= 2;
9787 cycles++;
9789 return COSTS_N_INSNS (2) + cycles;
9791 return COSTS_N_INSNS (1) + 16;
9793 case SET:
9794 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9795 the mode. */
9796 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9797 return (COSTS_N_INSNS (words)
9798 + 4 * ((MEM_P (SET_SRC (x)))
9799 + MEM_P (SET_DEST (x))));
9801 case CONST_INT:
9802 if (outer == SET)
9804 if (UINTVAL (x) < 256
9805 /* 16-bit constant. */
9806 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9807 return 0;
9808 if (thumb_shiftable_const (INTVAL (x)))
9809 return COSTS_N_INSNS (2);
9810 return arm_disable_literal_pool
9811 ? COSTS_N_INSNS (8)
9812 : COSTS_N_INSNS (3);
9814 else if ((outer == PLUS || outer == COMPARE)
9815 && INTVAL (x) < 256 && INTVAL (x) > -256)
9816 return 0;
9817 else if ((outer == IOR || outer == XOR || outer == AND)
9818 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9819 return COSTS_N_INSNS (1);
9820 else if (outer == AND)
9822 int i;
9823 /* This duplicates the tests in the andsi3 expander. */
9824 for (i = 9; i <= 31; i++)
9825 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9826 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9827 return COSTS_N_INSNS (2);
9829 else if (outer == ASHIFT || outer == ASHIFTRT
9830 || outer == LSHIFTRT)
9831 return 0;
9832 return COSTS_N_INSNS (2);
9834 case CONST:
9835 case CONST_DOUBLE:
9836 case LABEL_REF:
9837 case SYMBOL_REF:
9838 return COSTS_N_INSNS (3);
9840 case UDIV:
9841 case UMOD:
9842 case DIV:
9843 case MOD:
9844 return 100;
9846 case TRUNCATE:
9847 return 99;
9849 case AND:
9850 case XOR:
9851 case IOR:
9852 /* XXX guess. */
9853 return 8;
9855 case MEM:
9856 /* XXX another guess. */
9857 /* Memory costs quite a lot for the first word, but subsequent words
9858 load at the equivalent of a single insn each. */
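/* E.g. an SImode load costs 10, a DImode load costs 14, and a load from
   the constant pool costs a further 4.  */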
9859 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9860 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9861 ? 4 : 0));
9863 case IF_THEN_ELSE:
9864 /* XXX a guess. */
9865 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9866 return 14;
9867 return 2;
9869 case SIGN_EXTEND:
9870 case ZERO_EXTEND:
9871 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9872 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9874 if (mode == SImode)
9875 return total;
9877 if (arm_arch6)
9878 return total + COSTS_N_INSNS (1);
9880 /* Assume a two-shift sequence. Increase the cost slightly so
9881 we prefer actual shifts over an extend operation. */
9882 return total + 1 + COSTS_N_INSNS (2);
9884 default:
9885 return 99;
9889 /* Estimates the size cost of thumb1 instructions.
9890 For now most of the code is copied from thumb1_rtx_costs. We need more
9891 fine-grained tuning when we have more related test cases. */
9892 static inline int
9893 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9895 machine_mode mode = GET_MODE (x);
9896 int words, cost;
9898 switch (code)
9900 case ASHIFT:
9901 case ASHIFTRT:
9902 case LSHIFTRT:
9903 case ROTATERT:
9904 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9906 case PLUS:
9907 case MINUS:
9908 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9909 operations defined by RTL expansion, especially for the expansion of
9910 multiplication. */
9911 if ((GET_CODE (XEXP (x, 0)) == MULT
9912 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9913 || (GET_CODE (XEXP (x, 1)) == MULT
9914 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9915 return COSTS_N_INSNS (2);
9916 /* Fall through. */
9917 case COMPARE:
9918 case NEG:
9919 case NOT:
9920 return COSTS_N_INSNS (1);
9922 case MULT:
9923 if (CONST_INT_P (XEXP (x, 1)))
9925 /* Thumb1 mul instruction can't operate on const. We must load it
9926 into a register first. */
9927 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9928 /* For the targets which have a very small and high-latency multiply
9929 unit, we prefer to synthesize the mult with up to 5 instructions,
9930 giving a good balance between size and performance. */
9931 if (arm_arch6m && arm_m_profile_small_mul)
9932 return COSTS_N_INSNS (5);
9933 else
9934 return COSTS_N_INSNS (1) + const_size;
9936 return COSTS_N_INSNS (1);
9938 case SET:
9939 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9940 the mode. */
9941 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9942 cost = COSTS_N_INSNS (words);
9943 if (satisfies_constraint_J (SET_SRC (x))
9944 || satisfies_constraint_K (SET_SRC (x))
9945 /* Too big an immediate for a 2-byte mov, using MOVT. */
9946 || (CONST_INT_P (SET_SRC (x))
9947 && UINTVAL (SET_SRC (x)) >= 256
9948 && TARGET_HAVE_MOVT
9949 && satisfies_constraint_j (SET_SRC (x)))
9950 /* thumb1_movdi_insn. */
9951 || ((words > 1) && MEM_P (SET_SRC (x))))
9952 cost += COSTS_N_INSNS (1);
9953 return cost;
9955 case CONST_INT:
9956 if (outer == SET)
9958 if (UINTVAL (x) < 256)
9959 return COSTS_N_INSNS (1);
9960 /* movw is 4 bytes long. */
9961 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9962 return COSTS_N_INSNS (2);
9963 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9964 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9965 return COSTS_N_INSNS (2);
9966 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9967 if (thumb_shiftable_const (INTVAL (x)))
9968 return COSTS_N_INSNS (2);
9969 return arm_disable_literal_pool
9970 ? COSTS_N_INSNS (8)
9971 : COSTS_N_INSNS (3);
9973 else if ((outer == PLUS || outer == COMPARE)
9974 && INTVAL (x) < 256 && INTVAL (x) > -256)
9975 return 0;
9976 else if ((outer == IOR || outer == XOR || outer == AND)
9977 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9978 return COSTS_N_INSNS (1);
9979 else if (outer == AND)
9981 int i;
9982 /* This duplicates the tests in the andsi3 expander. */
9983 for (i = 9; i <= 31; i++)
9984 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9985 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9986 return COSTS_N_INSNS (2);
9988 else if (outer == ASHIFT || outer == ASHIFTRT
9989 || outer == LSHIFTRT)
9990 return 0;
9991 return COSTS_N_INSNS (2);
9993 case CONST:
9994 case CONST_DOUBLE:
9995 case LABEL_REF:
9996 case SYMBOL_REF:
9997 return COSTS_N_INSNS (3);
9999 case UDIV:
10000 case UMOD:
10001 case DIV:
10002 case MOD:
10003 return 100;
10005 case TRUNCATE:
10006 return 99;
10008 case AND:
10009 case XOR:
10010 case IOR:
10011 return COSTS_N_INSNS (1);
10013 case MEM:
10014 return (COSTS_N_INSNS (1)
10015 + COSTS_N_INSNS (1)
10016 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10017 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10018 ? COSTS_N_INSNS (1) : 0));
10020 case IF_THEN_ELSE:
10021 /* XXX a guess. */
10022 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10023 return 14;
10024 return 2;
10026 case ZERO_EXTEND:
10027 /* XXX still guessing. */
10028 switch (GET_MODE (XEXP (x, 0)))
10030 case E_QImode:
10031 return (1 + (mode == DImode ? 4 : 0)
10032 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10034 case E_HImode:
10035 return (4 + (mode == DImode ? 4 : 0)
10036 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10038 case E_SImode:
10039 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10041 default:
10042 return 99;
10045 default:
10046 return 99;
10050 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10051 PLUS, adds the carry flag, then return the other operand. If
10052 neither is a carry, return OP unchanged. */
10053 static rtx
10054 strip_carry_operation (rtx op)
10056 gcc_assert (GET_CODE (op) == PLUS);
10057 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10058 return XEXP (op, 1);
10059 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10060 return XEXP (op, 0);
10061 return op;
10064 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10065 operand, then return the operand that is being shifted. If the shift
10066 is not by a constant, then set SHIFT_REG to point to the operand.
10067 Return NULL if OP is not a shifter operand. */
10068 static rtx
10069 shifter_op_p (rtx op, rtx *shift_reg)
10071 enum rtx_code code = GET_CODE (op);
10073 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10074 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10075 return XEXP (op, 0);
10076 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10077 return XEXP (op, 0);
10078 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10079 || code == ASHIFTRT)
10081 if (!CONST_INT_P (XEXP (op, 1)))
10082 *shift_reg = XEXP (op, 1);
10083 return XEXP (op, 0);
10086 return NULL;
10089 static bool
10090 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10092 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10093 rtx_code code = GET_CODE (x);
10094 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10096 switch (XINT (x, 1))
10098 case UNSPEC_UNALIGNED_LOAD:
10099 /* We can only do unaligned loads into the integer unit, and we can't
10100 use LDM or LDRD. */
10101 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10102 if (speed_p)
10103 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10104 + extra_cost->ldst.load_unaligned);
10106 #ifdef NOT_YET
10107 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10108 ADDR_SPACE_GENERIC, speed_p);
10109 #endif
10110 return true;
10112 case UNSPEC_UNALIGNED_STORE:
10113 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10114 if (speed_p)
10115 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10116 + extra_cost->ldst.store_unaligned);
10118 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10119 #ifdef NOT_YET
10120 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10121 ADDR_SPACE_GENERIC, speed_p);
10122 #endif
10123 return true;
10125 case UNSPEC_VRINTZ:
10126 case UNSPEC_VRINTP:
10127 case UNSPEC_VRINTM:
10128 case UNSPEC_VRINTR:
10129 case UNSPEC_VRINTX:
10130 case UNSPEC_VRINTA:
10131 if (speed_p)
10132 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10134 return true;
10135 default:
10136 *cost = COSTS_N_INSNS (2);
10137 break;
10139 return true;
10142 /* Cost of a libcall. We assume one insn per argument, an amount for the
10143 call (one insn for -Os) and then one for processing the result. */
10144 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
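/* For example, LIBCALL_COST (2) for a two-argument call evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */

/* Helper for the narrow-mode PLUS and MINUS cases below: if operand IDX
   of X is a left-shift form of shifter operand, add the cost of a single
   arith+shift operation (plus the register-shift cost when the shift
   amount is not a constant) and the costs of the two sub-operands, then
   return true from the calling function.  Relies on the caller's x,
   *cost, speed_p and extra_cost locals.  */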
10146 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10147 do \
10149 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10150 if (shift_op != NULL \
10151 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10153 if (shift_reg) \
10155 if (speed_p) \
10156 *cost += extra_cost->alu.arith_shift_reg; \
10157 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10158 ASHIFT, 1, speed_p); \
10160 else if (speed_p) \
10161 *cost += extra_cost->alu.arith_shift; \
10163 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10164 ASHIFT, 0, speed_p) \
10165 + rtx_cost (XEXP (x, 1 - IDX), \
10166 GET_MODE (shift_op), \
10167 OP, 1, speed_p)); \
10168 return true; \
10171 while (0)
10173 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10174 considering the costs of the addressing mode and memory access
10175 separately. */
10176 static bool
10177 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10178 int *cost, bool speed_p)
10180 machine_mode mode = GET_MODE (x);
10182 *cost = COSTS_N_INSNS (1);
10184 if (flag_pic
10185 && GET_CODE (XEXP (x, 0)) == PLUS
10186 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10187 /* This will be split into two instructions. Add the cost of the
10188 additional instruction here. The cost of the memory access is computed
10189 below. See arm.md:calculate_pic_address. */
10190 *cost += COSTS_N_INSNS (1);
10192 /* Calculate cost of the addressing mode. */
10193 if (speed_p)
10195 arm_addr_mode_op op_type;
10196 switch (GET_CODE (XEXP (x, 0)))
10198 default:
10199 case REG:
10200 op_type = AMO_DEFAULT;
10201 break;
10202 case MINUS:
10203 /* MINUS does not appear in RTL, but the architecture supports it,
10204 so handle this case defensively. */
10205 /* fall through */
10206 case PLUS:
10207 op_type = AMO_NO_WB;
10208 break;
10209 case PRE_INC:
10210 case PRE_DEC:
10211 case POST_INC:
10212 case POST_DEC:
10213 case PRE_MODIFY:
10214 case POST_MODIFY:
10215 op_type = AMO_WB;
10216 break;
10219 if (VECTOR_MODE_P (mode))
10220 *cost += current_tune->addr_mode_costs->vector[op_type];
10221 else if (FLOAT_MODE_P (mode))
10222 *cost += current_tune->addr_mode_costs->fp[op_type];
10223 else
10224 *cost += current_tune->addr_mode_costs->integer[op_type];
10227 /* Calculate cost of memory access. */
10228 if (speed_p)
10230 if (FLOAT_MODE_P (mode))
10232 if (GET_MODE_SIZE (mode) == 8)
10233 *cost += extra_cost->ldst.loadd;
10234 else
10235 *cost += extra_cost->ldst.loadf;
10237 else if (VECTOR_MODE_P (mode))
10238 *cost += extra_cost->ldst.loadv;
10239 else
10241 /* Integer modes */
10242 if (GET_MODE_SIZE (mode) == 8)
10243 *cost += extra_cost->ldst.ldrd;
10244 else
10245 *cost += extra_cost->ldst.load;
10249 return true;
10252 /* Helper for arm_bfi_p. */
10253 static bool
10254 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10256 unsigned HOST_WIDE_INT const1;
10257 unsigned HOST_WIDE_INT const2 = 0;
10259 if (!CONST_INT_P (XEXP (op0, 1)))
10260 return false;
10262 const1 = UINTVAL (XEXP (op0, 1));
10263 if (!CONST_INT_P (XEXP (op1, 1))
10264 || ~UINTVAL (XEXP (op1, 1)) != const1)
10265 return false;
10267 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10268 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10270 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10271 *sub0 = XEXP (XEXP (op0, 0), 0);
10273 else
10274 *sub0 = XEXP (op0, 0);
10276 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10277 return false;
10279 *sub1 = XEXP (op1, 0);
10280 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10283 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10284 format looks something like:
10286 (IOR (AND (reg1) (~const1))
10287 (AND (ASHIFT (reg2) (const2))
10288 (const1)))
10290 where const1 is a consecutive sequence of 1-bits with the
10291 least-significant non-zero bit starting at bit position const2. If
10292 const2 is zero, then the shift will not appear at all, due to
10293 canonicalization. The two arms of the IOR expression may be
10294 flipped. */
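/* For example, with const1 == 0xff0 and const2 == 4,

     (ior (and (reg r1) (const_int -4081))          ; -4081 == ~0xff0
          (and (ashift (reg r2) (const_int 4)) (const_int 0xff0)))

   inserts an 8-bit field from r2 at bit 4 of r1, i.e. BFI r1, r2, #4, #8;
   arm_bfi_1_p accepts it because 0xff0 + (1 << 4) == 0x1000 is a power
   of two.  */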
10295 static bool
10296 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10298 if (GET_CODE (x) != IOR)
10299 return false;
10300 if (GET_CODE (XEXP (x, 0)) != AND
10301 || GET_CODE (XEXP (x, 1)) != AND)
10302 return false;
10303 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10304 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10307 /* RTX costs. Make an estimate of the cost of executing the operation
10308 X, which is contained within an operation with code OUTER_CODE.
10309 SPEED_P indicates whether the cost desired is the performance cost,
10310 or the size cost. The estimate is stored in COST and the return
10311 value is TRUE if the cost calculation is final, or FALSE if the
10312 caller should recurse through the operands of X to add additional
10313 costs.
10315 We currently make no attempt to model the size savings of Thumb-2
10316 16-bit instructions. At the normal points in compilation where
10317 this code is called we have no measure of whether the condition
10318 flags are live or not, and thus no realistic way to determine what
10319 the size will eventually be. */
10320 static bool
10321 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10322 const struct cpu_cost_table *extra_cost,
10323 int *cost, bool speed_p)
10325 machine_mode mode = GET_MODE (x);
10327 *cost = COSTS_N_INSNS (1);
10329 if (TARGET_THUMB1)
10331 if (speed_p)
10332 *cost = thumb1_rtx_costs (x, code, outer_code);
10333 else
10334 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10335 return true;
10338 switch (code)
10340 case SET:
10341 *cost = 0;
10342 /* SET RTXs don't have a mode so we get it from the destination. */
10343 mode = GET_MODE (SET_DEST (x));
10345 if (REG_P (SET_SRC (x))
10346 && REG_P (SET_DEST (x)))
10348 /* Assume that most copies can be done with a single insn,
10349 unless we don't have HW FP, in which case everything
10350 larger than word mode will require two insns. */
10351 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10352 && GET_MODE_SIZE (mode) > 4)
10353 || mode == DImode)
10354 ? 2 : 1);
10355 /* Conditional register moves can be encoded
10356 in 16 bits in Thumb mode. */
10357 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10358 *cost >>= 1;
10360 return true;
10363 if (CONST_INT_P (SET_SRC (x)))
10365 /* Handle CONST_INT here, since the value doesn't have a mode
10366 and we would otherwise be unable to work out the true cost. */
10367 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10368 0, speed_p);
10369 outer_code = SET;
10370 /* Slightly lower the cost of setting a core reg to a constant.
10371 This helps break up chains and allows for better scheduling. */
10372 if (REG_P (SET_DEST (x))
10373 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10374 *cost -= 1;
10375 x = SET_SRC (x);
10376 /* Immediate moves with an immediate in the range [0, 255] can be
10377 encoded in 16 bits in Thumb mode. */
10378 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10379 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10380 *cost >>= 1;
10381 goto const_int_cost;
10384 return false;
10386 case MEM:
10387 return arm_mem_costs (x, extra_cost, cost, speed_p);
10389 case PARALLEL:
10391 /* Calculations of LDM costs are complex. We assume an initial cost
10392 (ldm_1st) which will load the number of registers mentioned in
10393 ldm_regs_per_insn_1st registers; then each additional
10394 ldm_regs_per_insn_subsequent registers cost one more insn. The
10395 formula for N regs is thus:
10397 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10398 + ldm_regs_per_insn_subsequent - 1)
10399 / ldm_regs_per_insn_subsequent).
10401 Additional costs may also be added for addressing. A similar
10402 formula is used for STM. */
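/* For example, with ldm_regs_per_insn_1st == 3 and
   ldm_regs_per_insn_subsequent == 2, an LDM of six registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 3, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */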
10404 bool is_ldm = load_multiple_operation (x, SImode);
10405 bool is_stm = store_multiple_operation (x, SImode);
10407 if (is_ldm || is_stm)
10409 if (speed_p)
10411 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10412 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10413 ? extra_cost->ldst.ldm_regs_per_insn_1st
10414 : extra_cost->ldst.stm_regs_per_insn_1st;
10415 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10416 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10417 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10419 *cost += regs_per_insn_1st
10420 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10421 + regs_per_insn_sub - 1)
10422 / regs_per_insn_sub);
10423 return true;
10427 return false;
10429 case DIV:
10430 case UDIV:
10431 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10432 && (mode == SFmode || !TARGET_VFP_SINGLE))
10433 *cost += COSTS_N_INSNS (speed_p
10434 ? extra_cost->fp[mode != SFmode].div : 0);
10435 else if (mode == SImode && TARGET_IDIV)
10436 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10437 else
10438 *cost = LIBCALL_COST (2);
10440 /* Make the cost of sdiv more expensive so that udiv is preferred when
10441 both sdiv and udiv are possible. */
10442 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10443 return false; /* All arguments must be in registers. */
10445 case MOD:
10446 /* MOD by a power of 2 can be expanded as:
10447 rsbs r1, r0, #0
10448 and r0, r0, #(n - 1)
10449 and r1, r1, #(n - 1)
10450 rsbpl r0, r1, #0. */
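/* Together with the COSTS_N_INSNS (1) base cost set on entry, the
   COSTS_N_INSNS (3) added below accounts for all four instructions of
   the sequence above.  */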
10451 if (CONST_INT_P (XEXP (x, 1))
10452 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10453 && mode == SImode)
10455 *cost += COSTS_N_INSNS (3);
10457 if (speed_p)
10458 *cost += 2 * extra_cost->alu.logical
10459 + extra_cost->alu.arith;
10460 return true;
10463 /* Fall-through. */
10464 case UMOD:
10465 /* Make the cost of sdiv more expensive so that udiv is preferred when
10466 both sdiv and udiv are possible. */
10467 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10468 return false; /* All arguments must be in registers. */
10470 case ROTATE:
10471 if (mode == SImode && REG_P (XEXP (x, 1)))
10473 *cost += (COSTS_N_INSNS (1)
10474 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10475 if (speed_p)
10476 *cost += extra_cost->alu.shift_reg;
10477 return true;
10479 /* Fall through */
10480 case ROTATERT:
10481 case ASHIFT:
10482 case LSHIFTRT:
10483 case ASHIFTRT:
10484 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10486 *cost += (COSTS_N_INSNS (2)
10487 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10488 if (speed_p)
10489 *cost += 2 * extra_cost->alu.shift;
10490 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10491 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10492 *cost += 1;
10493 return true;
10495 else if (mode == SImode)
10497 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10498 /* Slightly disparage register shifts at -Os, but not by much. */
10499 if (!CONST_INT_P (XEXP (x, 1)))
10500 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10501 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10502 return true;
10504 else if (GET_MODE_CLASS (mode) == MODE_INT
10505 && GET_MODE_SIZE (mode) < 4)
10507 if (code == ASHIFT)
10509 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10510 /* Slightly disparage register shifts at -Os, but not by
10511 much. */
10512 if (!CONST_INT_P (XEXP (x, 1)))
10513 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10514 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10516 else if (code == LSHIFTRT || code == ASHIFTRT)
10518 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10520 /* Can use SBFX/UBFX. */
10521 if (speed_p)
10522 *cost += extra_cost->alu.bfx;
10523 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10525 else
10527 *cost += COSTS_N_INSNS (1);
10528 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10529 if (speed_p)
10531 if (CONST_INT_P (XEXP (x, 1)))
10532 *cost += 2 * extra_cost->alu.shift;
10533 else
10534 *cost += (extra_cost->alu.shift
10535 + extra_cost->alu.shift_reg);
10537 else
10538 /* Slightly disparage register shifts. */
10539 *cost += !CONST_INT_P (XEXP (x, 1));
10542 else /* Rotates. */
10544 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10545 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10546 if (speed_p)
10548 if (CONST_INT_P (XEXP (x, 1)))
10549 *cost += (2 * extra_cost->alu.shift
10550 + extra_cost->alu.log_shift);
10551 else
10552 *cost += (extra_cost->alu.shift
10553 + extra_cost->alu.shift_reg
10554 + extra_cost->alu.log_shift_reg);
10557 return true;
10560 *cost = LIBCALL_COST (2);
10561 return false;
10563 case BSWAP:
10564 if (arm_arch6)
10566 if (mode == SImode)
10568 if (speed_p)
10569 *cost += extra_cost->alu.rev;
10571 return false;
10574 else
10576 /* No rev instruction available. Look at arm_legacy_rev
10577 and thumb_legacy_rev for the form of RTL used then. */
10578 if (TARGET_THUMB)
10580 *cost += COSTS_N_INSNS (9);
10582 if (speed_p)
10584 *cost += 6 * extra_cost->alu.shift;
10585 *cost += 3 * extra_cost->alu.logical;
10588 else
10590 *cost += COSTS_N_INSNS (4);
10592 if (speed_p)
10594 *cost += 2 * extra_cost->alu.shift;
10595 *cost += extra_cost->alu.arith_shift;
10596 *cost += 2 * extra_cost->alu.logical;
10599 return true;
10601 return false;
10603 case MINUS:
10604 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10605 && (mode == SFmode || !TARGET_VFP_SINGLE))
10607 if (GET_CODE (XEXP (x, 0)) == MULT
10608 || GET_CODE (XEXP (x, 1)) == MULT)
10610 rtx mul_op0, mul_op1, sub_op;
10612 if (speed_p)
10613 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10615 if (GET_CODE (XEXP (x, 0)) == MULT)
10617 mul_op0 = XEXP (XEXP (x, 0), 0);
10618 mul_op1 = XEXP (XEXP (x, 0), 1);
10619 sub_op = XEXP (x, 1);
10621 else
10623 mul_op0 = XEXP (XEXP (x, 1), 0);
10624 mul_op1 = XEXP (XEXP (x, 1), 1);
10625 sub_op = XEXP (x, 0);
10628 /* The first operand of the multiply may be optionally
10629 negated. */
10630 if (GET_CODE (mul_op0) == NEG)
10631 mul_op0 = XEXP (mul_op0, 0);
10633 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10634 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10635 + rtx_cost (sub_op, mode, code, 0, speed_p));
10637 return true;
10640 if (speed_p)
10641 *cost += extra_cost->fp[mode != SFmode].addsub;
10642 return false;
10645 if (mode == SImode)
10647 rtx shift_by_reg = NULL;
10648 rtx shift_op;
10649 rtx non_shift_op;
10650 rtx op0 = XEXP (x, 0);
10651 rtx op1 = XEXP (x, 1);
10653 /* Factor out any borrow operation. There's more than one way
10654 of expressing this; try to recognize them all. */
10655 if (GET_CODE (op0) == MINUS)
10657 if (arm_borrow_operation (op1, SImode))
10659 op1 = XEXP (op0, 1);
10660 op0 = XEXP (op0, 0);
10662 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10663 op0 = XEXP (op0, 0);
10665 else if (GET_CODE (op1) == PLUS
10666 && arm_borrow_operation (XEXP (op1, 0), SImode))
10667 op1 = XEXP (op1, 0);
10668 else if (GET_CODE (op0) == NEG
10669 && arm_borrow_operation (op1, SImode))
10671 /* Negate with carry-in. For Thumb2 this is done with
10672 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10673 RSC instruction that exists in Arm mode. */
10674 if (speed_p)
10675 *cost += (TARGET_THUMB2
10676 ? extra_cost->alu.arith_shift
10677 : extra_cost->alu.arith);
10678 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10679 return true;
10681 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10682 Note we do mean ~borrow here. */
10683 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10685 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10686 return true;
10689 shift_op = shifter_op_p (op0, &shift_by_reg);
10690 if (shift_op == NULL)
10692 shift_op = shifter_op_p (op1, &shift_by_reg);
10693 non_shift_op = op0;
10695 else
10696 non_shift_op = op1;
10698 if (shift_op != NULL)
10700 if (shift_by_reg != NULL)
10702 if (speed_p)
10703 *cost += extra_cost->alu.arith_shift_reg;
10704 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10706 else if (speed_p)
10707 *cost += extra_cost->alu.arith_shift;
10709 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10710 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10711 return true;
10714 if (arm_arch_thumb2
10715 && GET_CODE (XEXP (x, 1)) == MULT)
10717 /* MLS. */
10718 if (speed_p)
10719 *cost += extra_cost->mult[0].add;
10720 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10721 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10722 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10723 return true;
10726 if (CONST_INT_P (op0))
10728 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10729 INTVAL (op0), NULL_RTX,
10730 NULL_RTX, 1, 0);
10731 *cost = COSTS_N_INSNS (insns);
10732 if (speed_p)
10733 *cost += insns * extra_cost->alu.arith;
10734 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10735 return true;
10737 else if (speed_p)
10738 *cost += extra_cost->alu.arith;
10740 /* Don't recurse as we don't want to cost any borrow that
10741 we've stripped. */
10742 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10743 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10744 return true;
10747 if (GET_MODE_CLASS (mode) == MODE_INT
10748 && GET_MODE_SIZE (mode) < 4)
10750 rtx shift_op, shift_reg;
10751 shift_reg = NULL;
10753 /* We check both sides of the MINUS for shifter operands since,
10754 unlike PLUS, it's not commutative. */
10756 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10757 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10759 /* Slightly disparage, as we might need to widen the result. */
10760 *cost += 1;
10761 if (speed_p)
10762 *cost += extra_cost->alu.arith;
10764 if (CONST_INT_P (XEXP (x, 0)))
10766 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10767 return true;
10770 return false;
10773 if (mode == DImode)
10775 *cost += COSTS_N_INSNS (1);
10777 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10779 rtx op1 = XEXP (x, 1);
10781 if (speed_p)
10782 *cost += 2 * extra_cost->alu.arith;
10784 if (GET_CODE (op1) == ZERO_EXTEND)
10785 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10786 0, speed_p);
10787 else
10788 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10789 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10790 0, speed_p);
10791 return true;
10793 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10795 if (speed_p)
10796 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10797 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10798 0, speed_p)
10799 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10800 return true;
10802 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10803 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10805 if (speed_p)
10806 *cost += (extra_cost->alu.arith
10807 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10808 ? extra_cost->alu.arith
10809 : extra_cost->alu.arith_shift));
10810 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10811 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10812 GET_CODE (XEXP (x, 1)), 0, speed_p));
10813 return true;
10816 if (speed_p)
10817 *cost += 2 * extra_cost->alu.arith;
10818 return false;
10821 /* Vector mode? */
10823 *cost = LIBCALL_COST (2);
10824 return false;
10826 case PLUS:
10827 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10828 && (mode == SFmode || !TARGET_VFP_SINGLE))
10830 if (GET_CODE (XEXP (x, 0)) == MULT)
10832 rtx mul_op0, mul_op1, add_op;
10834 if (speed_p)
10835 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10837 mul_op0 = XEXP (XEXP (x, 0), 0);
10838 mul_op1 = XEXP (XEXP (x, 0), 1);
10839 add_op = XEXP (x, 1);
10841 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10842 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10843 + rtx_cost (add_op, mode, code, 0, speed_p));
10845 return true;
10848 if (speed_p)
10849 *cost += extra_cost->fp[mode != SFmode].addsub;
10850 return false;
10852 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10854 *cost = LIBCALL_COST (2);
10855 return false;
10858 /* Narrow modes can be synthesized in SImode, but the range
10859 of useful sub-operations is limited. Check for shift operations
10860 on one of the operands. Only left shifts can be used in the
10861 narrow modes. */
10862 if (GET_MODE_CLASS (mode) == MODE_INT
10863 && GET_MODE_SIZE (mode) < 4)
10865 rtx shift_op, shift_reg;
10866 shift_reg = NULL;
10868 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10870 if (CONST_INT_P (XEXP (x, 1)))
10872 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10873 INTVAL (XEXP (x, 1)), NULL_RTX,
10874 NULL_RTX, 1, 0);
10875 *cost = COSTS_N_INSNS (insns);
10876 if (speed_p)
10877 *cost += insns * extra_cost->alu.arith;
10878 /* Slightly penalize a narrow operation as the result may
10879 need widening. */
10880 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10881 return true;
10884 /* Slightly penalize a narrow operation as the result may
10885 need widening. */
10886 *cost += 1;
10887 if (speed_p)
10888 *cost += extra_cost->alu.arith;
10890 return false;
10893 if (mode == SImode)
10895 rtx shift_op, shift_reg;
10897 if (TARGET_INT_SIMD
10898 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10899 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10901 /* UXTA[BH] or SXTA[BH]. */
10902 if (speed_p)
10903 *cost += extra_cost->alu.extend_arith;
10904 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10905 0, speed_p)
10906 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10907 return true;
10910 rtx op0 = XEXP (x, 0);
10911 rtx op1 = XEXP (x, 1);
10913 /* Handle a side effect of adding in the carry to an addition. */
10914 if (GET_CODE (op0) == PLUS
10915 && arm_carry_operation (op1, mode))
10917 op1 = XEXP (op0, 1);
10918 op0 = XEXP (op0, 0);
10920 else if (GET_CODE (op1) == PLUS
10921 && arm_carry_operation (op0, mode))
10923 op0 = XEXP (op1, 0);
10924 op1 = XEXP (op1, 1);
10926 else if (GET_CODE (op0) == PLUS)
10928 op0 = strip_carry_operation (op0);
10929 if (swap_commutative_operands_p (op0, op1))
10930 std::swap (op0, op1);
10933 if (arm_carry_operation (op0, mode))
10935 /* Adding the carry to a register is a canonicalization of
10936 adding 0 to the register plus the carry. */
10937 if (speed_p)
10938 *cost += extra_cost->alu.arith;
10939 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10940 return true;
10943 shift_reg = NULL;
10944 shift_op = shifter_op_p (op0, &shift_reg);
10945 if (shift_op != NULL)
10947 if (shift_reg)
10949 if (speed_p)
10950 *cost += extra_cost->alu.arith_shift_reg;
10951 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10953 else if (speed_p)
10954 *cost += extra_cost->alu.arith_shift;
10956 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10957 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10958 return true;
10961 if (GET_CODE (op0) == MULT)
10963 rtx mul_op = op0;
10965 if (TARGET_DSP_MULTIPLY
10966 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10967 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10968 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10969 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10970 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10971 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10972 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10973 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10974 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10975 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10976 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10977 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10978 == 16))))))
10980 /* SMLA[BT][BT]. */
10981 if (speed_p)
10982 *cost += extra_cost->mult[0].extend_add;
10983 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10984 SIGN_EXTEND, 0, speed_p)
10985 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10986 SIGN_EXTEND, 0, speed_p)
10987 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10988 return true;
10991 if (speed_p)
10992 *cost += extra_cost->mult[0].add;
10993 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10994 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10995 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10996 return true;
10999 if (CONST_INT_P (op1))
11001 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11002 INTVAL (op1), NULL_RTX,
11003 NULL_RTX, 1, 0);
11004 *cost = COSTS_N_INSNS (insns);
11005 if (speed_p)
11006 *cost += insns * extra_cost->alu.arith;
11007 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11008 return true;
11011 if (speed_p)
11012 *cost += extra_cost->alu.arith;
11014 /* Don't recurse here because we want to test the operands
11015 without any carry operation. */
11016 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11017 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11018 return true;
11021 if (mode == DImode)
11023 if (GET_CODE (XEXP (x, 0)) == MULT
11024 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11025 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11026 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11027 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11029 if (speed_p)
11030 *cost += extra_cost->mult[1].extend_add;
11031 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11032 ZERO_EXTEND, 0, speed_p)
11033 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11034 ZERO_EXTEND, 0, speed_p)
11035 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11036 return true;
11039 *cost += COSTS_N_INSNS (1);
11041 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11042 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11044 if (speed_p)
11045 *cost += (extra_cost->alu.arith
11046 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11047 ? extra_cost->alu.arith
11048 : extra_cost->alu.arith_shift));
11050 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11051 0, speed_p)
11052 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11053 return true;
11056 if (speed_p)
11057 *cost += 2 * extra_cost->alu.arith;
11058 return false;
11061 /* Vector mode? */
11062 *cost = LIBCALL_COST (2);
11063 return false;
11064 case IOR:
11066 rtx sub0, sub1;
11067 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11069 if (speed_p)
11070 *cost += extra_cost->alu.rev;
11072 return true;
11074 else if (mode == SImode && arm_arch_thumb2
11075 && arm_bfi_p (x, &sub0, &sub1))
11077 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11078 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11079 if (speed_p)
11080 *cost += extra_cost->alu.bfi;
11082 return true;
11086 /* Fall through. */
11087 case AND: case XOR:
11088 if (mode == SImode)
11090 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11091 rtx op0 = XEXP (x, 0);
11092 rtx shift_op, shift_reg;
11094 if (subcode == NOT
11095 && (code == AND
11096 || (code == IOR && TARGET_THUMB2)))
11097 op0 = XEXP (op0, 0);
11099 shift_reg = NULL;
11100 shift_op = shifter_op_p (op0, &shift_reg);
11101 if (shift_op != NULL)
11103 if (shift_reg)
11105 if (speed_p)
11106 *cost += extra_cost->alu.log_shift_reg;
11107 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11109 else if (speed_p)
11110 *cost += extra_cost->alu.log_shift;
11112 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11113 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11114 return true;
11117 if (CONST_INT_P (XEXP (x, 1)))
11119 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11120 INTVAL (XEXP (x, 1)), NULL_RTX,
11121 NULL_RTX, 1, 0);
11123 *cost = COSTS_N_INSNS (insns);
11124 if (speed_p)
11125 *cost += insns * extra_cost->alu.logical;
11126 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11127 return true;
11130 if (speed_p)
11131 *cost += extra_cost->alu.logical;
11132 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11133 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11134 return true;
11137 if (mode == DImode)
11139 rtx op0 = XEXP (x, 0);
11140 enum rtx_code subcode = GET_CODE (op0);
11142 *cost += COSTS_N_INSNS (1);
11144 if (subcode == NOT
11145 && (code == AND
11146 || (code == IOR && TARGET_THUMB2)))
11147 op0 = XEXP (op0, 0);
11149 if (GET_CODE (op0) == ZERO_EXTEND)
11151 if (speed_p)
11152 *cost += 2 * extra_cost->alu.logical;
11154 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11155 0, speed_p)
11156 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11157 return true;
11159 else if (GET_CODE (op0) == SIGN_EXTEND)
11161 if (speed_p)
11162 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11164 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11165 0, speed_p)
11166 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11167 return true;
11170 if (speed_p)
11171 *cost += 2 * extra_cost->alu.logical;
11173 return true;
11175 /* Vector mode? */
11177 *cost = LIBCALL_COST (2);
11178 return false;
11180 case MULT:
11181 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11182 && (mode == SFmode || !TARGET_VFP_SINGLE))
11184 rtx op0 = XEXP (x, 0);
11186 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11187 op0 = XEXP (op0, 0);
11189 if (speed_p)
11190 *cost += extra_cost->fp[mode != SFmode].mult;
11192 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11193 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11194 return true;
11196 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11198 *cost = LIBCALL_COST (2);
11199 return false;
11202 if (mode == SImode)
11204 if (TARGET_DSP_MULTIPLY
11205 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11206 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11207 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11208 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11209 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11210 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11211 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11212 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11213 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11214 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11215 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11216 && (INTVAL (XEXP (XEXP (x, 1), 1))
11217 == 16))))))
11219 /* SMUL[TB][TB]. */
11220 if (speed_p)
11221 *cost += extra_cost->mult[0].extend;
11222 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11223 SIGN_EXTEND, 0, speed_p);
11224 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11225 SIGN_EXTEND, 1, speed_p);
11226 return true;
11228 if (speed_p)
11229 *cost += extra_cost->mult[0].simple;
11230 return false;
11233 if (mode == DImode)
11235 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11236 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11237 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11238 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11240 if (speed_p)
11241 *cost += extra_cost->mult[1].extend;
11242 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11243 ZERO_EXTEND, 0, speed_p)
11244 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11245 ZERO_EXTEND, 0, speed_p));
11246 return true;
11249 *cost = LIBCALL_COST (2);
11250 return false;
11253 /* Vector mode? */
11254 *cost = LIBCALL_COST (2);
11255 return false;
11257 case NEG:
11258 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11259 && (mode == SFmode || !TARGET_VFP_SINGLE))
11261 if (GET_CODE (XEXP (x, 0)) == MULT)
11263 /* VNMUL. */
11264 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11265 return true;
11268 if (speed_p)
11269 *cost += extra_cost->fp[mode != SFmode].neg;
11271 return false;
11273 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11275 *cost = LIBCALL_COST (1);
11276 return false;
11279 if (mode == SImode)
11281 if (GET_CODE (XEXP (x, 0)) == ABS)
11283 *cost += COSTS_N_INSNS (1);
11284 /* Assume the non-flag-changing variant. */
11285 if (speed_p)
11286 *cost += (extra_cost->alu.log_shift
11287 + extra_cost->alu.arith_shift);
11288 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11289 return true;
11292 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11293 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11295 *cost += COSTS_N_INSNS (1);
11296 /* No extra cost for MOV imm and MVN imm. */
11297 /* If the comparison op is using the flags, there's no further
11298 cost, otherwise we need to add the cost of the comparison. */
11299 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11300 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11301 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11303 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11304 *cost += (COSTS_N_INSNS (1)
11305 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11306 0, speed_p)
11307 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11308 1, speed_p));
11309 if (speed_p)
11310 *cost += extra_cost->alu.arith;
11312 return true;
11315 if (speed_p)
11316 *cost += extra_cost->alu.arith;
11317 return false;
11320 if (GET_MODE_CLASS (mode) == MODE_INT
11321 && GET_MODE_SIZE (mode) < 4)
11323 /* Slightly disparage, as we might need an extend operation. */
11324 *cost += 1;
11325 if (speed_p)
11326 *cost += extra_cost->alu.arith;
11327 return false;
11330 if (mode == DImode)
11332 *cost += COSTS_N_INSNS (1);
11333 if (speed_p)
11334 *cost += 2 * extra_cost->alu.arith;
11335 return false;
11338 /* Vector mode? */
11339 *cost = LIBCALL_COST (1);
11340 return false;
11342 case NOT:
11343 if (mode == SImode)
11345 rtx shift_op;
11346 rtx shift_reg = NULL;
11348 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11350 if (shift_op)
11352 if (shift_reg != NULL)
11354 if (speed_p)
11355 *cost += extra_cost->alu.log_shift_reg;
11356 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11358 else if (speed_p)
11359 *cost += extra_cost->alu.log_shift;
11360 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11361 return true;
11364 if (speed_p)
11365 *cost += extra_cost->alu.logical;
11366 return false;
11368 if (mode == DImode)
11370 *cost += COSTS_N_INSNS (1);
11371 return false;
11374 /* Vector mode? */
11376 *cost += LIBCALL_COST (1);
11377 return false;
11379 case IF_THEN_ELSE:
11381 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11383 *cost += COSTS_N_INSNS (3);
11384 return true;
11386 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11387 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11389 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11390 /* Assume that if one arm of the if_then_else is a register,
12391 it will be tied to the result, eliminating the
11392 conditional insn. */
11393 if (REG_P (XEXP (x, 1)))
11394 *cost += op2cost;
11395 else if (REG_P (XEXP (x, 2)))
11396 *cost += op1cost;
11397 else
11399 if (speed_p)
11401 if (extra_cost->alu.non_exec_costs_exec)
11402 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11403 else
11404 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11406 else
11407 *cost += op1cost + op2cost;
11410 return true;
11412 case COMPARE:
11413 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11414 *cost = 0;
11415 else
11417 machine_mode op0mode;
11418 /* We'll mostly assume that the cost of a compare is the cost of the
11419 LHS. However, there are some notable exceptions. */
11421 /* Floating point compares are never done as side-effects. */
11422 op0mode = GET_MODE (XEXP (x, 0));
11423 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11424 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11426 if (speed_p)
11427 *cost += extra_cost->fp[op0mode != SFmode].compare;
11429 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11431 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11432 return true;
11435 return false;
11437 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11439 *cost = LIBCALL_COST (2);
11440 return false;
11443 /* DImode compares normally take two insns. */
11444 if (op0mode == DImode)
11446 *cost += COSTS_N_INSNS (1);
11447 if (speed_p)
11448 *cost += 2 * extra_cost->alu.arith;
11449 return false;
11452 if (op0mode == SImode)
11454 rtx shift_op;
11455 rtx shift_reg;
11457 if (XEXP (x, 1) == const0_rtx
11458 && !(REG_P (XEXP (x, 0))
11459 || (GET_CODE (XEXP (x, 0)) == SUBREG
11460 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11462 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11464 /* Multiply operations that set the flags are often
11465 significantly more expensive. */
11466 if (speed_p
11467 && GET_CODE (XEXP (x, 0)) == MULT
11468 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11469 *cost += extra_cost->mult[0].flag_setting;
11471 if (speed_p
11472 && GET_CODE (XEXP (x, 0)) == PLUS
11473 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11474 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11475 0), 1), mode))
11476 *cost += extra_cost->mult[0].flag_setting;
11477 return true;
11480 shift_reg = NULL;
11481 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11482 if (shift_op != NULL)
11484 if (shift_reg != NULL)
11486 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11487 1, speed_p);
11488 if (speed_p)
11489 *cost += extra_cost->alu.arith_shift_reg;
11491 else if (speed_p)
11492 *cost += extra_cost->alu.arith_shift;
11493 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11494 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11495 return true;
11498 if (speed_p)
11499 *cost += extra_cost->alu.arith;
11500 if (CONST_INT_P (XEXP (x, 1))
11501 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11503 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11504 return true;
11506 return false;
11509 /* Vector mode? */
11511 *cost = LIBCALL_COST (2);
11512 return false;
11514 return true;
11516 case EQ:
11517 case GE:
11518 case GT:
11519 case LE:
11520 case LT:
11521 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11522 vcle and vclt). */
11523 if (TARGET_NEON
11524 && TARGET_HARD_FLOAT
11525 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11526 && (XEXP (x, 1) == CONST0_RTX (mode)))
11528 *cost = 0;
11529 return true;
11532 /* Fall through. */
11533 case NE:
11534 case LTU:
11535 case LEU:
11536 case GEU:
11537 case GTU:
11538 case ORDERED:
11539 case UNORDERED:
11540 case UNEQ:
11541 case UNLE:
11542 case UNLT:
11543 case UNGE:
11544 case UNGT:
11545 case LTGT:
11546 if (outer_code == SET)
11548 /* Is it a store-flag operation? */
11549 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11550 && XEXP (x, 1) == const0_rtx)
11552 /* Thumb also needs an IT insn. */
11553 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11554 return true;
11556 if (XEXP (x, 1) == const0_rtx)
11558 switch (code)
11560 case LT:
11561 /* LSR Rd, Rn, #31. */
11562 if (speed_p)
11563 *cost += extra_cost->alu.shift;
11564 break;
11566 case EQ:
11567 /* RSBS T1, Rn, #0
11568 ADC Rd, Rn, T1. */
11570 case NE:
11571 /* SUBS T1, Rn, #1
11572 SBC Rd, Rn, T1. */
11573 *cost += COSTS_N_INSNS (1);
11574 break;
11576 case LE:
11577 /* RSBS T1, Rn, Rn, LSR #31
11578 ADC Rd, Rn, T1. */
11579 *cost += COSTS_N_INSNS (1);
11580 if (speed_p)
11581 *cost += extra_cost->alu.arith_shift;
11582 break;
11584 case GT:
11585 /* RSB Rd, Rn, Rn, ASR #1
11586 LSR Rd, Rd, #31. */
11587 *cost += COSTS_N_INSNS (1);
11588 if (speed_p)
11589 *cost += (extra_cost->alu.arith_shift
11590 + extra_cost->alu.shift);
11591 break;
11593 case GE:
11594 /* ASR Rd, Rn, #31
11595 ADD Rd, Rn, #1. */
11596 *cost += COSTS_N_INSNS (1);
11597 if (speed_p)
11598 *cost += extra_cost->alu.shift;
11599 break;
11601 default:
11602 /* Remaining cases are either meaningless or would take
11603 three insns anyway. */
11604 *cost = COSTS_N_INSNS (3);
11605 break;
11607 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11608 return true;
11610 else
11612 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11613 if (CONST_INT_P (XEXP (x, 1))
11614 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11616 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11617 return true;
11620 return false;
11623 /* Not directly inside a set. If it involves the condition code
11624 register it must be the condition for a branch, cond_exec or
11625 I_T_E operation. Since the comparison is performed elsewhere
11626 this is just the control part which has no additional
11627 cost. */
11628 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11629 && XEXP (x, 1) == const0_rtx)
11631 *cost = 0;
11632 return true;
11634 return false;
11636 case ABS:
11637 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11638 && (mode == SFmode || !TARGET_VFP_SINGLE))
11640 if (speed_p)
11641 *cost += extra_cost->fp[mode != SFmode].neg;
11643 return false;
11645 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11647 *cost = LIBCALL_COST (1);
11648 return false;
11651 if (mode == SImode)
11653 if (speed_p)
11654 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11655 return false;
11657 /* Vector mode? */
11658 *cost = LIBCALL_COST (1);
11659 return false;
11661 case SIGN_EXTEND:
11662 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11663 && MEM_P (XEXP (x, 0)))
11665 if (mode == DImode)
11666 *cost += COSTS_N_INSNS (1);
11668 if (!speed_p)
11669 return true;
11671 if (GET_MODE (XEXP (x, 0)) == SImode)
11672 *cost += extra_cost->ldst.load;
11673 else
11674 *cost += extra_cost->ldst.load_sign_extend;
11676 if (mode == DImode)
11677 *cost += extra_cost->alu.shift;
11679 return true;
11682 /* Widening from less than 32-bits requires an extend operation. */
11683 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11685 /* We have SXTB/SXTH. */
11686 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11687 if (speed_p)
11688 *cost += extra_cost->alu.extend;
11690 else if (GET_MODE (XEXP (x, 0)) != SImode)
11692 /* Needs two shifts. */
11693 *cost += COSTS_N_INSNS (1);
11694 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11695 if (speed_p)
11696 *cost += 2 * extra_cost->alu.shift;
11699 /* Widening beyond 32-bits requires one more insn. */
11700 if (mode == DImode)
11702 *cost += COSTS_N_INSNS (1);
11703 if (speed_p)
11704 *cost += extra_cost->alu.shift;
11707 return true;
11709 case ZERO_EXTEND:
11710 if ((arm_arch4
11711 || GET_MODE (XEXP (x, 0)) == SImode
11712 || GET_MODE (XEXP (x, 0)) == QImode)
11713 && MEM_P (XEXP (x, 0)))
11715 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11717 if (mode == DImode)
11718 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11720 return true;
11723 /* Widening from less than 32-bits requires an extend operation. */
11724 if (GET_MODE (XEXP (x, 0)) == QImode)
11726 /* UXTB can be a shorter instruction in Thumb2, but it might
11727 be slower than the AND Rd, Rn, #255 alternative. When
11728 optimizing for speed it should never be slower to use
11729 AND, and we don't really model 16-bit vs 32-bit insns
11730 here. */
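	    /* Illustrative example (the encoding sizes are an assumption
	       here, not taken from this file): with low registers
	       "uxtb r0, r1" can use a 16-bit Thumb encoding, while
	       "and r0, r1, #255" needs a 32-bit Thumb-2 encoding, hence
	       the size-only trade-off described above.  */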
11731 if (speed_p)
11732 *cost += extra_cost->alu.logical;
11734 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11736 /* We have UXTB/UXTH. */
11737 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11738 if (speed_p)
11739 *cost += extra_cost->alu.extend;
11741 else if (GET_MODE (XEXP (x, 0)) != SImode)
11743 /* Needs two shifts. It's marginally preferable to use
11744 shifts rather than two BIC instructions as the second
11745 shift may merge with a subsequent insn as a shifter
11746 op. */
11747 *cost = COSTS_N_INSNS (2);
11748 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11749 if (speed_p)
11750 *cost += 2 * extra_cost->alu.shift;
11753 /* Widening beyond 32-bits requires one more insn. */
11754 if (mode == DImode)
11756 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11759 return true;
11761 case CONST_INT:
11762 *cost = 0;
11763 /* CONST_INT has no mode, so we cannot tell for sure how many
11764 insns are really going to be needed. The best we can do is
11765 look at the value passed. If it fits in SImode, then assume
11766 that's the mode it will be used for. Otherwise assume it
11767 will be used in DImode. */
11768 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11769 mode = SImode;
11770 else
11771 mode = DImode;
11773 /* Avoid blowing up in arm_gen_constant (). */
11774 if (!(outer_code == PLUS
11775 || outer_code == AND
11776 || outer_code == IOR
11777 || outer_code == XOR
11778 || outer_code == MINUS))
11779 outer_code = SET;
11781 const_int_cost:
11782 if (mode == SImode)
11784 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11785 INTVAL (x), NULL, NULL,
11786 0, 0));
11787 /* Extra costs? */
11789 else
11791 *cost += COSTS_N_INSNS (arm_gen_constant
11792 (outer_code, SImode, NULL,
11793 trunc_int_for_mode (INTVAL (x), SImode),
11794 NULL, NULL, 0, 0)
11795 + arm_gen_constant (outer_code, SImode, NULL,
11796 INTVAL (x) >> 32, NULL,
11797 NULL, 0, 0));
11798 /* Extra costs? */
11801 return true;
11803 case CONST:
11804 case LABEL_REF:
11805 case SYMBOL_REF:
11806 if (speed_p)
11808 if (arm_arch_thumb2 && !flag_pic)
11809 *cost += COSTS_N_INSNS (1);
11810 else
11811 *cost += extra_cost->ldst.load;
11813 else
11814 *cost += COSTS_N_INSNS (1);
11816 if (flag_pic)
11818 *cost += COSTS_N_INSNS (1);
11819 if (speed_p)
11820 *cost += extra_cost->alu.arith;
11823 return true;
11825 case CONST_FIXED:
11826 *cost = COSTS_N_INSNS (4);
11827 /* Fixme. */
11828 return true;
11830 case CONST_DOUBLE:
11831 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11832 && (mode == SFmode || !TARGET_VFP_SINGLE))
11834 if (vfp3_const_double_rtx (x))
11836 if (speed_p)
11837 *cost += extra_cost->fp[mode == DFmode].fpconst;
11838 return true;
11841 if (speed_p)
11843 if (mode == DFmode)
11844 *cost += extra_cost->ldst.loadd;
11845 else
11846 *cost += extra_cost->ldst.loadf;
11848 else
11849 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11851 return true;
11853 *cost = COSTS_N_INSNS (4);
11854 return true;
11856 case CONST_VECTOR:
11857 /* Fixme. */
11858 if (((TARGET_NEON && TARGET_HARD_FLOAT
11859 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11860 || TARGET_HAVE_MVE)
11861 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11862 *cost = COSTS_N_INSNS (1);
11863 else
11864 *cost = COSTS_N_INSNS (4);
11865 return true;
11867 case HIGH:
11868 case LO_SUM:
11869 /* When optimizing for size, we prefer constant pool entries to
11870 MOVW/MOVT pairs, so bump the cost of these slightly. */
11871 if (!speed_p)
11872 *cost += 1;
11873 return true;
11875 case CLZ:
11876 if (speed_p)
11877 *cost += extra_cost->alu.clz;
11878 return false;
11880 case SMIN:
11881 if (XEXP (x, 1) == const0_rtx)
11883 if (speed_p)
11884 *cost += extra_cost->alu.log_shift;
11885 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11886 return true;
11888 /* Fall through. */
11889 case SMAX:
11890 case UMIN:
11891 case UMAX:
11892 *cost += COSTS_N_INSNS (1);
11893 return false;
11895 case TRUNCATE:
11896 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11898 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11899 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11900 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11901 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11902 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11903 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11904 == ZERO_EXTEND))))
11906 if (speed_p)
11907 *cost += extra_cost->mult[1].extend;
11908 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11909 ZERO_EXTEND, 0, speed_p)
11910 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11911 ZERO_EXTEND, 0, speed_p));
11912 return true;
11914 *cost = LIBCALL_COST (1);
11915 return false;
11917 case UNSPEC_VOLATILE:
11918 case UNSPEC:
11919 return arm_unspec_cost (x, outer_code, speed_p, cost);
11921 case PC:
11922 /* Reading the PC is like reading any other register. Writing it
11923 is more expensive, but we take that into account elsewhere. */
11924 *cost = 0;
11925 return true;
11927 case ZERO_EXTRACT:
11928 /* TODO: Simple zero_extract of bottom bits using AND. */
11929 /* Fall through. */
11930 case SIGN_EXTRACT:
11931 if (arm_arch6
11932 && mode == SImode
11933 && CONST_INT_P (XEXP (x, 1))
11934 && CONST_INT_P (XEXP (x, 2)))
11936 if (speed_p)
11937 *cost += extra_cost->alu.bfx;
11938 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11939 return true;
11941 /* Without UBFX/SBFX, need to resort to shift operations. */
11942 *cost += COSTS_N_INSNS (1);
11943 if (speed_p)
11944 *cost += 2 * extra_cost->alu.shift;
11945 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11946 return true;
11948 case FLOAT_EXTEND:
11949 if (TARGET_HARD_FLOAT)
11951 if (speed_p)
11952 *cost += extra_cost->fp[mode == DFmode].widen;
11953 if (!TARGET_VFP5
11954 && GET_MODE (XEXP (x, 0)) == HFmode)
11956 /* Pre v8, widening HF->DF is a two-step process, first
11957 widening to SFmode. */
11958 *cost += COSTS_N_INSNS (1);
11959 if (speed_p)
11960 *cost += extra_cost->fp[0].widen;
11962 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11963 return true;
11966 *cost = LIBCALL_COST (1);
11967 return false;
11969 case FLOAT_TRUNCATE:
11970 if (TARGET_HARD_FLOAT)
11972 if (speed_p)
11973 *cost += extra_cost->fp[mode == DFmode].narrow;
11974 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11975 return true;
11976 /* Vector modes? */
11978 *cost = LIBCALL_COST (1);
11979 return false;
11981 case FMA:
11982 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11984 rtx op0 = XEXP (x, 0);
11985 rtx op1 = XEXP (x, 1);
11986 rtx op2 = XEXP (x, 2);
11989 /* vfms or vfnma. */
11990 if (GET_CODE (op0) == NEG)
11991 op0 = XEXP (op0, 0);
11993 /* vfnms or vfnma. */
11994 if (GET_CODE (op2) == NEG)
11995 op2 = XEXP (op2, 0);
11997 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11998 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11999 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12001 if (speed_p)
12002 *cost += extra_cost->fp[mode == DFmode].fma;
12004 return true;
12007 *cost = LIBCALL_COST (3);
12008 return false;
12010 case FIX:
12011 case UNSIGNED_FIX:
12012 if (TARGET_HARD_FLOAT)
12014 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12015 a vcvt fixed-point conversion. */
12016 if (code == FIX && mode == SImode
12017 && GET_CODE (XEXP (x, 0)) == FIX
12018 && GET_MODE (XEXP (x, 0)) == SFmode
12019 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12020 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12021 > 0)
12023 if (speed_p)
12024 *cost += extra_cost->fp[0].toint;
12026 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12027 code, 0, speed_p);
12028 return true;
12031 if (GET_MODE_CLASS (mode) == MODE_INT)
12033 mode = GET_MODE (XEXP (x, 0));
12034 if (speed_p)
12035 *cost += extra_cost->fp[mode == DFmode].toint;
12036 /* Strip off the 'cost' of rounding towards zero. */
12037 if (GET_CODE (XEXP (x, 0)) == FIX)
12038 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12039 0, speed_p);
12040 else
12041 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12042 /* ??? Increase the cost to deal with transferring from
12043 FP -> CORE registers? */
12044 return true;
12046 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12047 && TARGET_VFP5)
12049 if (speed_p)
12050 *cost += extra_cost->fp[mode == DFmode].roundint;
12051 return false;
12053 /* Vector costs? */
12055 *cost = LIBCALL_COST (1);
12056 return false;
12058 case FLOAT:
12059 case UNSIGNED_FLOAT:
12060 if (TARGET_HARD_FLOAT)
12062 /* ??? Increase the cost to deal with transferring from CORE
12063 -> FP registers? */
12064 if (speed_p)
12065 *cost += extra_cost->fp[mode == DFmode].fromint;
12066 return false;
12068 *cost = LIBCALL_COST (1);
12069 return false;
12071 case CALL:
12072 return true;
12074 case ASM_OPERANDS:
12076 /* Just a guess. Guess number of instructions in the asm
12077 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12078 though (see PR60663). */
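      /* For example, following the computation below: an asm whose template
	 contains two instructions and which has three inputs is costed as
	 COSTS_N_INSNS (2 + 3) = COSTS_N_INSNS (5).  */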
12079 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12080 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12082 *cost = COSTS_N_INSNS (asm_length + num_operands);
12083 return true;
12085 default:
12086 if (mode != VOIDmode)
12087 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12088 else
12089 *cost = COSTS_N_INSNS (4); /* Who knows? */
12090 return false;
12094 #undef HANDLE_NARROW_SHIFT_ARITH
12096 /* RTX costs entry point. */
12098 static bool
12099 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12100 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12102 bool result;
12103 int code = GET_CODE (x);
12104 gcc_assert (current_tune->insn_extra_cost);
12106 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12107 (enum rtx_code) outer_code,
12108 current_tune->insn_extra_cost,
12109 total, speed);
12111 if (dump_file && arm_verbose_cost)
12113 print_rtl_single (dump_file, x);
12114 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12115 *total, result ? "final" : "partial");
12117 return result;
12120 static int
12121 arm_insn_cost (rtx_insn *insn, bool speed)
12123 int cost;
12125 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12126 will likely disappear during register allocation. */
12127 if (!reload_completed
12128 && GET_CODE (PATTERN (insn)) == SET
12129 && REG_P (SET_DEST (PATTERN (insn)))
12130 && REG_P (SET_SRC (PATTERN (insn))))
12131 return 2;
12132 cost = pattern_cost (PATTERN (insn), speed);
12133 /* If the cost is zero, then it's likely a complex insn. We don't want the
12134 cost of these to be less than something we know about. */
12135 return cost ? cost : COSTS_N_INSNS (2);
12138 /* All address computations that can be done are free, but rtx cost returns
12139 the same for practically all of them. So we weight the different types
12140 of address here in the order (most pref first):
12141 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
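/* Illustration using the weights below (the addresses are hypothetical):
   a post-increment such as [r0], #4 scores 0, reg plus constant such as
   [r0, #8] scores 2, reg plus shifted reg such as [r0, r1, lsl #2] scores 3,
   a plain register scores 6, and a symbol or label reference scores 10.  */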
12142 static inline int
12143 arm_arm_address_cost (rtx x)
12145 enum rtx_code c = GET_CODE (x);
12147 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12148 return 0;
12149 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12150 return 10;
12152 if (c == PLUS)
12154 if (CONST_INT_P (XEXP (x, 1)))
12155 return 2;
12157 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12158 return 3;
12160 return 4;
12163 return 6;
12166 static inline int
12167 arm_thumb_address_cost (rtx x)
12169 enum rtx_code c = GET_CODE (x);
12171 if (c == REG)
12172 return 1;
12173 if (c == PLUS
12174 && REG_P (XEXP (x, 0))
12175 && CONST_INT_P (XEXP (x, 1)))
12176 return 1;
12178 return 2;
12181 static int
12182 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12183 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12185 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12188 /* Adjust cost hook for XScale. */
12189 static bool
12190 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12191 int * cost)
12193 /* Some true dependencies can have a higher cost depending
12194 on precisely how certain input operands are used. */
12195 if (dep_type == 0
12196 && recog_memoized (insn) >= 0
12197 && recog_memoized (dep) >= 0)
12199 int shift_opnum = get_attr_shift (insn);
12200 enum attr_type attr_type = get_attr_type (dep);
12202 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12203 operand for INSN. If we have a shifted input operand and the
12204 instruction we depend on is another ALU instruction, then we may
12205 have to account for an additional stall. */
12206 if (shift_opnum != 0
12207 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12208 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12209 || attr_type == TYPE_ALUS_SHIFT_IMM
12210 || attr_type == TYPE_LOGIC_SHIFT_IMM
12211 || attr_type == TYPE_LOGICS_SHIFT_IMM
12212 || attr_type == TYPE_ALU_SHIFT_REG
12213 || attr_type == TYPE_ALUS_SHIFT_REG
12214 || attr_type == TYPE_LOGIC_SHIFT_REG
12215 || attr_type == TYPE_LOGICS_SHIFT_REG
12216 || attr_type == TYPE_MOV_SHIFT
12217 || attr_type == TYPE_MVN_SHIFT
12218 || attr_type == TYPE_MOV_SHIFT_REG
12219 || attr_type == TYPE_MVN_SHIFT_REG))
12221 rtx shifted_operand;
12222 int opno;
12224 /* Get the shifted operand. */
12225 extract_insn (insn);
12226 shifted_operand = recog_data.operand[shift_opnum];
12228 /* Iterate over all the operands in DEP. If we write an operand
12229 that overlaps with SHIFTED_OPERAND, then we have to increase the
12230 cost of this dependency. */
12231 extract_insn (dep);
12232 preprocess_constraints (dep);
12233 for (opno = 0; opno < recog_data.n_operands; opno++)
12235 /* We can ignore strict inputs. */
12236 if (recog_data.operand_type[opno] == OP_IN)
12237 continue;
12239 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12240 shifted_operand))
12242 *cost = 2;
12243 return false;
12248 return true;
12251 /* Adjust cost hook for Cortex A9. */
12252 static bool
12253 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12254 int * cost)
12256 switch (dep_type)
12258 case REG_DEP_ANTI:
12259 *cost = 0;
12260 return false;
12262 case REG_DEP_TRUE:
12263 case REG_DEP_OUTPUT:
12264 if (recog_memoized (insn) >= 0
12265 && recog_memoized (dep) >= 0)
12267 if (GET_CODE (PATTERN (insn)) == SET)
12269 if (GET_MODE_CLASS
12270 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12271 || GET_MODE_CLASS
12272 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12274 enum attr_type attr_type_insn = get_attr_type (insn);
12275 enum attr_type attr_type_dep = get_attr_type (dep);
12277 /* By default all dependencies of the form
12278 s0 = s0 <op> s1
12279 s0 = s0 <op> s2
12280 have an extra latency of 1 cycle because
12281 of the input and output dependency in this
12282 case. However this gets modeled as a true
12283 dependency and hence all these checks. */
12284 if (REG_P (SET_DEST (PATTERN (insn)))
12285 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12287 /* FMACS is a special case where the dependent
12288 instruction can be issued 3 cycles before
12289 the normal latency in case of an output
12290 dependency. */
12291 if ((attr_type_insn == TYPE_FMACS
12292 || attr_type_insn == TYPE_FMACD)
12293 && (attr_type_dep == TYPE_FMACS
12294 || attr_type_dep == TYPE_FMACD))
12296 if (dep_type == REG_DEP_OUTPUT)
12297 *cost = insn_default_latency (dep) - 3;
12298 else
12299 *cost = insn_default_latency (dep);
12300 return false;
12302 else
12304 if (dep_type == REG_DEP_OUTPUT)
12305 *cost = insn_default_latency (dep) + 1;
12306 else
12307 *cost = insn_default_latency (dep);
12309 return false;
12314 break;
12316 default:
12317 gcc_unreachable ();
12320 return true;
12323 /* Adjust cost hook for FA726TE. */
12324 static bool
12325 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12326 int * cost)
12328 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12329 has a penalty of 3. */
12330 if (dep_type == REG_DEP_TRUE
12331 && recog_memoized (insn) >= 0
12332 && recog_memoized (dep) >= 0
12333 && get_attr_conds (dep) == CONDS_SET)
12335 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12336 if (get_attr_conds (insn) == CONDS_USE
12337 && get_attr_type (insn) != TYPE_BRANCH)
12339 *cost = 3;
12340 return false;
12343 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12344 || get_attr_conds (insn) == CONDS_USE)
12346 *cost = 0;
12347 return false;
12351 return true;
12354 /* Implement TARGET_REGISTER_MOVE_COST.
12356 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12357 such a move is typically more expensive than a single memory access. We set
12358 the cost to less than two memory accesses so that floating
12359 point to integer conversion does not go through memory. */
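/* As a sanity check on the numbers (using the constants here and in
   arm_memory_move_cost below): the VFP<->core cost of 15 lies above one
   memory access (10) but below two (20), matching the intent above.  */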
12362 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12363 reg_class_t from, reg_class_t to)
12365 if (TARGET_32BIT)
12367 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12368 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12369 return 15;
12370 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12371 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12372 return 4;
12373 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12374 return 20;
12375 else
12376 return 2;
12378 else
12380 if (from == HI_REGS || to == HI_REGS)
12381 return 4;
12382 else
12383 return 2;
12387 /* Implement TARGET_MEMORY_MOVE_COST. */
12390 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12391 bool in ATTRIBUTE_UNUSED)
12393 if (TARGET_32BIT)
12394 return 10;
12395 else
12397 if (GET_MODE_SIZE (mode) < 4)
12398 return 8;
12399 else
12400 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12404 /* Vectorizer cost model implementation. */
12406 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12407 static int
12408 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12409 tree vectype,
12410 int misalign ATTRIBUTE_UNUSED)
12412 unsigned elements;
12414 switch (type_of_cost)
12416 case scalar_stmt:
12417 return current_tune->vec_costs->scalar_stmt_cost;
12419 case scalar_load:
12420 return current_tune->vec_costs->scalar_load_cost;
12422 case scalar_store:
12423 return current_tune->vec_costs->scalar_store_cost;
12425 case vector_stmt:
12426 return current_tune->vec_costs->vec_stmt_cost;
12428 case vector_load:
12429 return current_tune->vec_costs->vec_align_load_cost;
12431 case vector_store:
12432 return current_tune->vec_costs->vec_store_cost;
12434 case vec_to_scalar:
12435 return current_tune->vec_costs->vec_to_scalar_cost;
12437 case scalar_to_vec:
12438 return current_tune->vec_costs->scalar_to_vec_cost;
12440 case unaligned_load:
12441 case vector_gather_load:
12442 return current_tune->vec_costs->vec_unalign_load_cost;
12444 case unaligned_store:
12445 case vector_scatter_store:
12446 return current_tune->vec_costs->vec_unalign_store_cost;
12448 case cond_branch_taken:
12449 return current_tune->vec_costs->cond_taken_branch_cost;
12451 case cond_branch_not_taken:
12452 return current_tune->vec_costs->cond_not_taken_branch_cost;
12454 case vec_perm:
12455 case vec_promote_demote:
12456 return current_tune->vec_costs->vec_stmt_cost;
12458 case vec_construct:
12459 elements = TYPE_VECTOR_SUBPARTS (vectype);
12460 return elements / 2 + 1;
12462 default:
12463 gcc_unreachable ();
12467 /* Return true if and only if this insn can dual-issue only as older. */
12468 static bool
12469 cortexa7_older_only (rtx_insn *insn)
12471 if (recog_memoized (insn) < 0)
12472 return false;
12474 switch (get_attr_type (insn))
12476 case TYPE_ALU_DSP_REG:
12477 case TYPE_ALU_SREG:
12478 case TYPE_ALUS_SREG:
12479 case TYPE_LOGIC_REG:
12480 case TYPE_LOGICS_REG:
12481 case TYPE_ADC_REG:
12482 case TYPE_ADCS_REG:
12483 case TYPE_ADR:
12484 case TYPE_BFM:
12485 case TYPE_REV:
12486 case TYPE_MVN_REG:
12487 case TYPE_SHIFT_IMM:
12488 case TYPE_SHIFT_REG:
12489 case TYPE_LOAD_BYTE:
12490 case TYPE_LOAD_4:
12491 case TYPE_STORE_4:
12492 case TYPE_FFARITHS:
12493 case TYPE_FADDS:
12494 case TYPE_FFARITHD:
12495 case TYPE_FADDD:
12496 case TYPE_FMOV:
12497 case TYPE_F_CVT:
12498 case TYPE_FCMPS:
12499 case TYPE_FCMPD:
12500 case TYPE_FCONSTS:
12501 case TYPE_FCONSTD:
12502 case TYPE_FMULS:
12503 case TYPE_FMACS:
12504 case TYPE_FMULD:
12505 case TYPE_FMACD:
12506 case TYPE_FDIVS:
12507 case TYPE_FDIVD:
12508 case TYPE_F_MRC:
12509 case TYPE_F_MRRC:
12510 case TYPE_F_FLAG:
12511 case TYPE_F_LOADS:
12512 case TYPE_F_STORES:
12513 return true;
12514 default:
12515 return false;
12519 /* Return true if and only if this insn can dual-issue as younger. */
12520 static bool
12521 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12523 if (recog_memoized (insn) < 0)
12525 if (verbose > 5)
12526 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12527 return false;
12530 switch (get_attr_type (insn))
12532 case TYPE_ALU_IMM:
12533 case TYPE_ALUS_IMM:
12534 case TYPE_LOGIC_IMM:
12535 case TYPE_LOGICS_IMM:
12536 case TYPE_EXTEND:
12537 case TYPE_MVN_IMM:
12538 case TYPE_MOV_IMM:
12539 case TYPE_MOV_REG:
12540 case TYPE_MOV_SHIFT:
12541 case TYPE_MOV_SHIFT_REG:
12542 case TYPE_BRANCH:
12543 case TYPE_CALL:
12544 return true;
12545 default:
12546 return false;
12551 /* Look for an instruction that can dual issue only as an older
12552 instruction, and move it in front of any instructions that can
12553 dual-issue as younger, while preserving the relative order of all
12554 other instructions in the ready list. This is a heuristic to help
12555 dual-issue in later cycles, by postponing issue of more flexible
12556 instructions. This heuristic may affect dual issue opportunities
12557 in the current cycle. */
12558 static void
12559 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12560 int *n_readyp, int clock)
12562 int i;
12563 int first_older_only = -1, first_younger = -1;
12565 if (verbose > 5)
12566 fprintf (file,
12567 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12568 clock,
12569 *n_readyp);
12571 /* Traverse the ready list from the head (the instruction to issue
12572 first), looking for the first instruction that can issue as
12573 younger and the first instruction that can dual-issue only as
12574 older. */
12575 for (i = *n_readyp - 1; i >= 0; i--)
12577 rtx_insn *insn = ready[i];
12578 if (cortexa7_older_only (insn))
12580 first_older_only = i;
12581 if (verbose > 5)
12582 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12583 break;
12585 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12586 first_younger = i;
12589 /* Nothing to reorder because either no younger insn found or insn
12590 that can dual-issue only as older appears before any insn that
12591 can dual-issue as younger. */
12592 if (first_younger == -1)
12594 if (verbose > 5)
12595 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12596 return;
12599 /* Nothing to reorder because no older-only insn in the ready list. */
12600 if (first_older_only == -1)
12602 if (verbose > 5)
12603 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12604 return;
12607 /* Move first_older_only insn before first_younger. */
12608 if (verbose > 5)
12609 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12610 INSN_UID (ready[first_older_only]),
12611 INSN_UID (ready[first_younger]));
12612 rtx_insn *first_older_only_insn = ready[first_older_only];
12613 for (i = first_older_only; i < first_younger; i++)
12615 ready[i] = ready[i+1];
12618 ready[i] = first_older_only_insn;
12619 return;
12622 /* Implement TARGET_SCHED_REORDER. */
12623 static int
12624 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12625 int clock)
12627 switch (arm_tune)
12629 case TARGET_CPU_cortexa7:
12630 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12631 break;
12632 default:
12633 /* Do nothing for other cores. */
12634 break;
12637 return arm_issue_rate ();
12640 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12641 It corrects the value of COST based on the relationship between
12642 INSN and DEP through the dependence LINK. It returns the new
12643 value. There is a per-core adjust_cost hook to adjust scheduler costs
12644 and the per-core hook can choose to completely override the generic
12645 adjust_cost function. Only put bits of code into arm_adjust_cost that
12646 are common across all cores. */
12647 static int
12648 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12649 unsigned int)
12651 rtx i_pat, d_pat;
12653 /* When generating Thumb-1 code, we want to place flag-setting operations
12654 close to a conditional branch which depends on them, so that we can
12655 omit the comparison. */
12656 if (TARGET_THUMB1
12657 && dep_type == 0
12658 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12659 && recog_memoized (dep) >= 0
12660 && get_attr_conds (dep) == CONDS_SET)
12661 return 0;
12663 if (current_tune->sched_adjust_cost != NULL)
12665 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12666 return cost;
12669 /* XXX Is this strictly true? */
12670 if (dep_type == REG_DEP_ANTI
12671 || dep_type == REG_DEP_OUTPUT)
12672 return 0;
12674 /* Call insns don't incur a stall, even if they follow a load. */
12675 if (dep_type == 0
12676 && CALL_P (insn))
12677 return 1;
12679 if ((i_pat = single_set (insn)) != NULL
12680 && MEM_P (SET_SRC (i_pat))
12681 && (d_pat = single_set (dep)) != NULL
12682 && MEM_P (SET_DEST (d_pat)))
12684 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12685 /* This is a load after a store; there is no conflict if the load reads
12686 from a cached area. Assume that loads from the stack and from the
12687 constant pool are cached, and that others will miss. This is a
12688 hack. */
12690 if ((SYMBOL_REF_P (src_mem)
12691 && CONSTANT_POOL_ADDRESS_P (src_mem))
12692 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12693 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12694 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12695 return 1;
12698 return cost;
12702 arm_max_conditional_execute (void)
12704 return max_insns_skipped;
12707 static int
12708 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12710 if (TARGET_32BIT)
12711 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12712 else
12713 return (optimize > 0) ? 2 : 0;
12716 static int
12717 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12719 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12722 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12723 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12724 sequences of non-executed instructions in IT blocks probably take the same
12725 amount of time as executed instructions (and the IT instruction itself takes
12726 space in icache). This function was experimentally determined to give good
12727 results on a popular embedded benchmark. */
12729 static int
12730 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12732 return (TARGET_32BIT && speed_p) ? 1
12733 : arm_default_branch_cost (speed_p, predictable_p);
12736 static int
12737 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12739 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12742 static bool fp_consts_inited = false;
12744 static REAL_VALUE_TYPE value_fp0;
12746 static void
12747 init_fp_table (void)
12749 REAL_VALUE_TYPE r;
12751 r = REAL_VALUE_ATOF ("0", DFmode);
12752 value_fp0 = r;
12753 fp_consts_inited = true;
12756 /* Return TRUE if rtx X is a valid immediate FP constant. */
12758 arm_const_double_rtx (rtx x)
12760 const REAL_VALUE_TYPE *r;
12762 if (!fp_consts_inited)
12763 init_fp_table ();
12765 r = CONST_DOUBLE_REAL_VALUE (x);
12766 if (REAL_VALUE_MINUS_ZERO (*r))
12767 return 0;
12769 if (real_equal (r, &value_fp0))
12770 return 1;
12772 return 0;
12775 /* VFPv3 has a fairly wide range of representable immediates, formed from
12776 "quarter-precision" floating-point values. These can be evaluated using this
12777 formula (with ^ for exponentiation):
12779 -1^s * n * 2^-r
12781 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12782 16 <= n <= 31 and 0 <= r <= 7.
12784 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12786 - A (most-significant) is the sign bit.
12787 - BCD are the exponent (encoded as r XOR 3).
12788 - EFGH are the mantissa (encoded as n - 16).
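   A worked example may help (illustrative only, derived from the formula
   above): 1.5 = -1^0 * 24 * 2^-4, i.e. s = 0, n = 24 and r = 4, so the
   encoding is A = 0, BCD = 4 XOR 3 = 7 (binary 111) and EFGH = 24 - 16 = 8
   (binary 1000), giving the 8-bit pattern 01111000 (0x78).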
12791 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12792 fconst[sd] instruction, or -1 if X isn't suitable. */
12793 static int
12794 vfp3_const_double_index (rtx x)
12796 REAL_VALUE_TYPE r, m;
12797 int sign, exponent;
12798 unsigned HOST_WIDE_INT mantissa, mant_hi;
12799 unsigned HOST_WIDE_INT mask;
12800 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12801 bool fail;
12803 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12804 return -1;
12806 r = *CONST_DOUBLE_REAL_VALUE (x);
12808 /* We can't represent these things, so detect them first. */
12809 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12810 return -1;
12812 /* Extract sign, exponent and mantissa. */
12813 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12814 r = real_value_abs (&r);
12815 exponent = REAL_EXP (&r);
12816 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12817 highest (sign) bit, with a fixed binary point at bit point_pos.
12818 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12819 bits for the mantissa, this may fail (low bits would be lost). */
12820 real_ldexp (&m, &r, point_pos - exponent);
12821 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12822 mantissa = w.elt (0);
12823 mant_hi = w.elt (1);
12825 /* If there are bits set in the low part of the mantissa, we can't
12826 represent this value. */
12827 if (mantissa != 0)
12828 return -1;
12830 /* Now make it so that mantissa contains the most-significant bits, and move
12831 the point_pos to indicate that the least-significant bits have been
12832 discarded. */
12833 point_pos -= HOST_BITS_PER_WIDE_INT;
12834 mantissa = mant_hi;
12836 /* We can permit four significant bits of mantissa only, plus a high bit
12837 which is always 1. */
12838 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12839 if ((mantissa & mask) != 0)
12840 return -1;
12842 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12843 mantissa >>= point_pos - 5;
12845 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12846 floating-point immediate zero with Neon using an integer-zero load, but
12847 that case is handled elsewhere.) */
12848 if (mantissa == 0)
12849 return -1;
12851 gcc_assert (mantissa >= 16 && mantissa <= 31);
12853 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12854 normalized significands are in the range [1, 2). (Our mantissa is shifted
12855 left 4 places at this point relative to normalized IEEE754 values). GCC
12856 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12857 REAL_EXP must be altered. */
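  /* For instance (an illustrative check of this adjustment): for 1.5 GCC's
     internal form is 0.75 * 2^1, so REAL_EXP returns 1 and the adjusted
     exponent is 5 - 1 = 4, consistent with 1.5 = 24 * 2^-4 in the
     quarter-precision formula above.  */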
12858 exponent = 5 - exponent;
12860 if (exponent < 0 || exponent > 7)
12861 return -1;
12863 /* Sign, mantissa and exponent are now in the correct form to plug into the
12864 formula described in the comment above. */
12865 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12868 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12870 vfp3_const_double_rtx (rtx x)
12872 if (!TARGET_VFP3)
12873 return 0;
12875 return vfp3_const_double_index (x) != -1;
12878 /* Recognize immediates which can be used in various Neon and MVE instructions.
12879 Legal immediates are described by the following table (for VMVN variants, the
12880 bitwise inverse of the constant shown is recognized. In either case, VMOV
12881 is output and the correct instruction to use for a given constant is chosen
12882 by the assembler). The constant shown is replicated across all elements of
12883 the destination vector.
12885 insn elems variant constant (binary)
12886 ---- ----- ------- -----------------
12887 vmov i32 0 00000000 00000000 00000000 abcdefgh
12888 vmov i32 1 00000000 00000000 abcdefgh 00000000
12889 vmov i32 2 00000000 abcdefgh 00000000 00000000
12890 vmov i32 3 abcdefgh 00000000 00000000 00000000
12891 vmov i16 4 00000000 abcdefgh
12892 vmov i16 5 abcdefgh 00000000
12893 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12894 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12895 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12896 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12897 vmvn i16 10 00000000 abcdefgh
12898 vmvn i16 11 abcdefgh 00000000
12899 vmov i32 12 00000000 00000000 abcdefgh 11111111
12900 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12901 vmov i32 14 00000000 abcdefgh 11111111 11111111
12902 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12903 vmov i8 16 abcdefgh
12904 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12905 eeeeeeee ffffffff gggggggg hhhhhhhh
12906 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12907 vmov f32 19 00000000 00000000 00000000 00000000
12909 For case 18, B = !b. Representable values are exactly those accepted by
12910 vfp3_const_double_index, but are output as floating-point numbers rather
12911 than indices.
12913 For case 19, we will change it to vmov.i32 when assembling.
12915 Variants 0-5 (inclusive) may also be used as immediates for the second
12916 operand of VORR/VBIC instructions.
12918 The INVERSE argument causes the bitwise inverse of the given operand to be
12919 recognized instead (used for recognizing legal immediates for the VAND/VORN
12920 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12921 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12922 output, rather than the real insns vbic/vorr).
12924 INVERSE makes no difference to the recognition of float vectors.
12926 The return value is the variant of immediate as shown in the above table, or
12927 -1 if the given value doesn't match any of the listed patterns.
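   As an illustration (a hypothetical constant, checked against the table
   above): a V4SImode vector whose elements are all 0x00ab0000 splits into
   the little-endian bytes { 00, 00, ab, 00 } for every element, which
   matches variant 2, so the return value is 2 and *ELEMENTWIDTH is 32.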
12929 static int
12930 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12931 rtx *modconst, int *elementwidth)
12933 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12934 matches = 1; \
12935 for (i = 0; i < idx; i += (STRIDE)) \
12936 if (!(TEST)) \
12937 matches = 0; \
12938 if (matches) \
12940 immtype = (CLASS); \
12941 elsize = (ELSIZE); \
12942 break; \
12945 unsigned int i, elsize = 0, idx = 0, n_elts;
12946 unsigned int innersize;
12947 unsigned char bytes[16] = {};
12948 int immtype = -1, matches;
12949 unsigned int invmask = inverse ? 0xff : 0;
12950 bool vector = GET_CODE (op) == CONST_VECTOR;
12952 if (vector)
12953 n_elts = CONST_VECTOR_NUNITS (op);
12954 else
12956 n_elts = 1;
12957 gcc_assert (mode != VOIDmode);
12960 innersize = GET_MODE_UNIT_SIZE (mode);
12962 /* Only support 128-bit vectors for MVE. */
12963 if (TARGET_HAVE_MVE
12964 && (!vector
12965 || VALID_MVE_PRED_MODE (mode)
12966 || n_elts * innersize != 16))
12967 return -1;
12969 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12970 return -1;
12972 /* Vectors of float constants. */
12973 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12975 rtx el0 = CONST_VECTOR_ELT (op, 0);
12977 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12978 return -1;
12980 /* FP16 vectors cannot be represented. */
12981 if (GET_MODE_INNER (mode) == HFmode)
12982 return -1;
12984 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12985 are distinct in this context. */
12986 if (!const_vec_duplicate_p (op))
12987 return -1;
12989 if (modconst)
12990 *modconst = CONST_VECTOR_ELT (op, 0);
12992 if (elementwidth)
12993 *elementwidth = 0;
12995 if (el0 == CONST0_RTX (GET_MODE (el0)))
12996 return 19;
12997 else
12998 return 18;
13001 /* The tricks done in the code below apply for little-endian vector layout.
13002 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13003 FIXME: Implement logic for big-endian vectors. */
13004 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13005 return -1;
13007 /* Splat vector constant out into a byte vector. */
13008 for (i = 0; i < n_elts; i++)
13010 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13011 unsigned HOST_WIDE_INT elpart;
13013 gcc_assert (CONST_INT_P (el));
13014 elpart = INTVAL (el);
13016 for (unsigned int byte = 0; byte < innersize; byte++)
13018 bytes[idx++] = (elpart & 0xff) ^ invmask;
13019 elpart >>= BITS_PER_UNIT;
13023 /* Sanity check. */
13024 gcc_assert (idx == GET_MODE_SIZE (mode));
13028 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13029 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13031 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13032 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13034 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13035 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13037 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13038 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13040 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13042 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13044 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13045 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13047 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13048 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13050 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13051 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13053 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13054 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13056 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13058 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13060 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13061 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13063 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13064 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13066 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13069 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13070 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13072 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13074 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13075 && bytes[i] == bytes[(i + 8) % idx]);
13077 while (0);
13079 if (immtype == -1)
13080 return -1;
13082 if (elementwidth)
13083 *elementwidth = elsize;
13085 if (modconst)
13087 unsigned HOST_WIDE_INT imm = 0;
13089 /* Un-invert bytes of recognized vector, if necessary. */
13090 if (invmask != 0)
13091 for (i = 0; i < idx; i++)
13092 bytes[i] ^= invmask;
13094 if (immtype == 17)
13096 /* FIXME: Broken on 32-bit H_W_I hosts. */
13097 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13099 for (i = 0; i < 8; i++)
13100 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13101 << (i * BITS_PER_UNIT);
13103 *modconst = GEN_INT (imm);
13105 else
13107 unsigned HOST_WIDE_INT imm = 0;
13109 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13110 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13112 *modconst = GEN_INT (imm);
13116 return immtype;
13117 #undef CHECK
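/* Illustrative sketch, not part of the build: how the CHECK-based classifier
   above works for one simple case.  The vector constant is first splatted
   into a little-endian byte array, and each CLASS is a byte pattern the
   whole array must match.  The helper below, written over plain C types
   rather than rtx, tests the CLASS 0 shape for a single 32-bit element:
   only the lowest byte may be non-zero (a "VMOV.I32 #<byte>" immediate).  */
static int
example_vmov_i32_low_byte_p (unsigned int elt)
{
  unsigned char bytes[4];
  for (int i = 0; i < 4; i++)
    {
      bytes[i] = elt & 0xff;	/* Splat the element out into bytes.  */
      elt >>= 8;
    }
  /* CLASS 0 above requires bytes 1..3 of each 32-bit group to be zero.  */
  return bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0;
}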
13120 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13121 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13122 (or zero for float elements), and a modified constant (whatever should be
13123 output for a VMOV) in *MODCONST.  This function was renamed from
13124 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13125 it is used by both Neon and MVE. */
13127 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13128 rtx *modconst, int *elementwidth)
13130 rtx tmpconst;
13131 int tmpwidth;
13132 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13134 if (retval == -1)
13135 return 0;
13137 if (modconst)
13138 *modconst = tmpconst;
13140 if (elementwidth)
13141 *elementwidth = tmpwidth;
13143 return 1;
13146 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13147 the immediate is valid, write a constant suitable for using as an operand
13148 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13149 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13152 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13153 rtx *modconst, int *elementwidth)
13155 rtx tmpconst;
13156 int tmpwidth;
13157 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13159 if (retval < 0 || retval > 5)
13160 return 0;
13162 if (modconst)
13163 *modconst = tmpconst;
13165 if (elementwidth)
13166 *elementwidth = tmpwidth;
13168 return 1;
13171 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13172 the immediate is valid, write a constant suitable for using as an operand
13173 to VSHR/VSHL to *MODCONST and the corresponding element width to
13174 *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left or a right shift,
13175 because the two have different limitations. */
13178 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13179 rtx *modconst, int *elementwidth,
13180 bool isleftshift)
13182 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13183 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13184 unsigned HOST_WIDE_INT last_elt = 0;
13185 unsigned HOST_WIDE_INT maxshift;
13187 /* Split vector constant out into a byte vector. */
13188 for (i = 0; i < n_elts; i++)
13190 rtx el = CONST_VECTOR_ELT (op, i);
13191 unsigned HOST_WIDE_INT elpart;
13193 if (CONST_INT_P (el))
13194 elpart = INTVAL (el);
13195 else if (CONST_DOUBLE_P (el))
13196 return 0;
13197 else
13198 gcc_unreachable ();
13200 if (i != 0 && elpart != last_elt)
13201 return 0;
13203 last_elt = elpart;
13206 /* Shift less than element size. */
13207 maxshift = innersize * 8;
13209 if (isleftshift)
13211 /* Left shift immediate value can be from 0 to <size>-1. */
13212 if (last_elt >= maxshift)
13213 return 0;
13215 else
13217 /* Right shift immediate value can be from 1 to <size>. */
13218 if (last_elt == 0 || last_elt > maxshift)
13219 return 0;
13222 if (elementwidth)
13223 *elementwidth = innersize * 8;
13225 if (modconst)
13226 *modconst = CONST_VECTOR_ELT (op, 0);
13228 return 1;
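/* Illustrative sketch, not part of the build: the immediate range rule the
   function above enforces, on plain integers.  ELEM_BITS is the element
   width in bits (8, 16, 32 or 64).  Left-shift immediates run from 0 to
   ELEM_BITS - 1; right-shift immediates run from 1 to ELEM_BITS.  */
static int
example_shift_imm_in_range_p (unsigned HOST_WIDE_INT shift,
			      unsigned int elem_bits, bool is_left_shift)
{
  if (is_left_shift)
    return shift < elem_bits;			/* e.g. VSHL.I32: 0..31.  */
  return shift >= 1 && shift <= elem_bits;	/* e.g. VSHR.S32: 1..32.  */
}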
13231 /* Return a string suitable for output of Neon immediate logic operation
13232 MNEM. */
13234 char *
13235 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13236 int inverse, int quad)
13238 int width, is_valid;
13239 static char templ[40];
13241 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13243 gcc_assert (is_valid != 0);
13245 if (quad)
13246 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13247 else
13248 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13250 return templ;
13253 /* Return a string suitable for output of Neon immediate shift operation
13254 (VSHR or VSHL) MNEM. */
13256 char *
13257 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13258 machine_mode mode, int quad,
13259 bool isleftshift)
13261 int width, is_valid;
13262 static char templ[40];
13264 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13265 gcc_assert (is_valid != 0);
13267 if (quad)
13268 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13269 else
13270 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13272 return templ;
13275 /* Output a sequence of pairwise operations to implement a reduction.
13276 NOTE: We do "too much work" here, because pairwise operations work on two
13277 registers-worth of operands in one go.  Unfortunately it does not seem possible
13278 to exploit those extra calculations to do the full operation in fewer steps.
13279 Although all vector elements of the result but the first are ignored, we
13280 actually calculate the same result in each of the elements. An alternative
13281 such as initially loading a vector with zero to use as each of the second
13282 operands would use up an additional register and take an extra instruction,
13283 for no particular gain. */
13285 void
13286 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13287 rtx (*reduc) (rtx, rtx, rtx))
13289 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13290 rtx tmpsum = op1;
13292 for (i = parts / 2; i >= 1; i /= 2)
13294 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13295 emit_insn (reduc (dest, tmpsum, tmpsum));
13296 tmpsum = dest;
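/* Illustrative sketch, not part of the build: the log2-step reduction shape
   used above, written over a plain array instead of vector registers.  Each
   pass adds neighbouring pairs and halves the number of live partial sums;
   after log2(N) passes element 0 holds the full reduction (the hardware
   version keeps redundant copies in the other lanes, as the comment above
   explains).  N is assumed to be a power of two.  */
static int
example_pairwise_sum (int *v, unsigned int n)
{
  for (unsigned int parts = n / 2; parts >= 1; parts /= 2)
    for (unsigned int i = 0; i < parts; i++)
      v[i] = v[2 * i] + v[2 * i + 1];	/* Add neighbouring pairs.  */
  return v[0];
}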
13300 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13301 loaded into a register using VDUP.
13303 If this is the case, and GENERATE is set, we also generate
13304 instructions to do this and return an RTX to assign to the register. */
13306 static rtx
13307 neon_vdup_constant (rtx vals, bool generate)
13309 machine_mode mode = GET_MODE (vals);
13310 machine_mode inner_mode = GET_MODE_INNER (mode);
13311 rtx x;
13313 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13314 return NULL_RTX;
13316 if (!const_vec_duplicate_p (vals, &x))
13317 /* The elements are not all the same. We could handle repeating
13318 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13319 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13320 vdup.i16). */
13321 return NULL_RTX;
13323 if (!generate)
13324 return x;
13326 /* We can load this constant by using VDUP and a constant in a
13327 single ARM register. This will be cheaper than a vector
13328 load. */
13330 x = copy_to_mode_reg (inner_mode, x);
13331 return gen_vec_duplicate (mode, x);
13334 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13336 mve_bool_vec_to_const (rtx const_vec)
13338 machine_mode mode = GET_MODE (const_vec);
13340 if (!VECTOR_MODE_P (mode))
13341 return const_vec;
13343 unsigned n_elts = GET_MODE_NUNITS (mode);
13344 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13345 unsigned shift_c = 16 / n_elts;
13346 unsigned i;
13347 int hi_val = 0;
13349 for (i = 0; i < n_elts; i++)
13351 rtx el = CONST_VECTOR_ELT (const_vec, i);
13352 unsigned HOST_WIDE_INT elpart;
13354 gcc_assert (CONST_INT_P (el));
13355 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13357 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13359 hi_val |= elpart << (index * shift_c);
13361 /* We are using mov immediate to encode this constant which writes 32-bits
13362 so we need to make sure the top 16-bits are all 0, otherwise we can't
13363 guarantee we can actually write this immediate. */
13364 return gen_int_mode (hi_val, SImode);
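/* Illustrative sketch, not part of the build: the packing performed above
   for a 4-element predicate vector, on plain integers.  With n_elts == 4
   each element owns a 16 / 4 == 4-bit field of the 16-bit result; the 4-bit
   mask mirrors the el_prec masking above and is an assumption of this
   sketch.  Little-endian lane order is used.  */
static unsigned int
example_pack_predicate_v4 (const unsigned int elts[4])
{
  unsigned int hi_val = 0;
  for (unsigned int i = 0; i < 4; i++)
    hi_val |= (elts[i] & 0xf) << (i * 4);	/* shift_c == 16 / n_elts.  */
  return hi_val;	/* Top 16 bits remain zero, as required above.  */
}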
13367 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13368 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13369 into a register.
13371 If this is the case, and GENERATE is set, we also generate code to do
13372 this and return an RTX to copy into the register. */
13375 neon_make_constant (rtx vals, bool generate)
13377 machine_mode mode = GET_MODE (vals);
13378 rtx target;
13379 rtx const_vec = NULL_RTX;
13380 int n_elts = GET_MODE_NUNITS (mode);
13381 int n_const = 0;
13382 int i;
13384 if (GET_CODE (vals) == CONST_VECTOR)
13385 const_vec = vals;
13386 else if (GET_CODE (vals) == PARALLEL)
13388 /* A CONST_VECTOR must contain only CONST_INTs and
13389 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13390 Only store valid constants in a CONST_VECTOR. */
13391 for (i = 0; i < n_elts; ++i)
13393 rtx x = XVECEXP (vals, 0, i);
13394 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13395 n_const++;
13397 if (n_const == n_elts)
13398 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13400 else
13401 gcc_unreachable ();
13403 if (const_vec != NULL
13404 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13405 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13406 return const_vec;
13407 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
13408 return mve_bool_vec_to_const (const_vec);
13409 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13410 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13411 pipeline cycle; creating the constant takes one or two ARM
13412 pipeline cycles. */
13413 return target;
13414 else if (const_vec != NULL_RTX)
13415 /* Load from constant pool. On Cortex-A8 this takes two cycles
13416 (for either double or quad vectors). We cannot take advantage
13417 of single-cycle VLD1 because we need a PC-relative addressing
13418 mode. */
13419 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13420 else
13421 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13422 We cannot construct an initializer. */
13423 return NULL_RTX;
13426 /* Initialize vector TARGET to VALS. */
13428 void
13429 neon_expand_vector_init (rtx target, rtx vals)
13431 machine_mode mode = GET_MODE (target);
13432 machine_mode inner_mode = GET_MODE_INNER (mode);
13433 int n_elts = GET_MODE_NUNITS (mode);
13434 int n_var = 0, one_var = -1;
13435 bool all_same = true;
13436 rtx x, mem;
13437 int i;
13439 for (i = 0; i < n_elts; ++i)
13441 x = XVECEXP (vals, 0, i);
13442 if (!CONSTANT_P (x))
13443 ++n_var, one_var = i;
13445 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13446 all_same = false;
13449 if (n_var == 0)
13451 rtx constant = neon_make_constant (vals);
13452 if (constant != NULL_RTX)
13454 emit_move_insn (target, constant);
13455 return;
13459 /* Splat a single non-constant element if we can. */
13460 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13462 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13463 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13464 return;
13467 /* One field is non-constant. Load constant then overwrite varying
13468 field. This is more efficient than using the stack. */
13469 if (n_var == 1)
13471 rtx copy = copy_rtx (vals);
13472 rtx merge_mask = GEN_INT (1 << one_var);
13474 /* Load constant part of vector, substitute neighboring value for
13475 varying element. */
13476 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13477 neon_expand_vector_init (target, copy);
13479 /* Insert variable. */
13480 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13481 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13482 return;
13485 /* Construct the vector in memory one field at a time
13486 and load the whole vector. */
13487 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13488 for (i = 0; i < n_elts; i++)
13489 emit_move_insn (adjust_address_nv (mem, inner_mode,
13490 i * GET_MODE_SIZE (inner_mode)),
13491 XVECEXP (vals, 0, i));
13492 emit_move_insn (target, mem);
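/* Illustrative sketch, not part of the build: the "one variable lane"
   strategy used above, on a plain array.  The varying lane is first filled
   with its neighbour's constant so the whole vector can be materialised as
   a constant, then the real value is written over that single lane.  */
static void
example_init_with_one_var (int *dst, const int *vals, int n_elts,
			   int var_idx, int var_val)
{
  for (int i = 0; i < n_elts; i++)
    dst[i] = vals[i];
  dst[var_idx] = vals[(var_idx + 1) % n_elts];	/* Neighbour stands in.  */
  /* ...the constant vector would be loaded here...  */
  dst[var_idx] = var_val;			/* Then insert the one lane.  */
}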
13495 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13496 an error if it doesn't, using DESC to describe the operand. EXP indicates the source location, which includes the
13497 inlining history for intrinsics. */
13499 static void
13500 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13501 const_tree exp, const char *desc)
13503 HOST_WIDE_INT lane;
13505 gcc_assert (CONST_INT_P (operand));
13507 lane = INTVAL (operand);
13509 if (lane < low || lane >= high)
13511 if (exp)
13512 error_at (EXPR_LOCATION (exp),
13513 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13514 else
13515 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13519 /* Bounds-check lanes. */
13521 void
13522 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13523 const_tree exp)
13525 bounds_check (operand, low, high, exp, "lane");
13528 /* Bounds-check constants. */
13530 void
13531 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13533 bounds_check (operand, low, high, NULL_TREE, "constant");
13536 HOST_WIDE_INT
13537 neon_element_bits (machine_mode mode)
13539 return GET_MODE_UNIT_BITSIZE (mode);
13543 /* Predicates for `match_operand' and `match_operator'. */
13545 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13546 WB level is 2 if full writeback address modes are allowed, 1
13547 if limited writeback address modes (POST_INC and PRE_DEC) are
13548 allowed and 0 if no writeback at all is supported. */
13551 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13553 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13554 rtx ind;
13556 /* Reject eliminable registers. */
13557 if (! (reload_in_progress || reload_completed || lra_in_progress)
13558 && ( reg_mentioned_p (frame_pointer_rtx, op)
13559 || reg_mentioned_p (arg_pointer_rtx, op)
13560 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13561 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13562 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13563 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13564 return FALSE;
13566 /* Constants are converted into offsets from labels. */
13567 if (!MEM_P (op))
13568 return FALSE;
13570 ind = XEXP (op, 0);
13572 if (reload_completed
13573 && (LABEL_REF_P (ind)
13574 || (GET_CODE (ind) == CONST
13575 && GET_CODE (XEXP (ind, 0)) == PLUS
13576 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13577 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13578 return TRUE;
13580 /* Match: (mem (reg)). */
13581 if (REG_P (ind))
13582 return arm_address_register_rtx_p (ind, 0);
13584 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13585 acceptable in any case (subject to verification by
13586 arm_address_register_rtx_p). We need full writeback to accept
13587 PRE_INC and POST_DEC, and at least restricted writeback for
13588 POST_INC and PRE_DEC. */
13589 if (wb_level > 0
13590 && (GET_CODE (ind) == POST_INC
13591 || GET_CODE (ind) == PRE_DEC
13592 || (wb_level > 1
13593 && (GET_CODE (ind) == PRE_INC
13594 || GET_CODE (ind) == POST_DEC))))
13595 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13597 if (wb_level > 1
13598 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13599 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13600 && GET_CODE (XEXP (ind, 1)) == PLUS
13601 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13602 ind = XEXP (ind, 1);
13604 /* Match:
13605 (plus (reg)
13606 (const))
13608 The encoded immediate for 16-bit modes is multiplied by 2,
13609 while the encoded immediate for 32-bit and 64-bit modes is
13610 multiplied by 4. */
13611 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13612 if (GET_CODE (ind) == PLUS
13613 && REG_P (XEXP (ind, 0))
13614 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13615 && CONST_INT_P (XEXP (ind, 1))
13616 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13617 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13618 return TRUE;
13620 return FALSE;
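/* Illustrative sketch, not part of the build: the [Rn, #imm] rule checked
   just above, on plain integers.  FACTOR is MIN (mode size, 4), so 2 for
   16-bit modes and 4 for 32-bit and 64-bit modes; the offset must be a
   multiple of FACTOR within [-255 * FACTOR, 255 * FACTOR].  */
static bool
example_coproc_offset_ok_p (HOST_WIDE_INT offset, unsigned int mode_size)
{
  HOST_WIDE_INT factor = mode_size < 4 ? mode_size : 4;
  return (offset >= -255 * factor
	  && offset <= 255 * factor
	  && offset % factor == 0);
}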
13623 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13624 WB is true if full writeback address modes are allowed and is false
13625 if limited writeback address modes (POST_INC and PRE_DEC) are
13626 allowed. */
13628 int arm_coproc_mem_operand (rtx op, bool wb)
13630 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13633 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13634 context in which no writeback address modes are allowed. */
13637 arm_coproc_mem_operand_no_writeback (rtx op)
13639 return arm_coproc_mem_operand_wb (op, 0);
13642 /* This function returns TRUE on matching mode and op.
13643 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13644 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13646 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13648 enum rtx_code code;
13649 int val, reg_no;
13651 /* Match: (mem (reg)). */
13652 if (REG_P (op))
13654 int reg_no = REGNO (op);
13655 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13656 ? reg_no <= LAST_LO_REGNUM
13657 : reg_no < LAST_ARM_REGNUM)
13658 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13660 code = GET_CODE (op);
13662 if (code == POST_INC || code == PRE_DEC
13663 || code == PRE_INC || code == POST_DEC)
13665 reg_no = REGNO (XEXP (op, 0));
13666 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13667 ? reg_no <= LAST_LO_REGNUM
13668 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13669 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13671 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13672 && GET_CODE (XEXP (op, 1)) == PLUS
13673 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13674 && REG_P (XEXP (op, 0))
13675 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13676 /* Make sure to only accept PLUS after reload_completed, otherwise
13677 this will interfere with auto_inc's pattern detection. */
13678 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13679 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13681 reg_no = REGNO (XEXP (op, 0));
13682 if (code == PLUS)
13683 val = INTVAL (XEXP (op, 1));
13684 else
13685 val = INTVAL (XEXP(XEXP (op, 1), 1));
13687 switch (mode)
13689 case E_V16QImode:
13690 case E_V8QImode:
13691 case E_V4QImode:
13692 if (abs (val) > 127)
13693 return FALSE;
13694 break;
13695 case E_V8HImode:
13696 case E_V8HFmode:
13697 case E_V4HImode:
13698 case E_V4HFmode:
13699 if (val % 2 != 0 || abs (val) > 254)
13700 return FALSE;
13701 break;
13702 case E_V4SImode:
13703 case E_V4SFmode:
13704 if (val % 4 != 0 || abs (val) > 508)
13705 return FALSE;
13706 break;
13707 default:
13708 return FALSE;
13710 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13711 || (MVE_STN_LDW_MODE (mode)
13712 ? reg_no <= LAST_LO_REGNUM
13713 : (reg_no < LAST_ARM_REGNUM
13714 && (code == PLUS || reg_no != SP_REGNUM))));
13716 return FALSE;
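/* Illustrative sketch, not part of the build: the per-element-size offset
   limits enforced by the switch above, on plain integers.  ELEM_SIZE is
   the element size in bytes (1, 2 or 4).  */
static bool
example_mve_offset_ok_p (HOST_WIDE_INT val, unsigned int elem_size)
{
  switch (elem_size)
    {
    case 1:
      return IN_RANGE (val, -127, 127);
    case 2:
      return val % 2 == 0 && IN_RANGE (val, -254, 254);
    case 4:
      return val % 4 == 0 && IN_RANGE (val, -508, 508);
    default:
      return false;
    }
}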
13719 /* Return TRUE if OP is a memory operand which we can load or store a vector
13720 to/from. TYPE is one of the following values:
13721 0 - Vector load/store (vldr)
13722 1 - Core registers (ldm)
13723 2 - Element/structure loads (vld1)
13726 neon_vector_mem_operand (rtx op, int type, bool strict)
13728 rtx ind;
13730 /* Reject eliminable registers. */
13731 if (strict && ! (reload_in_progress || reload_completed)
13732 && (reg_mentioned_p (frame_pointer_rtx, op)
13733 || reg_mentioned_p (arg_pointer_rtx, op)
13734 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13735 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13736 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13737 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13738 return FALSE;
13740 /* Constants are converted into offsets from labels. */
13741 if (!MEM_P (op))
13742 return FALSE;
13744 ind = XEXP (op, 0);
13746 if (reload_completed
13747 && (LABEL_REF_P (ind)
13748 || (GET_CODE (ind) == CONST
13749 && GET_CODE (XEXP (ind, 0)) == PLUS
13750 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13751 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13752 return TRUE;
13754 /* Match: (mem (reg)). */
13755 if (REG_P (ind))
13756 return arm_address_register_rtx_p (ind, 0);
13758 /* Allow post-increment with Neon registers. */
13759 if ((type != 1 && GET_CODE (ind) == POST_INC)
13760 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13761 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13763 /* Allow post-increment by register for VLDn */
13764 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13765 && GET_CODE (XEXP (ind, 1)) == PLUS
13766 && REG_P (XEXP (XEXP (ind, 1), 1))
13767 && REG_P (XEXP (ind, 0))
13768 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13769 return true;
13771 /* Match:
13772 (plus (reg)
13773 (const)). */
13774 if (type == 0
13775 && GET_CODE (ind) == PLUS
13776 && REG_P (XEXP (ind, 0))
13777 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13778 && CONST_INT_P (XEXP (ind, 1))
13779 && INTVAL (XEXP (ind, 1)) > -1024
13780 /* For quad modes, we restrict the constant offset to be slightly less
13781 than what the instruction format permits. We have no such constraint
13782 on double mode offsets. (This must match arm_legitimate_index_p.) */
13783 && (INTVAL (XEXP (ind, 1))
13784 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13785 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13786 return TRUE;
13788 return FALSE;
13791 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13792 type. */
13794 mve_struct_mem_operand (rtx op)
13796 rtx ind = XEXP (op, 0);
13798 /* Match: (mem (reg)). */
13799 if (REG_P (ind))
13800 return arm_address_register_rtx_p (ind, 0);
13802 /* Allow only post-increment by the mode size. */
13803 if (GET_CODE (ind) == POST_INC)
13804 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13806 return FALSE;
13809 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13810 type. */
13812 neon_struct_mem_operand (rtx op)
13814 rtx ind;
13816 /* Reject eliminable registers. */
13817 if (! (reload_in_progress || reload_completed)
13818 && ( reg_mentioned_p (frame_pointer_rtx, op)
13819 || reg_mentioned_p (arg_pointer_rtx, op)
13820 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13821 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13822 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13823 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13824 return FALSE;
13826 /* Constants are converted into offsets from labels. */
13827 if (!MEM_P (op))
13828 return FALSE;
13830 ind = XEXP (op, 0);
13832 if (reload_completed
13833 && (LABEL_REF_P (ind)
13834 || (GET_CODE (ind) == CONST
13835 && GET_CODE (XEXP (ind, 0)) == PLUS
13836 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13837 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13838 return TRUE;
13840 /* Match: (mem (reg)). */
13841 if (REG_P (ind))
13842 return arm_address_register_rtx_p (ind, 0);
13844 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13845 if (GET_CODE (ind) == POST_INC
13846 || GET_CODE (ind) == PRE_DEC)
13847 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13849 return FALSE;
13852 /* Prepares the operands for the VCMLA by lane instruction such that the right
13853 register number is selected. This instruction is special in that it always
13854 requires a D register, however there is a choice to be made between Dn[0],
13855 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13857 The VCMLA by lane function always selects two values. For instance given D0
13858 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13859 used by the instruction. However given V4SF then index 0 and 1 are valid as
13860 D0[0] or D1[0] are both valid.
13862 This function centralizes that information based on OPERANDS, OPERANDS[3]
13863 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13864 updated to contain the right index. */
13866 rtx *
13867 neon_vcmla_lane_prepare_operands (rtx *operands)
13869 int lane = INTVAL (operands[4]);
13870 machine_mode constmode = SImode;
13871 machine_mode mode = GET_MODE (operands[3]);
13872 int regno = REGNO (operands[3]);
13873 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13874 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13876 operands[3] = gen_int_mode (regno + 1, constmode);
13877 operands[4]
13878 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13880 else
13882 operands[3] = gen_int_mode (regno, constmode);
13883 operands[4] = gen_int_mode (lane, constmode);
13885 return operands;
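/* Illustrative sketch, not part of the build: the D-register and lane
   renumbering done above, on plain integers.  NUNITS is the element count
   of the vector mode; lanes at or beyond NUNITS / 4 select the following
   D register and are rebased to start again at 0.  */
static void
example_vcmla_lane_split (int dregno, int lane, int nunits,
			  int *out_dregno, int *out_lane)
{
  if (lane > 0 && lane >= nunits / 4)
    {
      *out_dregno = dregno + 1;
      *out_lane = lane - nunits / 4;	/* e.g. V4SF lane 1 -> D(n+1)[0].  */
    }
  else
    {
      *out_dregno = dregno;
      *out_lane = lane;			/* e.g. V4SF lane 0 -> Dn[0].  */
    }
}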
13889 /* Return true if X is a register that will be eliminated later on. */
13891 arm_eliminable_register (rtx x)
13893 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13894 || REGNO (x) == ARG_POINTER_REGNUM
13895 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13896 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13899 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13900 coprocessor registers. Otherwise return NO_REGS. */
13902 enum reg_class
13903 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13905 if (mode == HFmode)
13907 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13908 return GENERAL_REGS;
13909 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13910 return NO_REGS;
13911 return GENERAL_REGS;
13914 /* The neon move patterns handle all legitimate vector and struct
13915 addresses. */
13916 if (TARGET_NEON
13917 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13918 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13919 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13920 || VALID_NEON_STRUCT_MODE (mode)))
13921 return NO_REGS;
13923 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13924 return NO_REGS;
13926 return GENERAL_REGS;
13929 /* Values which must be returned in the most-significant end of the return
13930 register. */
13932 static bool
13933 arm_return_in_msb (const_tree valtype)
13935 return (TARGET_AAPCS_BASED
13936 && BYTES_BIG_ENDIAN
13937 && (AGGREGATE_TYPE_P (valtype)
13938 || TREE_CODE (valtype) == COMPLEX_TYPE
13939 || FIXED_POINT_TYPE_P (valtype)));
13942 /* Return TRUE if X references a SYMBOL_REF. */
13944 symbol_mentioned_p (rtx x)
13946 const char * fmt;
13947 int i;
13949 if (SYMBOL_REF_P (x))
13950 return 1;
13952 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13953 are constant offsets, not symbols. */
13954 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13955 return 0;
13957 fmt = GET_RTX_FORMAT (GET_CODE (x));
13959 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13961 if (fmt[i] == 'E')
13963 int j;
13965 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13966 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13967 return 1;
13969 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13970 return 1;
13973 return 0;
13976 /* Return TRUE if X references a LABEL_REF. */
13978 label_mentioned_p (rtx x)
13980 const char * fmt;
13981 int i;
13983 if (LABEL_REF_P (x))
13984 return 1;
13986 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13987 instruction, but they are constant offsets, not symbols. */
13988 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13989 return 0;
13991 fmt = GET_RTX_FORMAT (GET_CODE (x));
13992 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13994 if (fmt[i] == 'E')
13996 int j;
13998 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13999 if (label_mentioned_p (XVECEXP (x, i, j)))
14000 return 1;
14002 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14003 return 1;
14006 return 0;
14010 tls_mentioned_p (rtx x)
14012 switch (GET_CODE (x))
14014 case CONST:
14015 return tls_mentioned_p (XEXP (x, 0));
14017 case UNSPEC:
14018 if (XINT (x, 1) == UNSPEC_TLS)
14019 return 1;
14021 /* Fall through. */
14022 default:
14023 return 0;
14027 /* Must not copy any rtx that uses a pc-relative address.
14028 Also, disallow copying of load-exclusive instructions that
14029 may appear after splitting of compare-and-swap-style operations
14030 so as to prevent those loops from being transformed away from their
14031 canonical forms (see PR 69904). */
14033 static bool
14034 arm_cannot_copy_insn_p (rtx_insn *insn)
14036 /* The tls call insn cannot be copied, as it is paired with a data
14037 word. */
14038 if (recog_memoized (insn) == CODE_FOR_tlscall)
14039 return true;
14041 subrtx_iterator::array_type array;
14042 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14044 const_rtx x = *iter;
14045 if (GET_CODE (x) == UNSPEC
14046 && (XINT (x, 1) == UNSPEC_PIC_BASE
14047 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14048 return true;
14051 rtx set = single_set (insn);
14052 if (set)
14054 rtx src = SET_SRC (set);
14055 if (GET_CODE (src) == ZERO_EXTEND)
14056 src = XEXP (src, 0);
14058 /* Catch the load-exclusive and load-acquire operations. */
14059 if (GET_CODE (src) == UNSPEC_VOLATILE
14060 && (XINT (src, 1) == VUNSPEC_LL
14061 || XINT (src, 1) == VUNSPEC_LAX))
14062 return true;
14064 return false;
14067 enum rtx_code
14068 minmax_code (rtx x)
14070 enum rtx_code code = GET_CODE (x);
14072 switch (code)
14074 case SMAX:
14075 return GE;
14076 case SMIN:
14077 return LE;
14078 case UMIN:
14079 return LEU;
14080 case UMAX:
14081 return GEU;
14082 default:
14083 gcc_unreachable ();
14087 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14089 bool
14090 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14091 int *mask, bool *signed_sat)
14093 /* The high bound must be a power of two minus one. */
14094 int log = exact_log2 (INTVAL (hi_bound) + 1);
14095 if (log == -1)
14096 return false;
14098 /* The low bound is either zero (for usat) or one less than the
14099 negation of the high bound (for ssat). */
14100 if (INTVAL (lo_bound) == 0)
14102 if (mask)
14103 *mask = log;
14104 if (signed_sat)
14105 *signed_sat = false;
14107 return true;
14110 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14112 if (mask)
14113 *mask = log + 1;
14114 if (signed_sat)
14115 *signed_sat = true;
14117 return true;
14120 return false;
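/* Illustrative sketch, not part of the build: the bound shapes recognised
   above, on plain integers.  HI must be 2**n - 1; LO is 0 for usat (mask n)
   or -(HI + 1) for ssat (mask n + 1).  Returns the mask width, or -1 when
   the pair matches neither form.  */
static int
example_sat_mask (HOST_WIDE_INT lo, HOST_WIDE_INT hi, bool *is_signed)
{
  int log = exact_log2 (hi + 1);
  if (log == -1)
    return -1;
  if (lo == 0)
    {
      *is_signed = false;
      return log;		/* e.g. [0, 255] -> usat #8.  */
    }
  if (lo == -hi - 1)
    {
      *is_signed = true;
      return log + 1;		/* e.g. [-128, 127] -> ssat #8.  */
    }
  return -1;
}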
14123 /* Return 1 if memory locations are adjacent. */
14125 adjacent_mem_locations (rtx a, rtx b)
14127 /* We don't guarantee to preserve the order of these memory refs. */
14128 if (volatile_refs_p (a) || volatile_refs_p (b))
14129 return 0;
14131 if ((REG_P (XEXP (a, 0))
14132 || (GET_CODE (XEXP (a, 0)) == PLUS
14133 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14134 && (REG_P (XEXP (b, 0))
14135 || (GET_CODE (XEXP (b, 0)) == PLUS
14136 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14138 HOST_WIDE_INT val0 = 0, val1 = 0;
14139 rtx reg0, reg1;
14140 int val_diff;
14142 if (GET_CODE (XEXP (a, 0)) == PLUS)
14144 reg0 = XEXP (XEXP (a, 0), 0);
14145 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14147 else
14148 reg0 = XEXP (a, 0);
14150 if (GET_CODE (XEXP (b, 0)) == PLUS)
14152 reg1 = XEXP (XEXP (b, 0), 0);
14153 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14155 else
14156 reg1 = XEXP (b, 0);
14158 /* Don't accept any offset that will require multiple
14159 instructions to handle, since this would cause the
14160 arith_adjacentmem pattern to output an overlong sequence. */
14161 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14162 return 0;
14164 /* Don't allow an eliminable register: register elimination can make
14165 the offset too large. */
14166 if (arm_eliminable_register (reg0))
14167 return 0;
14169 val_diff = val1 - val0;
14171 if (arm_ld_sched)
14173 /* If the target has load delay slots, then there's no benefit
14174 to using an ldm instruction unless the offset is zero and
14175 we are optimizing for size. */
14176 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14177 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14178 && (val_diff == 4 || val_diff == -4));
14181 return ((REGNO (reg0) == REGNO (reg1))
14182 && (val_diff == 4 || val_diff == -4));
14185 return 0;
14188 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14189 for load operations, false for store operations. CONSECUTIVE is true
14190 if the register numbers in the operation must be consecutive in the register
14191 bank. RETURN_PC is true if value is to be loaded in PC.
14192 The pattern we are trying to match for load is:
14193 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14194 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14197 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14199 where
14200 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14201 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14202 3. If consecutive is TRUE, then for kth register being loaded,
14203 REGNO (R_dk) = REGNO (R_d0) + k.
14204 The pattern for store is similar. */
14205 bool
14206 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14207 bool consecutive, bool return_pc)
14209 HOST_WIDE_INT count = XVECLEN (op, 0);
14210 rtx reg, mem, addr;
14211 unsigned regno;
14212 unsigned first_regno;
14213 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14214 rtx elt;
14215 bool addr_reg_in_reglist = false;
14216 bool update = false;
14217 int reg_increment;
14218 int offset_adj;
14219 int regs_per_val;
14221 /* If not in SImode, then registers must be consecutive
14222 (e.g., VLDM instructions for DFmode). */
14223 gcc_assert ((mode == SImode) || consecutive);
14224 /* Setting return_pc for stores is illegal. */
14225 gcc_assert (!return_pc || load);
14227 /* Set up the increments and the regs per val based on the mode. */
14228 reg_increment = GET_MODE_SIZE (mode);
14229 regs_per_val = reg_increment / 4;
14230 offset_adj = return_pc ? 1 : 0;
14232 if (count <= 1
14233 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14234 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14235 return false;
14237 /* Check if this is a write-back. */
14238 elt = XVECEXP (op, 0, offset_adj);
14239 if (GET_CODE (SET_SRC (elt)) == PLUS)
14241 i++;
14242 base = 1;
14243 update = true;
14245 /* The offset adjustment must be the number of registers being
14246 popped times the size of a single register. */
14247 if (!REG_P (SET_DEST (elt))
14248 || !REG_P (XEXP (SET_SRC (elt), 0))
14249 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14250 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14251 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14252 ((count - 1 - offset_adj) * reg_increment))
14253 return false;
14256 i = i + offset_adj;
14257 base = base + offset_adj;
14258 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14259 success depends on the type: VLDM can do just one reg,
14260 LDM must do at least two. */
14261 if ((count <= i) && (mode == SImode))
14262 return false;
14264 elt = XVECEXP (op, 0, i - 1);
14265 if (GET_CODE (elt) != SET)
14266 return false;
14268 if (load)
14270 reg = SET_DEST (elt);
14271 mem = SET_SRC (elt);
14273 else
14275 reg = SET_SRC (elt);
14276 mem = SET_DEST (elt);
14279 if (!REG_P (reg) || !MEM_P (mem))
14280 return false;
14282 regno = REGNO (reg);
14283 first_regno = regno;
14284 addr = XEXP (mem, 0);
14285 if (GET_CODE (addr) == PLUS)
14287 if (!CONST_INT_P (XEXP (addr, 1)))
14288 return false;
14290 offset = INTVAL (XEXP (addr, 1));
14291 addr = XEXP (addr, 0);
14294 if (!REG_P (addr))
14295 return false;
14297 /* Don't allow SP to be loaded unless it is also the base register. It
14298 guarantees that SP is reset correctly when an LDM instruction
14299 is interrupted. Otherwise, we might end up with a corrupt stack. */
14300 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14301 return false;
14303 if (regno == REGNO (addr))
14304 addr_reg_in_reglist = true;
14306 for (; i < count; i++)
14308 elt = XVECEXP (op, 0, i);
14309 if (GET_CODE (elt) != SET)
14310 return false;
14312 if (load)
14314 reg = SET_DEST (elt);
14315 mem = SET_SRC (elt);
14317 else
14319 reg = SET_SRC (elt);
14320 mem = SET_DEST (elt);
14323 if (!REG_P (reg)
14324 || GET_MODE (reg) != mode
14325 || REGNO (reg) <= regno
14326 || (consecutive
14327 && (REGNO (reg) !=
14328 (unsigned int) (first_regno + regs_per_val * (i - base))))
14329 /* Don't allow SP to be loaded unless it is also the base register. It
14330 guarantees that SP is reset correctly when an LDM instruction
14331 is interrupted. Otherwise, we might end up with a corrupt stack. */
14332 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14333 || !MEM_P (mem)
14334 || GET_MODE (mem) != mode
14335 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14336 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14337 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14338 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14339 offset + (i - base) * reg_increment))
14340 && (!REG_P (XEXP (mem, 0))
14341 || offset + (i - base) * reg_increment != 0)))
14342 return false;
14344 regno = REGNO (reg);
14345 if (regno == REGNO (addr))
14346 addr_reg_in_reglist = true;
14349 if (load)
14351 if (update && addr_reg_in_reglist)
14352 return false;
14354 /* For Thumb-1, address register is always modified - either by write-back
14355 or by explicit load. If the pattern does not describe an update,
14356 then the address register must be in the list of loaded registers. */
14357 if (TARGET_THUMB1)
14358 return update || addr_reg_in_reglist;
14361 return true;
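/* Illustrative sketch, not part of the build: the shape the function above
   looks for, reduced to arrays of register numbers and memory offsets.
   Register numbers must strictly ascend and the k-th offset must equal
   FIRST_OFFSET + k * INCREMENT (the per-register size).  */
static bool
example_multi_mem_shape_p (const unsigned int *regnos,
			   const HOST_WIDE_INT *offsets, int count,
			   HOST_WIDE_INT first_offset, HOST_WIDE_INT increment)
{
  for (int k = 0; k < count; k++)
    {
      if (k > 0 && regnos[k] <= regnos[k - 1])
	return false;		/* Registers must be strictly ascending.  */
      if (offsets[k] != first_offset + k * increment)
	return false;		/* Memory locations must be consecutive.  */
    }
  return true;
}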
14364 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14365 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14366 following form:
14368 [(set (reg:SI <N>) (const_int 0))
14369 (set (reg:SI <M>) (const_int 0))
14371 (unspec_volatile [(const_int 0)]
14372 VUNSPEC_CLRM_APSR)
14373 (clobber (reg:CC CC_REGNUM))
14376 Any number (including 0) of set expressions is valid; the volatile unspec is
14377 optional. All registers but SP and PC are allowed and registers must be in
14378 strictly increasing order.
14380 To be a valid VSCCLRM pattern, OP must have the following form:
14382 [(unspec_volatile [(const_int 0)]
14383 VUNSPEC_VSCCLRM_VPR)
14384 (set (reg:SF <N>) (const_int 0))
14385 (set (reg:SF <M>) (const_int 0))
14389 As with CLRM, any number (including 0) of set expressions is valid; however,
14390 the volatile unspec is mandatory here. Any VFP single-precision register is
14391 accepted but all registers must be consecutive and in increasing order. */
14393 bool
14394 clear_operation_p (rtx op, bool vfp)
14396 unsigned regno;
14397 unsigned last_regno = INVALID_REGNUM;
14398 rtx elt, reg, zero;
14399 int count = XVECLEN (op, 0);
14400 int first_set = vfp ? 1 : 0;
14401 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14403 for (int i = first_set; i < count; i++)
14405 elt = XVECEXP (op, 0, i);
14407 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14409 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14410 || XVECLEN (elt, 0) != 1
14411 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14412 || i != count - 2)
14413 return false;
14415 continue;
14418 if (GET_CODE (elt) == CLOBBER)
14419 continue;
14421 if (GET_CODE (elt) != SET)
14422 return false;
14424 reg = SET_DEST (elt);
14425 zero = SET_SRC (elt);
14427 if (!REG_P (reg)
14428 || GET_MODE (reg) != expected_mode
14429 || zero != CONST0_RTX (SImode))
14430 return false;
14432 regno = REGNO (reg);
14434 if (vfp)
14436 if (i != first_set && regno != last_regno + 1)
14437 return false;
14439 else
14441 if (regno == SP_REGNUM || regno == PC_REGNUM)
14442 return false;
14443 if (i != first_set && regno <= last_regno)
14444 return false;
14447 last_regno = regno;
14450 return true;
14453 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14454 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14455 instruction. ADD_OFFSET is nonzero if the base address register needs
14456 to be modified with an add instruction before we can use it. */
14458 static bool
14459 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14460 int nops, HOST_WIDE_INT add_offset)
14462 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14463 if the offset isn't small enough. The reason 2 ldrs are faster
14464 is because these ARMs are able to do more than one cache access
14465 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14466 whilst the ARM8 has a double bandwidth cache. This means that
14467 these cores can do both an instruction fetch and a data fetch in
14468 a single cycle, so the trick of calculating the address into a
14469 scratch register (one of the result regs) and then doing a load
14470 multiple actually becomes slower (and no smaller in code size).
14471 That is the transformation
14473 ldr rd1, [rbase + offset]
14474 ldr rd2, [rbase + offset + 4]
14478 add rd1, rbase, offset
14479 ldmia rd1, {rd1, rd2}
14481 produces worse code -- '3 cycles + any stalls on rd2' instead of
14482 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14483 access per cycle, the first sequence could never complete in less
14484 than 6 cycles, whereas the ldm sequence would only take 5 and
14485 would make better use of sequential accesses if not hitting the
14486 cache.
14488 We cheat here and test 'arm_ld_sched' which we currently know to
14489 only be true for the ARM8, ARM9 and StrongARM. If this ever
14490 changes, then the test below needs to be reworked. */
14491 if (nops == 2 && arm_ld_sched && add_offset != 0)
14492 return false;
14494 /* XScale has load-store double instructions, but they have stricter
14495 alignment requirements than load-store multiple, so we cannot
14496 use them.
14498 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14499 the pipeline until completion.
14501 NREGS CYCLES
14502 1 3
14503 2 4
14504 3 5
14505 4 6
14507 An ldr instruction takes 1-3 cycles, but does not block the
14508 pipeline.
14510 NREGS CYCLES
14511 1 1-3
14512 2 2-6
14513 3 3-9
14514 4 4-12
14516 Best case ldr will always win. However, the more ldr instructions
14517 we issue, the less likely we are to be able to schedule them well.
14518 Using ldr instructions also increases code size.
14520 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14521 for counts of 3 or 4 regs. */
14522 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14523 return false;
14524 return true;
14527 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14528 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14529 an array ORDER which describes the sequence to use when accessing the
14530 offsets that produces an ascending order. In this sequence, each
14531 offset must be larger by exactly 4 than the previous one. ORDER[0]
14532 must have been filled in with the lowest offset by the caller.
14533 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14534 we use to verify that ORDER produces an ascending order of registers.
14535 Return true if it was possible to construct such an order, false if
14536 not. */
14538 static bool
14539 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14540 int *unsorted_regs)
14542 int i;
14543 for (i = 1; i < nops; i++)
14545 int j;
14547 order[i] = order[i - 1];
14548 for (j = 0; j < nops; j++)
14549 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14551 /* We must find exactly one offset that is higher than the
14552 previous one by 4. */
14553 if (order[i] != order[i - 1])
14554 return false;
14555 order[i] = j;
14557 if (order[i] == order[i - 1])
14558 return false;
14559 /* The register numbers must be ascending. */
14560 if (unsorted_regs != NULL
14561 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14562 return false;
14564 return true;
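/* Illustrative sketch, not part of the build: how compute_offset_order is
   used.  With offsets {8, 0, 4, 12} and order[0] preset to 1 (the index of
   the lowest offset), it succeeds and fills ORDER with {1, 2, 0, 3}; with
   offsets {0, 4, 12, 16} it fails, since no offset is exactly 4 above 4.  */
static bool
example_offset_order_usage (void)
{
  HOST_WIDE_INT offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1, 0, 0, 0 };	/* Caller seeds order[0].  */
  return compute_offset_order (4, offsets, order, NULL);
}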
14567 /* Used to determine in a peephole whether a sequence of load
14568 instructions can be changed into a load-multiple instruction.
14569 NOPS is the number of separate load instructions we are examining. The
14570 first NOPS entries in OPERANDS are the destination registers, the
14571 next NOPS entries are memory operands. If this function is
14572 successful, *BASE is set to the common base register of the memory
14573 accesses; *LOAD_OFFSET is set to the first memory location's offset
14574 from that base register.
14575 REGS is an array filled in with the destination register numbers.
14576 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14577 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14578 the sequence of registers in REGS matches the loads from ascending memory
14579 locations, and the function verifies that the register numbers are
14580 themselves ascending. If CHECK_REGS is false, the register numbers
14581 are stored in the order they are found in the operands. */
14582 static int
14583 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14584 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14586 int unsorted_regs[MAX_LDM_STM_OPS];
14587 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14588 int order[MAX_LDM_STM_OPS];
14589 int base_reg = -1;
14590 int i, ldm_case;
14592 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14593 easily extended if required. */
14594 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14596 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14598 /* Loop over the operands and check that the memory references are
14599 suitable (i.e. immediate offsets from the same base register). At
14600 the same time, extract the target register, and the memory
14601 offsets. */
14602 for (i = 0; i < nops; i++)
14604 rtx reg;
14605 rtx offset;
14607 /* Convert a subreg of a mem into the mem itself. */
14608 if (GET_CODE (operands[nops + i]) == SUBREG)
14609 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14611 gcc_assert (MEM_P (operands[nops + i]));
14613 /* Don't reorder volatile memory references; it doesn't seem worth
14614 looking for the case where the order is ok anyway. */
14615 if (MEM_VOLATILE_P (operands[nops + i]))
14616 return 0;
14618 offset = const0_rtx;
14620 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14621 || (SUBREG_P (reg)
14622 && REG_P (reg = SUBREG_REG (reg))))
14623 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14624 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14625 || (SUBREG_P (reg)
14626 && REG_P (reg = SUBREG_REG (reg))))
14627 && (CONST_INT_P (offset
14628 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14630 if (i == 0)
14632 base_reg = REGNO (reg);
14633 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14634 return 0;
14636 else if (base_reg != (int) REGNO (reg))
14637 /* Not addressed from the same base register. */
14638 return 0;
14640 unsorted_regs[i] = (REG_P (operands[i])
14641 ? REGNO (operands[i])
14642 : REGNO (SUBREG_REG (operands[i])));
14644 /* If it isn't an integer register, or if it overwrites the
14645 base register but isn't the last insn in the list, then
14646 we can't do this. */
14647 if (unsorted_regs[i] < 0
14648 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14649 || unsorted_regs[i] > 14
14650 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14651 return 0;
14653 /* Don't allow SP to be loaded unless it is also the base
14654 register. It guarantees that SP is reset correctly when
14655 an LDM instruction is interrupted. Otherwise, we might
14656 end up with a corrupt stack. */
14657 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14658 return 0;
14660 unsorted_offsets[i] = INTVAL (offset);
14661 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14662 order[0] = i;
14664 else
14665 /* Not a suitable memory address. */
14666 return 0;
14669 /* All the useful information has now been extracted from the
14670 operands into unsorted_regs and unsorted_offsets; additionally,
14671 order[0] has been set to the lowest offset in the list. Sort
14672 the offsets into order, verifying that they are adjacent, and
14673 check that the register numbers are ascending. */
14674 if (!compute_offset_order (nops, unsorted_offsets, order,
14675 check_regs ? unsorted_regs : NULL))
14676 return 0;
14678 if (saved_order)
14679 memcpy (saved_order, order, sizeof order);
14681 if (base)
14683 *base = base_reg;
14685 for (i = 0; i < nops; i++)
14686 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14688 *load_offset = unsorted_offsets[order[0]];
14691 if (unsorted_offsets[order[0]] == 0)
14692 ldm_case = 1; /* ldmia */
14693 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14694 ldm_case = 2; /* ldmib */
14695 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14696 ldm_case = 3; /* ldmda */
14697 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14698 ldm_case = 4; /* ldmdb */
14699 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14700 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14701 ldm_case = 5;
14702 else
14703 return 0;
14705 if (!multiple_operation_profitable_p (false, nops,
14706 ldm_case == 5
14707 ? unsorted_offsets[order[0]] : 0))
14708 return 0;
14710 return ldm_case;
14713 /* Used to determine in a peephole whether a sequence of store instructions can
14714 be changed into a store-multiple instruction.
14715 NOPS is the number of separate store instructions we are examining.
14716 NOPS_TOTAL is the total number of instructions recognized by the peephole
14717 pattern.
14718 The first NOPS entries in OPERANDS are the source registers, the next
14719 NOPS entries are memory operands. If this function is successful, *BASE is
14720 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14721 to the first memory location's offset from that base register. REGS is an
14722 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14723 likewise filled with the corresponding rtx's.
14724 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14725 numbers to an ascending order of stores.
14726 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14727 from ascending memory locations, and the function verifies that the register
14728 numbers are themselves ascending. If CHECK_REGS is false, the register
14729 numbers are stored in the order they are found in the operands. */
14730 static int
14731 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14732 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14733 HOST_WIDE_INT *load_offset, bool check_regs)
14735 int unsorted_regs[MAX_LDM_STM_OPS];
14736 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14737 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14738 int order[MAX_LDM_STM_OPS];
14739 int base_reg = -1;
14740 rtx base_reg_rtx = NULL;
14741 int i, stm_case;
14743 /* Write back of base register is currently only supported for Thumb 1. */
14744 int base_writeback = TARGET_THUMB1;
14746 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14747 easily extended if required. */
14748 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14750 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14752 /* Loop over the operands and check that the memory references are
14753 suitable (i.e. immediate offsets from the same base register). At
14754 the same time, extract the target register, and the memory
14755 offsets. */
14756 for (i = 0; i < nops; i++)
14758 rtx reg;
14759 rtx offset;
14761 /* Convert a subreg of a mem into the mem itself. */
14762 if (GET_CODE (operands[nops + i]) == SUBREG)
14763 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14765 gcc_assert (MEM_P (operands[nops + i]));
14767 /* Don't reorder volatile memory references; it doesn't seem worth
14768 looking for the case where the order is ok anyway. */
14769 if (MEM_VOLATILE_P (operands[nops + i]))
14770 return 0;
14772 offset = const0_rtx;
14774 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14775 || (SUBREG_P (reg)
14776 && REG_P (reg = SUBREG_REG (reg))))
14777 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14778 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14779 || (SUBREG_P (reg)
14780 && REG_P (reg = SUBREG_REG (reg))))
14781 && (CONST_INT_P (offset
14782 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14784 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14785 ? operands[i] : SUBREG_REG (operands[i]));
14786 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14788 if (i == 0)
14790 base_reg = REGNO (reg);
14791 base_reg_rtx = reg;
14792 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14793 return 0;
14795 else if (base_reg != (int) REGNO (reg))
14796 /* Not addressed from the same base register. */
14797 return 0;
14799 /* If it isn't an integer register, then we can't do this. */
14800 if (unsorted_regs[i] < 0
14801 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14802 /* The effects are unpredictable if the base register is
14803 both updated and stored. */
14804 || (base_writeback && unsorted_regs[i] == base_reg)
14805 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14806 || unsorted_regs[i] > 14)
14807 return 0;
14809 unsorted_offsets[i] = INTVAL (offset);
14810 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14811 order[0] = i;
14813 else
14814 /* Not a suitable memory address. */
14815 return 0;
14818 /* All the useful information has now been extracted from the
14819 operands into unsorted_regs and unsorted_offsets; additionally,
14820 order[0] has been set to the lowest offset in the list. Sort
14821 the offsets into order, verifying that they are adjacent, and
14822 check that the register numbers are ascending. */
14823 if (!compute_offset_order (nops, unsorted_offsets, order,
14824 check_regs ? unsorted_regs : NULL))
14825 return 0;
14827 if (saved_order)
14828 memcpy (saved_order, order, sizeof order);
14830 if (base)
14832 *base = base_reg;
14834 for (i = 0; i < nops; i++)
14836 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14837 if (reg_rtxs)
14838 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14841 *load_offset = unsorted_offsets[order[0]];
14844 if (TARGET_THUMB1
14845 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14846 return 0;
14848 if (unsorted_offsets[order[0]] == 0)
14849 stm_case = 1; /* stmia */
14850 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14851 stm_case = 2; /* stmib */
14852 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14853 stm_case = 3; /* stmda */
14854 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14855 stm_case = 4; /* stmdb */
14856 else
14857 return 0;
14859 if (!multiple_operation_profitable_p (false, nops, 0))
14860 return 0;
14862 return stm_case;
14865 /* Routines for use in generating RTL. */
14867 /* Generate a load-multiple instruction. COUNT is the number of loads in
14868 the instruction; REGS and MEMS are arrays containing the operands.
14869 BASEREG is the base register to be used in addressing the memory operands.
14870 WBACK_OFFSET is nonzero if the instruction should update the base
14871 register. */
14873 static rtx
14874 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14875 HOST_WIDE_INT wback_offset)
14877 int i = 0, j;
14878 rtx result;
14880 if (!multiple_operation_profitable_p (false, count, 0))
14882 rtx seq;
14884 start_sequence ();
14886 for (i = 0; i < count; i++)
14887 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14889 if (wback_offset != 0)
14890 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14892 seq = get_insns ();
14893 end_sequence ();
14895 return seq;
14898 result = gen_rtx_PARALLEL (VOIDmode,
14899 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14900 if (wback_offset != 0)
14902 XVECEXP (result, 0, 0)
14903 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14904 i = 1;
14905 count++;
14908 for (j = 0; i < count; i++, j++)
14909 XVECEXP (result, 0, i)
14910 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14912 return result;
14915 /* Generate a store-multiple instruction. COUNT is the number of stores in
14916 the instruction; REGS and MEMS are arrays containing the operands.
14917 BASEREG is the base register to be used in addressing the memory operands.
14918 WBACK_OFFSET is nonzero if the instruction should update the base
14919 register. */
14921 static rtx
14922 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14923 HOST_WIDE_INT wback_offset)
14925 int i = 0, j;
14926 rtx result;
14928 if (GET_CODE (basereg) == PLUS)
14929 basereg = XEXP (basereg, 0);
14931 if (!multiple_operation_profitable_p (false, count, 0))
14933 rtx seq;
14935 start_sequence ();
14937 for (i = 0; i < count; i++)
14938 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14940 if (wback_offset != 0)
14941 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14943 seq = get_insns ();
14944 end_sequence ();
14946 return seq;
14949 result = gen_rtx_PARALLEL (VOIDmode,
14950 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14951 if (wback_offset != 0)
14953 XVECEXP (result, 0, 0)
14954 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14955 i = 1;
14956 count++;
14959 for (j = 0; i < count; i++, j++)
14960 XVECEXP (result, 0, i)
14961 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14963 return result;
14966 /* Generate either a load-multiple or a store-multiple instruction. This
14967 function can be used in situations where we can start with a single MEM
14968 rtx and adjust its address upwards.
14969 COUNT is the number of operations in the instruction, not counting a
14970 possible update of the base register. REGS is an array containing the
14971 register operands.
14972 BASEREG is the base register to be used in addressing the memory operands,
14973 which are constructed from BASEMEM.
14974 WRITE_BACK specifies whether the generated instruction should include an
14975 update of the base register.
14976 OFFSETP is used to pass an offset to and from this function; this offset
14977 is not used when constructing the address (instead BASEMEM should have an
14978 appropriate offset in its address), it is used only for setting
14979 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
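/* A minimal usage sketch (the variable names basereg/basemem here are
   placeholders, not taken from any particular caller in this file):

     int regs[4] = { 0, 1, 2, 3 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
                                       basemem, &off));

   builds MEMs at BASEREG + 0, 4, 8 and 12, requests a 16-byte write-back
   of the base register, and leaves OFF == 16 on return.  */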
14981 static rtx
14982 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14983 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14985 rtx mems[MAX_LDM_STM_OPS];
14986 HOST_WIDE_INT offset = *offsetp;
14987 int i;
14989 gcc_assert (count <= MAX_LDM_STM_OPS);
14991 if (GET_CODE (basereg) == PLUS)
14992 basereg = XEXP (basereg, 0);
14994 for (i = 0; i < count; i++)
14996 rtx addr = plus_constant (Pmode, basereg, i * 4);
14997 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14998 offset += 4;
15001 if (write_back)
15002 *offsetp = offset;
15004 if (is_load)
15005 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15006 write_back ? 4 * count : 0);
15007 else
15008 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15009 write_back ? 4 * count : 0);
15013 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15014 rtx basemem, HOST_WIDE_INT *offsetp)
15016 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15017 offsetp);
15021 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15022 rtx basemem, HOST_WIDE_INT *offsetp)
15024 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15025 offsetp);
15028 /* Called from a peephole2 expander to turn a sequence of loads into an
15029 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15030 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15031 is true if we can reorder the registers because they are used commutatively
15032 subsequently.
15033 Returns true iff we could generate a new instruction. */
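/* Illustratively, the peephole driving this might present

     ldr  r0, [r2]
     ldr  r1, [r2, #4]

   and, if load_multiple_sequence accepts the operands, the pair is
   replaced by "ldm r2, {r0, r1}".  When SORT_REGS is true the destination
   registers are first sorted into ascending order (possibly pairing them
   with different memory words), which is safe because the subsequent uses
   are commutative.  */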
15035 bool
15036 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15038 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15039 rtx mems[MAX_LDM_STM_OPS];
15040 int i, j, base_reg;
15041 rtx base_reg_rtx;
15042 HOST_WIDE_INT offset;
15043 int write_back = FALSE;
15044 int ldm_case;
15045 rtx addr;
15047 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15048 &base_reg, &offset, !sort_regs);
15050 if (ldm_case == 0)
15051 return false;
15053 if (sort_regs)
15054 for (i = 0; i < nops - 1; i++)
15055 for (j = i + 1; j < nops; j++)
15056 if (regs[i] > regs[j])
15058 int t = regs[i];
15059 regs[i] = regs[j];
15060 regs[j] = t;
15062 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15064 if (TARGET_THUMB1)
15066 gcc_assert (ldm_case == 1 || ldm_case == 5);
15068 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15069 write_back = true;
15070 for (i = 0; i < nops; i++)
15071 if (base_reg == regs[i])
15072 write_back = false;
15074 /* Ensure the base is dead if it is updated. */
15075 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15076 return false;
15079 if (ldm_case == 5)
15081 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15082 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15083 offset = 0;
15084 base_reg_rtx = newbase;
15087 for (i = 0; i < nops; i++)
15089 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15090 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15091 SImode, addr, 0);
15093 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15094 write_back ? offset + i * 4 : 0));
15095 return true;
15098 /* Called from a peephole2 expander to turn a sequence of stores into an
15099 STM instruction. OPERANDS are the operands found by the peephole matcher;
15100 NOPS indicates how many separate stores we are trying to combine.
15101 Returns true iff we could generate a new instruction. */
15103 bool
15104 gen_stm_seq (rtx *operands, int nops)
15106 int i;
15107 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15108 rtx mems[MAX_LDM_STM_OPS];
15109 int base_reg;
15110 rtx base_reg_rtx;
15111 HOST_WIDE_INT offset;
15112 int write_back = FALSE;
15113 int stm_case;
15114 rtx addr;
15115 bool base_reg_dies;
15117 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15118 mem_order, &base_reg, &offset, true);
15120 if (stm_case == 0)
15121 return false;
15123 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15125 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15126 if (TARGET_THUMB1)
15128 gcc_assert (base_reg_dies);
15129 write_back = TRUE;
15132 if (stm_case == 5)
15134 gcc_assert (base_reg_dies);
15135 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15136 offset = 0;
15139 addr = plus_constant (Pmode, base_reg_rtx, offset);
15141 for (i = 0; i < nops; i++)
15143 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15144 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15145 SImode, addr, 0);
15147 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15148 write_back ? offset + i * 4 : 0));
15149 return true;
15152 /* Called from a peephole2 expander to turn a sequence of stores that are
15153 preceded by constant loads into an STM instruction. OPERANDS are the
15154 operands found by the peephole matcher; NOPS indicates how many
15155 separate stores we are trying to combine; there are 2 * NOPS
15156 instructions in the peephole.
15157 Returns true iff we could generate a new instruction. */
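/* Sketch of the expected input shape: for NOPS == 2 the peephole spans
   four instructions such as

     mov  r0, #1
     mov  r1, #2
     str  r0, [r3]
     str  r1, [r3, #4]

   with operands[0..1] holding the registers, operands[2..3] the MEMs and
   operands[4..5] the constants.  The code below re-emits the constant
   loads (possibly into substitute registers found by
   peep2_find_free_register) and then a single STM.  */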
15159 bool
15160 gen_const_stm_seq (rtx *operands, int nops)
15162 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15163 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15164 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15165 rtx mems[MAX_LDM_STM_OPS];
15166 int base_reg;
15167 rtx base_reg_rtx;
15168 HOST_WIDE_INT offset;
15169 int write_back = FALSE;
15170 int stm_case;
15171 rtx addr;
15172 bool base_reg_dies;
15173 int i, j;
15174 HARD_REG_SET allocated;
15176 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15177 mem_order, &base_reg, &offset, false);
15179 if (stm_case == 0)
15180 return false;
15182 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15184 /* If the same register is used more than once, try to find a free
15185 register. */
15186 CLEAR_HARD_REG_SET (allocated);
15187 for (i = 0; i < nops; i++)
15189 for (j = i + 1; j < nops; j++)
15190 if (regs[i] == regs[j])
15192 rtx t = peep2_find_free_register (0, nops * 2,
15193 TARGET_THUMB1 ? "l" : "r",
15194 SImode, &allocated);
15195 if (t == NULL_RTX)
15196 return false;
15197 reg_rtxs[i] = t;
15198 regs[i] = REGNO (t);
15202 /* Compute an ordering that maps the register numbers to an ascending
15203 sequence. */
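/* (Illustrative) with regs == {3, 1, 2} the ordering computed below is
   reg_order == {1, 2, 0}: entry I selects the position holding the
   (I + 1)-th smallest register number, so sorted_regs can be filled in
   ascending order further down.  */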
15204 reg_order[0] = 0;
15205 for (i = 0; i < nops; i++)
15206 if (regs[i] < regs[reg_order[0]])
15207 reg_order[0] = i;
15209 for (i = 1; i < nops; i++)
15211 int this_order = reg_order[i - 1];
15212 for (j = 0; j < nops; j++)
15213 if (regs[j] > regs[reg_order[i - 1]]
15214 && (this_order == reg_order[i - 1]
15215 || regs[j] < regs[this_order]))
15216 this_order = j;
15217 reg_order[i] = this_order;
15220 /* Ensure that registers that must be live after the instruction end
15221 up with the correct value. */
15222 for (i = 0; i < nops; i++)
15224 int this_order = reg_order[i];
15225 if ((this_order != mem_order[i]
15226 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15227 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15228 return false;
15231 /* Load the constants. */
15232 for (i = 0; i < nops; i++)
15234 rtx op = operands[2 * nops + mem_order[i]];
15235 sorted_regs[i] = regs[reg_order[i]];
15236 emit_move_insn (reg_rtxs[reg_order[i]], op);
15239 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15241 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15242 if (TARGET_THUMB1)
15244 gcc_assert (base_reg_dies);
15245 write_back = TRUE;
15248 if (stm_case == 5)
15250 gcc_assert (base_reg_dies);
15251 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15252 offset = 0;
15255 addr = plus_constant (Pmode, base_reg_rtx, offset);
15257 for (i = 0; i < nops; i++)
15259 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15260 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15261 SImode, addr, 0);
15263 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15264 write_back ? offset + i * 4 : 0));
15265 return true;
15268 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15269 unaligned copies on processors which support unaligned semantics for those
15270 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15271 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15272 An interleave factor of 1 (the minimum) will perform no interleaving.
15273 Load/store multiple are used for aligned addresses where possible. */
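/* For example (illustrative), INTERLEAVE_FACTOR == 2 copies the bulk of
   the block in 8-byte chunks as load/load/store/store, using hard
   registers r0 and r1 when either side is word-aligned (so ldm/stm can be
   used on that side) and fresh pseudos otherwise.  A 22-byte copy is then
   split as two 8-byte chunks, one further word, and one halfword, with
   nothing left over.  */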
15275 static void
15276 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15277 HOST_WIDE_INT length,
15278 unsigned int interleave_factor)
15280 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15281 int *regnos = XALLOCAVEC (int, interleave_factor);
15282 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15283 HOST_WIDE_INT i, j;
15284 HOST_WIDE_INT remaining = length, words;
15285 rtx halfword_tmp = NULL, byte_tmp = NULL;
15286 rtx dst, src;
15287 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15288 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15289 HOST_WIDE_INT srcoffset, dstoffset;
15290 HOST_WIDE_INT src_autoinc, dst_autoinc;
15291 rtx mem, addr;
15293 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15295 /* Use hard registers if we have aligned source or destination so we can use
15296 load/store multiple with contiguous registers. */
15297 if (dst_aligned || src_aligned)
15298 for (i = 0; i < interleave_factor; i++)
15299 regs[i] = gen_rtx_REG (SImode, i);
15300 else
15301 for (i = 0; i < interleave_factor; i++)
15302 regs[i] = gen_reg_rtx (SImode);
15304 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15305 src = copy_addr_to_reg (XEXP (srcbase, 0));
15307 srcoffset = dstoffset = 0;
15309 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15310 For copying the last bytes we want to subtract this offset again. */
15311 src_autoinc = dst_autoinc = 0;
15313 for (i = 0; i < interleave_factor; i++)
15314 regnos[i] = i;
15316 /* Copy BLOCK_SIZE_BYTES chunks. */
15318 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15320 /* Load words. */
15321 if (src_aligned && interleave_factor > 1)
15323 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15324 TRUE, srcbase, &srcoffset));
15325 src_autoinc += UNITS_PER_WORD * interleave_factor;
15327 else
15329 for (j = 0; j < interleave_factor; j++)
15331 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15332 - src_autoinc));
15333 mem = adjust_automodify_address (srcbase, SImode, addr,
15334 srcoffset + j * UNITS_PER_WORD);
15335 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15337 srcoffset += block_size_bytes;
15340 /* Store words. */
15341 if (dst_aligned && interleave_factor > 1)
15343 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15344 TRUE, dstbase, &dstoffset));
15345 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15347 else
15349 for (j = 0; j < interleave_factor; j++)
15351 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15352 - dst_autoinc));
15353 mem = adjust_automodify_address (dstbase, SImode, addr,
15354 dstoffset + j * UNITS_PER_WORD);
15355 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15357 dstoffset += block_size_bytes;
15360 remaining -= block_size_bytes;
15363 /* Copy any whole words left (note these aren't interleaved with any
15364 subsequent halfword/byte load/stores in the interests of simplicity). */
15366 words = remaining / UNITS_PER_WORD;
15368 gcc_assert (words < interleave_factor);
15370 if (src_aligned && words > 1)
15372 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15373 &srcoffset));
15374 src_autoinc += UNITS_PER_WORD * words;
15376 else
15378 for (j = 0; j < words; j++)
15380 addr = plus_constant (Pmode, src,
15381 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15382 mem = adjust_automodify_address (srcbase, SImode, addr,
15383 srcoffset + j * UNITS_PER_WORD);
15384 if (src_aligned)
15385 emit_move_insn (regs[j], mem);
15386 else
15387 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15389 srcoffset += words * UNITS_PER_WORD;
15392 if (dst_aligned && words > 1)
15394 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15395 &dstoffset));
15396 dst_autoinc += words * UNITS_PER_WORD;
15398 else
15400 for (j = 0; j < words; j++)
15402 addr = plus_constant (Pmode, dst,
15403 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15404 mem = adjust_automodify_address (dstbase, SImode, addr,
15405 dstoffset + j * UNITS_PER_WORD);
15406 if (dst_aligned)
15407 emit_move_insn (mem, regs[j]);
15408 else
15409 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15411 dstoffset += words * UNITS_PER_WORD;
15414 remaining -= words * UNITS_PER_WORD;
15416 gcc_assert (remaining < 4);
15418 /* Copy a halfword if necessary. */
15420 if (remaining >= 2)
15422 halfword_tmp = gen_reg_rtx (SImode);
15424 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15425 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15426 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15428 /* Either write out immediately, or delay until we've loaded the last
15429 byte, depending on interleave factor. */
15430 if (interleave_factor == 1)
15432 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15433 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15434 emit_insn (gen_unaligned_storehi (mem,
15435 gen_lowpart (HImode, halfword_tmp)));
15436 halfword_tmp = NULL;
15437 dstoffset += 2;
15440 remaining -= 2;
15441 srcoffset += 2;
15444 gcc_assert (remaining < 2);
15446 /* Copy last byte. */
15448 if ((remaining & 1) != 0)
15450 byte_tmp = gen_reg_rtx (SImode);
15452 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15453 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15454 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15456 if (interleave_factor == 1)
15458 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15459 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15460 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15461 byte_tmp = NULL;
15462 dstoffset++;
15465 remaining--;
15466 srcoffset++;
15469 /* Store last halfword if we haven't done so already. */
15471 if (halfword_tmp)
15473 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15474 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15475 emit_insn (gen_unaligned_storehi (mem,
15476 gen_lowpart (HImode, halfword_tmp)));
15477 dstoffset += 2;
15480 /* Likewise for last byte. */
15482 if (byte_tmp)
15484 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15485 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15486 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15487 dstoffset++;
15490 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15493 /* From mips_adjust_block_mem:
15495 Helper function for doing a loop-based block operation on memory
15496 reference MEM. Each iteration of the loop will operate on LENGTH
15497 bytes of MEM.
15499 Create a new base register for use within the loop and point it to
15500 the start of MEM. Create a new memory reference that uses this
15501 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15503 static void
15504 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15505 rtx *loop_mem)
15507 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15509 /* Although the new mem does not refer to a known location,
15510 it does keep up to LENGTH bytes of alignment. */
15511 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15512 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15515 /* From mips_block_move_loop:
15517 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15518 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15519 the memory regions do not overlap. */
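/* Worked example (illustrative): LENGTH == 70 with BYTES_PER_ITER == 16
   gives LEFTOVER == 6, so the loop below copies 64 bytes in four
   iterations (SRC_REG is compared against FINAL_SRC == src + 64) and the
   remaining 6 bytes are handled by the trailing straight copy.  */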
15521 static void
15522 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15523 unsigned int interleave_factor,
15524 HOST_WIDE_INT bytes_per_iter)
15526 rtx src_reg, dest_reg, final_src, test;
15527 HOST_WIDE_INT leftover;
15529 leftover = length % bytes_per_iter;
15530 length -= leftover;
15532 /* Create registers and memory references for use within the loop. */
15533 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15534 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15536 /* Calculate the value that SRC_REG should have after the last iteration of
15537 the loop. */
15538 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15539 0, 0, OPTAB_WIDEN);
15541 /* Emit the start of the loop. */
15542 rtx_code_label *label = gen_label_rtx ();
15543 emit_label (label);
15545 /* Emit the loop body. */
15546 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15547 interleave_factor);
15549 /* Move on to the next block. */
15550 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15551 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15553 /* Emit the loop condition. */
15554 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15555 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15557 /* Mop up any left-over bytes. */
15558 if (leftover)
15559 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15562 /* Emit a block move when either the source or destination is unaligned (not
15563 aligned to a four-byte boundary). This may need further tuning depending on
15564 core type, optimize_size setting, etc. */
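/* As a concrete (illustrative) example of the heuristics below: when not
   optimizing for size, a 40-byte copy exceeds the 32-byte threshold and
   becomes a loop moving 16 bytes per iteration with an interleave factor
   of 4 (32 bytes over two iterations), followed by a straight copy of the
   remaining 8 bytes.  */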
15566 static int
15567 arm_cpymemqi_unaligned (rtx *operands)
15569 HOST_WIDE_INT length = INTVAL (operands[2]);
15571 if (optimize_size)
15573 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15574 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15575 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15576 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15577 or dst_aligned though: allow more interleaving in those cases since the
15578 resulting code can be smaller. */
15579 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15580 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15582 if (length > 12)
15583 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15584 interleave_factor, bytes_per_iter);
15585 else
15586 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15587 interleave_factor);
15589 else
15591 /* Note that the loop created by arm_block_move_unaligned_loop may be
15592 subject to loop unrolling, which makes tuning this condition a little
15593 redundant. */
15594 if (length > 32)
15595 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15596 else
15597 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15600 return 1;
15604 arm_gen_cpymemqi (rtx *operands)
15606 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15607 HOST_WIDE_INT srcoffset, dstoffset;
15608 rtx src, dst, srcbase, dstbase;
15609 rtx part_bytes_reg = NULL;
15610 rtx mem;
15612 if (!CONST_INT_P (operands[2])
15613 || !CONST_INT_P (operands[3])
15614 || INTVAL (operands[2]) > 64)
15615 return 0;
15617 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15618 return arm_cpymemqi_unaligned (operands);
15620 if (INTVAL (operands[3]) & 3)
15621 return 0;
15623 dstbase = operands[0];
15624 srcbase = operands[1];
15626 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15627 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15629 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15630 out_words_to_go = INTVAL (operands[2]) / 4;
15631 last_bytes = INTVAL (operands[2]) & 3;
15632 dstoffset = srcoffset = 0;
15634 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15635 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15637 while (in_words_to_go >= 2)
15639 if (in_words_to_go > 4)
15640 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15641 TRUE, srcbase, &srcoffset));
15642 else
15643 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15644 src, FALSE, srcbase,
15645 &srcoffset));
15647 if (out_words_to_go)
15649 if (out_words_to_go > 4)
15650 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15651 TRUE, dstbase, &dstoffset));
15652 else if (out_words_to_go != 1)
15653 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15654 out_words_to_go, dst,
15655 (last_bytes == 0
15656 ? FALSE : TRUE),
15657 dstbase, &dstoffset));
15658 else
15660 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15661 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15662 if (last_bytes != 0)
15664 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15665 dstoffset += 4;
15670 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15671 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15674 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15675 if (out_words_to_go)
15677 rtx sreg;
15679 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15680 sreg = copy_to_reg (mem);
15682 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15683 emit_move_insn (mem, sreg);
15684 in_words_to_go--;
15686 gcc_assert (!in_words_to_go); /* Sanity check */
15689 if (in_words_to_go)
15691 gcc_assert (in_words_to_go > 0);
15693 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15694 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15697 gcc_assert (!last_bytes || part_bytes_reg);
15699 if (BYTES_BIG_ENDIAN && last_bytes)
15701 rtx tmp = gen_reg_rtx (SImode);
15703 /* The bytes we want are in the top end of the word. */
15704 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15705 GEN_INT (8 * (4 - last_bytes))));
15706 part_bytes_reg = tmp;
15708 while (last_bytes)
15710 mem = adjust_automodify_address (dstbase, QImode,
15711 plus_constant (Pmode, dst,
15712 last_bytes - 1),
15713 dstoffset + last_bytes - 1);
15714 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15716 if (--last_bytes)
15718 tmp = gen_reg_rtx (SImode);
15719 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15720 part_bytes_reg = tmp;
15725 else
15727 if (last_bytes > 1)
15729 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15730 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15731 last_bytes -= 2;
15732 if (last_bytes)
15734 rtx tmp = gen_reg_rtx (SImode);
15735 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15736 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15737 part_bytes_reg = tmp;
15738 dstoffset += 2;
15742 if (last_bytes)
15744 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15745 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15749 return 1;
15752 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15753 by mode size. */
15754 inline static rtx
15755 next_consecutive_mem (rtx mem)
15757 machine_mode mode = GET_MODE (mem);
15758 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15759 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15761 return adjust_automodify_address (mem, mode, addr, offset);
15764 /* Copy using LDRD/STRD instructions whenever possible.
15765 Returns true upon success. */
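/* Sketch of the splitting below (illustrative): with both buffers
   word-aligned and a length of 14 bytes, the copy becomes one DImode move
   (typically an LDRD/STRD pair), one SImode word move and one halfword
   move: 8 + 4 + 2.  If only one side is word-aligned, each 8-byte chunk
   is instead done with two unaligned SImode accesses on the unaligned
   side.  */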
15766 bool
15767 gen_cpymem_ldrd_strd (rtx *operands)
15769 unsigned HOST_WIDE_INT len;
15770 HOST_WIDE_INT align;
15771 rtx src, dst, base;
15772 rtx reg0;
15773 bool src_aligned, dst_aligned;
15774 bool src_volatile, dst_volatile;
15776 gcc_assert (CONST_INT_P (operands[2]));
15777 gcc_assert (CONST_INT_P (operands[3]));
15779 len = UINTVAL (operands[2]);
15780 if (len > 64)
15781 return false;
15783 /* Maximum alignment we can assume for both src and dst buffers. */
15784 align = INTVAL (operands[3]);
15786 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15787 return false;
15789 /* Place src and dst addresses in registers
15790 and update the corresponding mem rtx. */
15791 dst = operands[0];
15792 dst_volatile = MEM_VOLATILE_P (dst);
15793 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15794 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15795 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15797 src = operands[1];
15798 src_volatile = MEM_VOLATILE_P (src);
15799 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15800 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15801 src = adjust_automodify_address (src, VOIDmode, base, 0);
15803 if (!unaligned_access && !(src_aligned && dst_aligned))
15804 return false;
15806 if (src_volatile || dst_volatile)
15807 return false;
15809 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15810 if (!(dst_aligned || src_aligned))
15811 return arm_gen_cpymemqi (operands);
15813 /* If either src or dst is unaligned we'll be accessing it as pairs
15814 of unaligned SImode accesses. Otherwise we can generate DImode
15815 ldrd/strd instructions. */
15816 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15817 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15819 while (len >= 8)
15821 len -= 8;
15822 reg0 = gen_reg_rtx (DImode);
15823 rtx first_reg = NULL_RTX;
15824 rtx second_reg = NULL_RTX;
15826 if (!src_aligned || !dst_aligned)
15828 if (BYTES_BIG_ENDIAN)
15830 second_reg = gen_lowpart (SImode, reg0);
15831 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15833 else
15835 first_reg = gen_lowpart (SImode, reg0);
15836 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15839 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15840 emit_move_insn (reg0, src);
15841 else if (src_aligned)
15842 emit_insn (gen_unaligned_loaddi (reg0, src));
15843 else
15845 emit_insn (gen_unaligned_loadsi (first_reg, src));
15846 src = next_consecutive_mem (src);
15847 emit_insn (gen_unaligned_loadsi (second_reg, src));
15850 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15851 emit_move_insn (dst, reg0);
15852 else if (dst_aligned)
15853 emit_insn (gen_unaligned_storedi (dst, reg0));
15854 else
15856 emit_insn (gen_unaligned_storesi (dst, first_reg));
15857 dst = next_consecutive_mem (dst);
15858 emit_insn (gen_unaligned_storesi (dst, second_reg));
15861 src = next_consecutive_mem (src);
15862 dst = next_consecutive_mem (dst);
15865 gcc_assert (len < 8);
15866 if (len >= 4)
15868 /* More than a word but less than a double-word to copy. Copy a word. */
15869 reg0 = gen_reg_rtx (SImode);
15870 src = adjust_address (src, SImode, 0);
15871 dst = adjust_address (dst, SImode, 0);
15872 if (src_aligned)
15873 emit_move_insn (reg0, src);
15874 else
15875 emit_insn (gen_unaligned_loadsi (reg0, src));
15877 if (dst_aligned)
15878 emit_move_insn (dst, reg0);
15879 else
15880 emit_insn (gen_unaligned_storesi (dst, reg0));
15882 src = next_consecutive_mem (src);
15883 dst = next_consecutive_mem (dst);
15884 len -= 4;
15887 if (len == 0)
15888 return true;
15890 /* Copy the remaining bytes. */
15891 if (len >= 2)
15893 dst = adjust_address (dst, HImode, 0);
15894 src = adjust_address (src, HImode, 0);
15895 reg0 = gen_reg_rtx (SImode);
15896 if (src_aligned)
15897 emit_insn (gen_zero_extendhisi2 (reg0, src));
15898 else
15899 emit_insn (gen_unaligned_loadhiu (reg0, src));
15901 if (dst_aligned)
16902 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15903 else
15904 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15906 src = next_consecutive_mem (src);
15907 dst = next_consecutive_mem (dst);
15908 if (len == 2)
15909 return true;
15912 dst = adjust_address (dst, QImode, 0);
15913 src = adjust_address (src, QImode, 0);
15914 reg0 = gen_reg_rtx (QImode);
15915 emit_move_insn (reg0, src);
15916 emit_move_insn (dst, reg0);
15917 return true;
15920 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15921 into its component 32-bit subregs. OP2 may be an immediate
15922 constant and we want to simplify it in that case. */
15923 void
15924 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15925 rtx *lo_op2, rtx *hi_op2)
15927 *lo_op1 = gen_lowpart (SImode, op1);
15928 *hi_op1 = gen_highpart (SImode, op1);
15929 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15930 subreg_lowpart_offset (SImode, DImode));
15931 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15932 subreg_highpart_offset (SImode, DImode));
15935 /* Select a dominance comparison mode if possible for a test of the general
15936 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15937 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15938 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15939 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15940 In all cases OP will be either EQ or NE, but we don't need to know which
15941 here. If we are unable to support a dominance comparison we return
15942 CC mode. This will then fail to match for the RTL expressions that
15943 generate this call. */
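/* A couple of illustrative mappings: (a == b) && (c == d) with
   DOM_CC_X_AND_Y yields CC_DEQmode, and (a == b) || (c <= d) with
   DOM_CC_X_OR_Y yields CC_DLEmode, since an EQ result implies an LE
   result.  When neither comparison dominates the other (EQ and LT, for
   example) CCmode is returned and the calling pattern simply fails to
   match.  */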
15944 machine_mode
15945 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15947 enum rtx_code cond1, cond2;
15948 int swapped = 0;
15950 /* Currently we will probably get the wrong result if the individual
15951 comparisons are not simple. This also ensures that it is safe to
15952 reverse a comparison if necessary. */
15953 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15954 != CCmode)
15955 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15956 != CCmode))
15957 return CCmode;
15959 /* The if_then_else variant of this tests the second condition if the
15960 first passes, but is true if the first fails. Reverse the first
15961 condition to get a true "inclusive-or" expression. */
15962 if (cond_or == DOM_CC_NX_OR_Y)
15963 cond1 = reverse_condition (cond1);
15965 /* If the comparisons are not equal, and one doesn't dominate the other,
15966 then we can't do this. */
15967 if (cond1 != cond2
15968 && !comparison_dominates_p (cond1, cond2)
15969 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15970 return CCmode;
15972 if (swapped)
15973 std::swap (cond1, cond2);
15975 switch (cond1)
15977 case EQ:
15978 if (cond_or == DOM_CC_X_AND_Y)
15979 return CC_DEQmode;
15981 switch (cond2)
15983 case EQ: return CC_DEQmode;
15984 case LE: return CC_DLEmode;
15985 case LEU: return CC_DLEUmode;
15986 case GE: return CC_DGEmode;
15987 case GEU: return CC_DGEUmode;
15988 default: gcc_unreachable ();
15991 case LT:
15992 if (cond_or == DOM_CC_X_AND_Y)
15993 return CC_DLTmode;
15995 switch (cond2)
15997 case LT:
15998 return CC_DLTmode;
15999 case LE:
16000 return CC_DLEmode;
16001 case NE:
16002 return CC_DNEmode;
16003 default:
16004 gcc_unreachable ();
16007 case GT:
16008 if (cond_or == DOM_CC_X_AND_Y)
16009 return CC_DGTmode;
16011 switch (cond2)
16013 case GT:
16014 return CC_DGTmode;
16015 case GE:
16016 return CC_DGEmode;
16017 case NE:
16018 return CC_DNEmode;
16019 default:
16020 gcc_unreachable ();
16023 case LTU:
16024 if (cond_or == DOM_CC_X_AND_Y)
16025 return CC_DLTUmode;
16027 switch (cond2)
16029 case LTU:
16030 return CC_DLTUmode;
16031 case LEU:
16032 return CC_DLEUmode;
16033 case NE:
16034 return CC_DNEmode;
16035 default:
16036 gcc_unreachable ();
16039 case GTU:
16040 if (cond_or == DOM_CC_X_AND_Y)
16041 return CC_DGTUmode;
16043 switch (cond2)
16045 case GTU:
16046 return CC_DGTUmode;
16047 case GEU:
16048 return CC_DGEUmode;
16049 case NE:
16050 return CC_DNEmode;
16051 default:
16052 gcc_unreachable ();
16055 /* The remaining cases only occur when both comparisons are the
16056 same. */
16057 case NE:
16058 gcc_assert (cond1 == cond2);
16059 return CC_DNEmode;
16061 case LE:
16062 gcc_assert (cond1 == cond2);
16063 return CC_DLEmode;
16065 case GE:
16066 gcc_assert (cond1 == cond2);
16067 return CC_DGEmode;
16069 case LEU:
16070 gcc_assert (cond1 == cond2);
16071 return CC_DLEUmode;
16073 case GEU:
16074 gcc_assert (cond1 == cond2);
16075 return CC_DGEUmode;
16077 default:
16078 gcc_unreachable ();
16082 machine_mode
16083 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16085 /* All floating point compares return CCFP if it is an equality
16086 comparison, and CCFPE otherwise. */
16087 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16089 switch (op)
16091 case EQ:
16092 case NE:
16093 case UNORDERED:
16094 case ORDERED:
16095 case UNLT:
16096 case UNLE:
16097 case UNGT:
16098 case UNGE:
16099 case UNEQ:
16100 case LTGT:
16101 return CCFPmode;
16103 case LT:
16104 case LE:
16105 case GT:
16106 case GE:
16107 return CCFPEmode;
16109 default:
16110 gcc_unreachable ();
16114 /* A compare with a shifted operand. Because of canonicalization, the
16115 comparison will have to be swapped when we emit the assembler. */
16116 if (GET_MODE (y) == SImode
16117 && (REG_P (y) || (SUBREG_P (y)))
16118 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16119 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16120 || GET_CODE (x) == ROTATERT))
16121 return CC_SWPmode;
16123 /* A widened compare of the sum of a value plus a carry against a
16124 constant. This is a representation of RSC. We want to swap the
16125 result of the comparison at output. Not valid if the Z bit is
16126 needed. */
16127 if (GET_MODE (x) == DImode
16128 && GET_CODE (x) == PLUS
16129 && arm_borrow_operation (XEXP (x, 1), DImode)
16130 && CONST_INT_P (y)
16131 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16132 && (op == LE || op == GT))
16133 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16134 && (op == LEU || op == GTU))))
16135 return CC_SWPmode;
16137 /* If X is a constant we want to use CC_RSBmode. This is
16138 non-canonical, but arm_gen_compare_reg uses this to generate the
16139 correct canonical form. */
16140 if (GET_MODE (y) == SImode
16141 && (REG_P (y) || SUBREG_P (y))
16142 && CONST_INT_P (x))
16143 return CC_RSBmode;
16145 /* This operation is performed swapped, but since we only rely on the Z
16146 flag we don't need an additional mode. */
16147 if (GET_MODE (y) == SImode
16148 && (REG_P (y) || (SUBREG_P (y)))
16149 && GET_CODE (x) == NEG
16150 && (op == EQ || op == NE))
16151 return CC_Zmode;
16153 /* This is a special case that is used by combine to allow a
16154 comparison of a shifted byte load to be split into a zero-extend
16155 followed by a comparison of the shifted integer (only valid for
16156 equalities and unsigned inequalities). */
16157 if (GET_MODE (x) == SImode
16158 && GET_CODE (x) == ASHIFT
16159 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16160 && GET_CODE (XEXP (x, 0)) == SUBREG
16161 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16162 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16163 && (op == EQ || op == NE
16164 || op == GEU || op == GTU || op == LTU || op == LEU)
16165 && CONST_INT_P (y))
16166 return CC_Zmode;
16168 /* A construct for a conditional compare, if the false arm contains
16169 0, then both conditions must be true, otherwise either condition
16170 must be true. Not all conditions are possible, so CCmode is
16171 returned if it can't be done. */
16172 if (GET_CODE (x) == IF_THEN_ELSE
16173 && (XEXP (x, 2) == const0_rtx
16174 || XEXP (x, 2) == const1_rtx)
16175 && COMPARISON_P (XEXP (x, 0))
16176 && COMPARISON_P (XEXP (x, 1)))
16177 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16178 INTVAL (XEXP (x, 2)));
16180 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16181 if (GET_CODE (x) == AND
16182 && (op == EQ || op == NE)
16183 && COMPARISON_P (XEXP (x, 0))
16184 && COMPARISON_P (XEXP (x, 1)))
16185 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16186 DOM_CC_X_AND_Y);
16188 if (GET_CODE (x) == IOR
16189 && (op == EQ || op == NE)
16190 && COMPARISON_P (XEXP (x, 0))
16191 && COMPARISON_P (XEXP (x, 1)))
16192 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16193 DOM_CC_X_OR_Y);
16195 /* An operation (on Thumb) where we want to test for a single bit.
16196 This is done by shifting that bit up into the top bit of a
16197 scratch register; we can then branch on the sign bit. */
16198 if (TARGET_THUMB1
16199 && GET_MODE (x) == SImode
16200 && (op == EQ || op == NE)
16201 && GET_CODE (x) == ZERO_EXTRACT
16202 && XEXP (x, 1) == const1_rtx)
16203 return CC_Nmode;
16205 /* An operation that sets the condition codes as a side-effect, the
16206 V flag is not set correctly, so we can only use comparisons where
16207 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16208 instead.) */
16209 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16210 if (GET_MODE (x) == SImode
16211 && y == const0_rtx
16212 && (op == EQ || op == NE || op == LT || op == GE)
16213 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16214 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16215 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16216 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16217 || GET_CODE (x) == LSHIFTRT
16218 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16219 || GET_CODE (x) == ROTATERT
16220 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16221 return CC_NZmode;
16223 /* A comparison of ~reg with a const is really a special
16224 canonicalization of compare (~const, reg), which is a reverse
16225 subtract operation. We may not get here if CONST is 0, but that
16226 doesn't matter because ~0 isn't a valid immediate for RSB. */
16227 if (GET_MODE (x) == SImode
16228 && GET_CODE (x) == NOT
16229 && CONST_INT_P (y))
16230 return CC_RSBmode;
16232 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16233 return CC_Zmode;
16235 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16236 && GET_CODE (x) == PLUS
16237 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16238 return CC_Cmode;
16240 if (GET_MODE (x) == DImode
16241 && GET_CODE (x) == PLUS
16242 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16243 && CONST_INT_P (y)
16244 && UINTVAL (y) == 0x800000000
16245 && (op == GEU || op == LTU))
16246 return CC_ADCmode;
16248 if (GET_MODE (x) == DImode
16249 && (op == GE || op == LT)
16250 && GET_CODE (x) == SIGN_EXTEND
16251 && ((GET_CODE (y) == PLUS
16252 && arm_borrow_operation (XEXP (y, 0), DImode))
16253 || arm_borrow_operation (y, DImode)))
16254 return CC_NVmode;
16256 if (GET_MODE (x) == DImode
16257 && (op == GEU || op == LTU)
16258 && GET_CODE (x) == ZERO_EXTEND
16259 && ((GET_CODE (y) == PLUS
16260 && arm_borrow_operation (XEXP (y, 0), DImode))
16261 || arm_borrow_operation (y, DImode)))
16262 return CC_Bmode;
16264 if (GET_MODE (x) == DImode
16265 && (op == EQ || op == NE)
16266 && (GET_CODE (x) == PLUS
16267 || GET_CODE (x) == MINUS)
16268 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16269 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16270 && GET_CODE (y) == SIGN_EXTEND
16271 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16272 return CC_Vmode;
16274 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16275 return GET_MODE (x);
16277 return CCmode;
16280 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16281 the sequence of instructions needed to generate a suitable condition
16282 code register. Return the CC register result. */
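/* Sketch of the EQ/NE fast path below (illustrative): when comparing a
   DImode value against zero, the low and high words are IORed together
   and the result compared with zero, so the whole test becomes a single
   flag-setting ORR plus a scratch register for the IOR result.  A
   non-zero constant in one word is first folded away by adding its
   negation to the corresponding word of X.  */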
16283 static rtx
16284 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16286 machine_mode mode;
16287 rtx cc_reg;
16289 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16290 gcc_assert (TARGET_32BIT);
16291 gcc_assert (!CONST_INT_P (x));
16293 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16294 subreg_lowpart_offset (SImode, DImode));
16295 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16296 subreg_highpart_offset (SImode, DImode));
16297 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16298 subreg_lowpart_offset (SImode, DImode));
16299 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16300 subreg_highpart_offset (SImode, DImode));
16301 switch (code)
16303 case EQ:
16304 case NE:
16306 if (y_lo == const0_rtx || y_hi == const0_rtx)
16308 if (y_lo != const0_rtx)
16310 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16312 gcc_assert (y_hi == const0_rtx);
16313 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16314 if (!arm_add_operand (y_lo, SImode))
16315 y_lo = force_reg (SImode, y_lo);
16316 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16317 x_lo = scratch2;
16319 else if (y_hi != const0_rtx)
16321 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16323 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16324 if (!arm_add_operand (y_hi, SImode))
16325 y_hi = force_reg (SImode, y_hi);
16326 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16327 x_hi = scratch2;
16330 if (!scratch)
16332 gcc_assert (!reload_completed);
16333 scratch = gen_rtx_SCRATCH (SImode);
16336 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16337 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16339 rtx set
16340 = gen_rtx_SET (cc_reg,
16341 gen_rtx_COMPARE (CC_NZmode,
16342 gen_rtx_IOR (SImode, x_lo, x_hi),
16343 const0_rtx));
16344 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16345 clobber)));
16346 return cc_reg;
16349 if (!arm_add_operand (y_lo, SImode))
16350 y_lo = force_reg (SImode, y_lo);
16352 if (!arm_add_operand (y_hi, SImode))
16353 y_hi = force_reg (SImode, y_hi);
16355 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16356 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16357 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16358 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16359 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16361 emit_insn (gen_rtx_SET (cc_reg,
16362 gen_rtx_COMPARE (mode, conjunction,
16363 const0_rtx)));
16364 return cc_reg;
16367 case LT:
16368 case GE:
16370 if (y_lo == const0_rtx)
16372 /* If the low word of y is 0, then this is simply a normal
16373 compare of the upper words. */
16374 if (!arm_add_operand (y_hi, SImode))
16375 y_hi = force_reg (SImode, y_hi);
16377 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16380 if (!arm_add_operand (y_lo, SImode))
16381 y_lo = force_reg (SImode, y_lo);
16383 rtx cmp1
16384 = gen_rtx_LTU (DImode,
16385 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16386 const0_rtx);
16388 if (!scratch)
16389 scratch = gen_rtx_SCRATCH (SImode);
16391 if (!arm_not_operand (y_hi, SImode))
16392 y_hi = force_reg (SImode, y_hi);
16394 rtx_insn *insn;
16395 if (y_hi == const0_rtx)
16396 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16397 cmp1));
16398 else if (CONST_INT_P (y_hi))
16399 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16400 y_hi, cmp1));
16401 else
16402 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16403 cmp1));
16404 return SET_DEST (single_set (insn));
16407 case LE:
16408 case GT:
16410 /* During expansion, we only expect to get here if y is a
16411 constant that we want to handle, otherwise we should have
16412 swapped the operands already. */
16413 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16415 if (!const_ok_for_arm (INTVAL (y_lo)))
16416 y_lo = force_reg (SImode, y_lo);
16418 /* Perform a reverse subtract and compare. */
16419 rtx cmp1
16420 = gen_rtx_LTU (DImode,
16421 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16422 const0_rtx);
16423 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16424 x_hi, cmp1));
16425 return SET_DEST (single_set (insn));
16428 case LTU:
16429 case GEU:
16431 if (y_lo == const0_rtx)
16433 /* If the low word of y is 0, then this is simply a normal
16434 compare of the upper words. */
16435 if (!arm_add_operand (y_hi, SImode))
16436 y_hi = force_reg (SImode, y_hi);
16438 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16441 if (!arm_add_operand (y_lo, SImode))
16442 y_lo = force_reg (SImode, y_lo);
16444 rtx cmp1
16445 = gen_rtx_LTU (DImode,
16446 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16447 const0_rtx);
16449 if (!scratch)
16450 scratch = gen_rtx_SCRATCH (SImode);
16451 if (!arm_not_operand (y_hi, SImode))
16452 y_hi = force_reg (SImode, y_hi);
16454 rtx_insn *insn;
16455 if (y_hi == const0_rtx)
16456 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16457 cmp1));
16458 else if (CONST_INT_P (y_hi))
16460 /* Constant is viewed as unsigned when zero-extended. */
16461 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16462 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16463 y_hi, cmp1));
16465 else
16466 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16467 cmp1));
16468 return SET_DEST (single_set (insn));
16471 case LEU:
16472 case GTU:
16474 /* During expansion, we only expect to get here if y is a
16475 constant that we want to handle, otherwise we should have
16476 swapped the operands already. */
16477 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16479 if (!const_ok_for_arm (INTVAL (y_lo)))
16480 y_lo = force_reg (SImode, y_lo);
16482 /* Perform a reverse subtract and compare. */
16483 rtx cmp1
16484 = gen_rtx_LTU (DImode,
16485 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16486 const0_rtx);
16487 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16488 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16489 x_hi, cmp1));
16490 return SET_DEST (single_set (insn));
16493 default:
16494 gcc_unreachable ();
16498 /* X and Y are two things to compare using CODE. Emit the compare insn and
16499 return the rtx for register 0 in the proper mode. */
16501 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16503 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16504 return arm_gen_dicompare_reg (code, x, y, scratch);
16506 machine_mode mode = SELECT_CC_MODE (code, x, y);
16507 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16508 if (mode == CC_RSBmode)
16510 if (!scratch)
16511 scratch = gen_rtx_SCRATCH (SImode);
16512 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16513 GEN_INT (~UINTVAL (x)), y));
16515 else
16516 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16518 return cc_reg;
16521 /* Generate a sequence of insns that will generate the correct return
16522 address mask depending on the physical architecture that the program
16523 is running on. */
16525 arm_gen_return_addr_mask (void)
16527 rtx reg = gen_reg_rtx (Pmode);
16529 emit_insn (gen_return_addr_mask (reg));
16530 return reg;
16533 void
16534 arm_reload_in_hi (rtx *operands)
16536 rtx ref = operands[1];
16537 rtx base, scratch;
16538 HOST_WIDE_INT offset = 0;
16540 if (SUBREG_P (ref))
16542 offset = SUBREG_BYTE (ref);
16543 ref = SUBREG_REG (ref);
16546 if (REG_P (ref))
16548 /* We have a pseudo which has been spilt onto the stack; there
16549 are two cases here: the first where there is a simple
16550 stack-slot replacement and a second where the stack-slot is
16551 out of range, or is used as a subreg. */
16552 if (reg_equiv_mem (REGNO (ref)))
16554 ref = reg_equiv_mem (REGNO (ref));
16555 base = find_replacement (&XEXP (ref, 0));
16557 else
16558 /* The slot is out of range, or was dressed up in a SUBREG. */
16559 base = reg_equiv_address (REGNO (ref));
16561 /* PR 62554: If there is no equivalent memory location then just move
16562 the value as an SImode register move. This happens when the target
16563 architecture variant does not have an HImode register move. */
16564 if (base == NULL)
16566 gcc_assert (REG_P (operands[0]));
16567 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16568 gen_rtx_SUBREG (SImode, ref, 0)));
16569 return;
16572 else
16573 base = find_replacement (&XEXP (ref, 0));
16575 /* Handle the case where the address is too complex to be offset by 1. */
16576 if (GET_CODE (base) == MINUS
16577 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16579 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16581 emit_set_insn (base_plus, base);
16582 base = base_plus;
16584 else if (GET_CODE (base) == PLUS)
16586 /* The addend must be CONST_INT, or we would have dealt with it above. */
16587 HOST_WIDE_INT hi, lo;
16589 offset += INTVAL (XEXP (base, 1));
16590 base = XEXP (base, 0);
16592 /* Rework the address into a legal sequence of insns. */
16593 /* Valid range for lo is -4095 -> 4095 */
16594 lo = (offset >= 0
16595 ? (offset & 0xfff)
16596 : -((-offset) & 0xfff));
16598 /* Corner case: if lo is the max offset then we would be out of range
16599 once we have added the additional 1 below, so bump the msb into the
16600 pre-loading insn(s). */
16601 if (lo == 4095)
16602 lo &= 0x7ff;
16604 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16605 ^ (HOST_WIDE_INT) 0x80000000)
16606 - (HOST_WIDE_INT) 0x80000000);
16608 gcc_assert (hi + lo == offset);
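/* Worked example (illustrative): OFFSET == 4095 gives LO == 4095, which
   the corner case above reduces to LO == 2047, leaving HI == 2048;
   OFFSET == -4100 gives LO == -4 and HI == -4096.  In both cases HI + LO
   reconstructs the original offset while LO and LO + 1 stay within the
   +/-4095 addressing range.  */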
16610 if (hi != 0)
16612 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16614 /* Get the base address; addsi3 knows how to handle constants
16615 that require more than one insn. */
16616 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16617 base = base_plus;
16618 offset = lo;
16622 /* Operands[2] may overlap operands[0] (though it won't overlap
16623 operands[1]), that's why we asked for a DImode reg -- so we can
16624 use the bit that does not overlap. */
16625 if (REGNO (operands[2]) == REGNO (operands[0]))
16626 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16627 else
16628 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16630 emit_insn (gen_zero_extendqisi2 (scratch,
16631 gen_rtx_MEM (QImode,
16632 plus_constant (Pmode, base,
16633 offset))));
16634 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16635 gen_rtx_MEM (QImode,
16636 plus_constant (Pmode, base,
16637 offset + 1))));
16638 if (!BYTES_BIG_ENDIAN)
16639 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16640 gen_rtx_IOR (SImode,
16641 gen_rtx_ASHIFT
16642 (SImode,
16643 gen_rtx_SUBREG (SImode, operands[0], 0),
16644 GEN_INT (8)),
16645 scratch));
16646 else
16647 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16648 gen_rtx_IOR (SImode,
16649 gen_rtx_ASHIFT (SImode, scratch,
16650 GEN_INT (8)),
16651 gen_rtx_SUBREG (SImode, operands[0], 0)));
16654 /* Handle storing a half-word to memory during reload by synthesizing as two
16655 byte stores. Take care not to clobber the input values until after we
16656 have moved them somewhere safe. This code assumes that if the DImode
16657 scratch in operands[2] overlaps either the input value or output address
16658 in some way, then that value must die in this insn (we absolutely need
16659 two scratch registers for some corner cases). */
16660 void
16661 arm_reload_out_hi (rtx *operands)
16663 rtx ref = operands[0];
16664 rtx outval = operands[1];
16665 rtx base, scratch;
16666 HOST_WIDE_INT offset = 0;
16668 if (SUBREG_P (ref))
16670 offset = SUBREG_BYTE (ref);
16671 ref = SUBREG_REG (ref);
16674 if (REG_P (ref))
16676 /* We have a pseudo which has been spilt onto the stack; there
16677 are two cases here: the first where there is a simple
16678 stack-slot replacement and a second where the stack-slot is
16679 out of range, or is used as a subreg. */
16680 if (reg_equiv_mem (REGNO (ref)))
16682 ref = reg_equiv_mem (REGNO (ref));
16683 base = find_replacement (&XEXP (ref, 0));
16685 else
16686 /* The slot is out of range, or was dressed up in a SUBREG. */
16687 base = reg_equiv_address (REGNO (ref));
16689 /* PR 62254: If there is no equivalent memory location then just move
16690 the value as an SImode register move. This happens when the target
16691 architecture variant does not have an HImode register move. */
16692 if (base == NULL)
16694 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16696 if (REG_P (outval))
16698 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16699 gen_rtx_SUBREG (SImode, outval, 0)));
16701 else /* SUBREG_P (outval) */
16703 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16704 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16705 SUBREG_REG (outval)));
16706 else
16707 /* FIXME: Handle other cases ? */
16708 gcc_unreachable ();
16710 return;
16713 else
16714 base = find_replacement (&XEXP (ref, 0));
16716 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16718 /* Handle the case where the address is too complex to be offset by 1. */
16719 if (GET_CODE (base) == MINUS
16720 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16722 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16724 /* Be careful not to destroy OUTVAL. */
16725 if (reg_overlap_mentioned_p (base_plus, outval))
16727 /* Updating base_plus might destroy outval, see if we can
16728 swap the scratch and base_plus. */
16729 if (!reg_overlap_mentioned_p (scratch, outval))
16730 std::swap (scratch, base_plus);
16731 else
16733 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16735 /* Be conservative and copy OUTVAL into the scratch now,
16736 this should only be necessary if outval is a subreg
16737 of something larger than a word. */
16738 /* XXX Might this clobber base? I can't see how it can,
16739 since scratch is known to overlap with OUTVAL, and
16740 must be wider than a word. */
16741 emit_insn (gen_movhi (scratch_hi, outval));
16742 outval = scratch_hi;
16746 emit_set_insn (base_plus, base);
16747 base = base_plus;
16749 else if (GET_CODE (base) == PLUS)
16751 /* The addend must be CONST_INT, or we would have dealt with it above. */
16752 HOST_WIDE_INT hi, lo;
16754 offset += INTVAL (XEXP (base, 1));
16755 base = XEXP (base, 0);
16757 /* Rework the address into a legal sequence of insns. */
16758 /* Valid range for lo is -4095 -> 4095 */
16759 lo = (offset >= 0
16760 ? (offset & 0xfff)
16761 : -((-offset) & 0xfff));
16763 /* Corner case: if lo is the max offset then we would be out of range
16764 once we have added the additional 1 below, so bump the msb into the
16765 pre-loading insn(s). */
16766 if (lo == 4095)
16767 lo &= 0x7ff;
16769 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16770 ^ (HOST_WIDE_INT) 0x80000000)
16771 - (HOST_WIDE_INT) 0x80000000);
16773 gcc_assert (hi + lo == offset);
16775 if (hi != 0)
16777 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16779 /* Be careful not to destroy OUTVAL. */
16780 if (reg_overlap_mentioned_p (base_plus, outval))
16782 /* Updating base_plus might destroy outval, see if we
16783 can swap the scratch and base_plus. */
16784 if (!reg_overlap_mentioned_p (scratch, outval))
16785 std::swap (scratch, base_plus);
16786 else
16788 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16790 /* Be conservative and copy outval into scratch now,
16791 this should only be necessary if outval is a
16792 subreg of something larger than a word. */
16793 /* XXX Might this clobber base? I can't see how it
16794 can, since scratch is known to overlap with
16795 outval. */
16796 emit_insn (gen_movhi (scratch_hi, outval));
16797 outval = scratch_hi;
16801 /* Get the base address; addsi3 knows how to handle constants
16802 that require more than one insn. */
16803 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16804 base = base_plus;
16805 offset = lo;
16809 if (BYTES_BIG_ENDIAN)
16811 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16812 plus_constant (Pmode, base,
16813 offset + 1)),
16814 gen_lowpart (QImode, outval)));
16815 emit_insn (gen_lshrsi3 (scratch,
16816 gen_rtx_SUBREG (SImode, outval, 0),
16817 GEN_INT (8)));
16818 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16819 offset)),
16820 gen_lowpart (QImode, scratch)));
16822 else
16824 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16825 offset)),
16826 gen_lowpart (QImode, outval)));
16827 emit_insn (gen_lshrsi3 (scratch,
16828 gen_rtx_SUBREG (SImode, outval, 0),
16829 GEN_INT (8)));
16830 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16831 plus_constant (Pmode, base,
16832 offset + 1)),
16833 gen_lowpart (QImode, scratch)));
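/* For illustration, assuming a little-endian target and placeholder
   register names: with OFFSET = 4660 (0x1234) the split above gives
   lo = 564 (0x234) and hi = 4096, so the emitted sequence is roughly

	add	r_base2, r_base, #4096
	strb	r_out, [r_base2, #564]		@ low byte at lo
	lsr	r_scratch, r_out, #8
	strb	r_scratch, [r_base2, #565]	@ high byte at lo + 1

   For the corner case OFFSET = 4095, lo is clamped to 2047 so that
   lo + 1 = 2048 still fits the 12-bit STRB offset range, and hi picks
   up the remaining 2048.  */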
16837 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16838 (padded to the size of a word) should be passed in a register. */
16840 static bool
16841 arm_must_pass_in_stack (const function_arg_info &arg)
16843 if (TARGET_AAPCS_BASED)
16844 return must_pass_in_stack_var_size (arg);
16845 else
16846 return must_pass_in_stack_var_size_or_pad (arg);
16850 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16851 byte of a stack argument has useful data. For legacy APCS ABIs we use
16852 the default. For AAPCS based ABIs small aggregate types are placed
16853 in the lowest memory address. */
16855 static pad_direction
16856 arm_function_arg_padding (machine_mode mode, const_tree type)
16858 if (!TARGET_AAPCS_BASED)
16859 return default_function_arg_padding (mode, type);
16861 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16862 return PAD_DOWNWARD;
16864 return PAD_UPWARD;
16868 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16869 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16870 register has useful data, and return the opposite if the most
16871 significant byte does. */
16873 bool
16874 arm_pad_reg_upward (machine_mode mode,
16875 tree type, int first ATTRIBUTE_UNUSED)
16877 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16879 /* For AAPCS, small aggregates, small fixed-point types,
16880 and small complex types are always padded upwards. */
16881 if (type)
16883 if ((AGGREGATE_TYPE_P (type)
16884 || TREE_CODE (type) == COMPLEX_TYPE
16885 || FIXED_POINT_TYPE_P (type))
16886 && int_size_in_bytes (type) <= 4)
16887 return true;
16889 else
16891 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16892 && GET_MODE_SIZE (mode) <= 4)
16893 return true;
16897 /* Otherwise, use default padding. */
16898 return !BYTES_BIG_ENDIAN;
16901 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16902 assuming that the address in the base register is word aligned. */
16903 bool
16904 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16906 HOST_WIDE_INT max_offset;
16908 /* Offset must be a multiple of 4 in Thumb mode. */
16909 if (TARGET_THUMB2 && ((offset & 3) != 0))
16910 return false;
16912 if (TARGET_THUMB2)
16913 max_offset = 1020;
16914 else if (TARGET_ARM)
16915 max_offset = 255;
16916 else
16917 return false;
16919 return ((offset <= max_offset) && (offset >= -max_offset));
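/* For example, offset_ok_for_ldrd_strd (1020) holds in Thumb-2 mode (a
   multiple of 4 within +/-1020) but not in ARM mode (limit +/-255),
   while offset_ok_for_ldrd_strd (6) holds in ARM mode but is rejected
   in Thumb-2 mode because it is not a multiple of 4.  */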
16922 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16923 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16924 Assumes that the address in the base register RN is word aligned. Pattern
16925 guarantees that both memory accesses use the same base register,
16926 the offsets are constants within the range, and the gap between the offsets is 4.
16927 If reload is complete then check that the registers are legal. WBACK indicates whether
16928 address is updated. LOAD indicates whether memory access is load or store. */
16929 bool
16930 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16931 bool wback, bool load)
16933 unsigned int t, t2, n;
16935 if (!reload_completed)
16936 return true;
16938 if (!offset_ok_for_ldrd_strd (offset))
16939 return false;
16941 t = REGNO (rt);
16942 t2 = REGNO (rt2);
16943 n = REGNO (rn);
16945 if ((TARGET_THUMB2)
16946 && ((wback && (n == t || n == t2))
16947 || (t == SP_REGNUM)
16948 || (t == PC_REGNUM)
16949 || (t2 == SP_REGNUM)
16950 || (t2 == PC_REGNUM)
16951 || (!load && (n == PC_REGNUM))
16952 || (load && (t == t2))
16953 /* Triggers Cortex-M3 LDRD errata. */
16954 || (!wback && load && fix_cm3_ldrd && (n == t))))
16955 return false;
16957 if ((TARGET_ARM)
16958 && ((wback && (n == t || n == t2))
16959 || (t2 == PC_REGNUM)
16960 || (t % 2 != 0) /* First destination register is not even. */
16961 || (t2 != t + 1)
16962 /* PC can be used as base register (for offset addressing only),
16963 but it is deprecated. */
16964 || (n == PC_REGNUM)))
16965 return false;
16967 return true;
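/* Illustrative cases, after reload: in ARM mode
	ldrd	r4, r5, [r6, #8]
   is accepted (even first register, consecutive pair), whereas
	ldrd	r5, r6, [r7]
   is rejected because the first destination register is odd.  In
   Thumb-2 mode an arbitrary pair such as ldrd r1, r3, [r2] is fine,
   but a load with both destinations equal, or with SP or PC as a
   destination, is rejected.  */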
16970 /* Return true if a 64-bit access with alignment ALIGN and with a
16971 constant offset OFFSET from the base pointer is permitted on this
16972 architecture. */
16973 static bool
16974 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16976 return (unaligned_access
16977 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16978 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16981 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16982 operand MEM's address contains an immediate offset from the base
16983 register and has no side effects, in which case it sets BASE,
16984 OFFSET and ALIGN accordingly. */
16985 static bool
16986 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16988 rtx addr;
16990 gcc_assert (base != NULL && offset != NULL);
16992 /* TODO: Handle more general memory operand patterns, such as
16993 PRE_DEC and PRE_INC. */
16995 if (side_effects_p (mem))
16996 return false;
16998 /* Can't deal with subregs. */
16999 if (SUBREG_P (mem))
17000 return false;
17002 gcc_assert (MEM_P (mem));
17004 *offset = const0_rtx;
17005 *align = MEM_ALIGN (mem);
17007 addr = XEXP (mem, 0);
17009 /* If addr isn't valid for DImode, then we can't handle it. */
17010 if (!arm_legitimate_address_p (DImode, addr,
17011 reload_in_progress || reload_completed))
17012 return false;
17014 if (REG_P (addr))
17016 *base = addr;
17017 return true;
17019 else if (GET_CODE (addr) == PLUS)
17021 *base = XEXP (addr, 0);
17022 *offset = XEXP (addr, 1);
17023 return (REG_P (*base) && CONST_INT_P (*offset));
17026 return false;
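/* The address forms accepted here are, for illustration,
	(mem (reg r4))				base = r4, offset = 0
	(mem (plus (reg r4) (const_int 8)))	base = r4, offset = 8
   while auto-modify addresses (PRE_INC, POST_DEC, ...) are rejected by
   the side_effects_p check, and anything that is not a legitimate
   DImode address is rejected as well.  */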
17029 /* Called from a peephole2 to replace two word-size accesses with a
17030 single LDRD/STRD instruction. Returns true iff we can generate a
17031 new instruction sequence. That is, both accesses use the same base
17032 register and the gap between constant offsets is 4. This function
17033 may reorder its operands to match ldrd/strd RTL templates.
17034 OPERANDS are the operands found by the peephole matcher;
17035 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17036 corresponding memory operands. LOAD indicates whether the access
17037 is load or store. CONST_STORE indicates a store of constant
17038 integer values held in OPERANDS[4,5] and assumes that the pattern
17039 is four insns long, for the purpose of checking dead registers.
17040 COMMUTE indicates that register operands may be reordered. */
17041 bool
17042 gen_operands_ldrd_strd (rtx *operands, bool load,
17043 bool const_store, bool commute)
17045 int nops = 2;
17046 HOST_WIDE_INT offsets[2], offset, align[2];
17047 rtx base = NULL_RTX;
17048 rtx cur_base, cur_offset, tmp;
17049 int i, gap;
17050 HARD_REG_SET regset;
17052 gcc_assert (!const_store || !load);
17053 /* Check that the memory references are immediate offsets from the
17054 same base register. Extract the base register, the destination
17055 registers, and the corresponding memory offsets. */
17056 for (i = 0; i < nops; i++)
17058 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17059 &align[i]))
17060 return false;
17062 if (i == 0)
17063 base = cur_base;
17064 else if (REGNO (base) != REGNO (cur_base))
17065 return false;
17067 offsets[i] = INTVAL (cur_offset);
17068 if (GET_CODE (operands[i]) == SUBREG)
17070 tmp = SUBREG_REG (operands[i]);
17071 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17072 operands[i] = tmp;
17076 /* Make sure there is no dependency between the individual loads. */
17077 if (load && REGNO (operands[0]) == REGNO (base))
17078 return false; /* RAW */
17080 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17081 return false; /* WAW */
17083 /* If the same input register is used in both stores
17084 when storing different constants, try to find a free register.
17085 For example, the code
17086 mov r0, 0
17087 str r0, [r2]
17088 mov r0, 1
17089 str r0, [r2, #4]
17090 can be transformed into
17091 mov r1, 0
17092 mov r0, 1
17093 strd r1, r0, [r2]
17094 in Thumb mode assuming that r1 is free.
17095 For ARM mode do the same but only if the starting register
17096 can be made to be even. */
17097 if (const_store
17098 && REGNO (operands[0]) == REGNO (operands[1])
17099 && INTVAL (operands[4]) != INTVAL (operands[5]))
17101 if (TARGET_THUMB2)
17103 CLEAR_HARD_REG_SET (regset);
17104 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17105 if (tmp == NULL_RTX)
17106 return false;
17108 /* Use the new register in the first load to ensure that
17109 if the original input register is not dead after peephole,
17110 then it will have the correct constant value. */
17111 operands[0] = tmp;
17113 else if (TARGET_ARM)
17115 int regno = REGNO (operands[0]);
17116 if (!peep2_reg_dead_p (4, operands[0]))
17118 /* When the input register is even and is not dead after the
17119 pattern, it has to hold the second constant but we cannot
17120 form a legal STRD in ARM mode with this register as the second
17121 register. */
17122 if (regno % 2 == 0)
17123 return false;
17125 /* Is regno-1 free? */
17126 SET_HARD_REG_SET (regset);
17127 CLEAR_HARD_REG_BIT(regset, regno - 1);
17128 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17129 if (tmp == NULL_RTX)
17130 return false;
17132 operands[0] = tmp;
17134 else
17136 /* Find a DImode register. */
17137 CLEAR_HARD_REG_SET (regset);
17138 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17139 if (tmp != NULL_RTX)
17141 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17142 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17144 else
17146 /* Can we use the input register to form a DI register? */
17147 SET_HARD_REG_SET (regset);
17148 CLEAR_HARD_REG_BIT(regset,
17149 regno % 2 == 0 ? regno + 1 : regno - 1);
17150 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17151 if (tmp == NULL_RTX)
17152 return false;
17153 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17157 gcc_assert (operands[0] != NULL_RTX);
17158 gcc_assert (operands[1] != NULL_RTX);
17159 gcc_assert (REGNO (operands[0]) % 2 == 0);
17160 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17164 /* Make sure the instructions are ordered with lower memory access first. */
17165 if (offsets[0] > offsets[1])
17167 gap = offsets[0] - offsets[1];
17168 offset = offsets[1];
17170 /* Swap the instructions such that lower memory is accessed first. */
17171 std::swap (operands[0], operands[1]);
17172 std::swap (operands[2], operands[3]);
17173 std::swap (align[0], align[1]);
17174 if (const_store)
17175 std::swap (operands[4], operands[5]);
17177 else
17179 gap = offsets[1] - offsets[0];
17180 offset = offsets[0];
17183 /* Make sure accesses are to consecutive memory locations. */
17184 if (gap != GET_MODE_SIZE (SImode))
17185 return false;
17187 if (!align_ok_ldrd_strd (align[0], offset))
17188 return false;
17190 /* Make sure we generate legal instructions. */
17191 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17192 false, load))
17193 return true;
17195 /* In Thumb-2 state, where registers are almost unconstrained, there
17196 is little hope of fixing it. */
17197 if (TARGET_THUMB2)
17198 return false;
17200 if (load && commute)
17202 /* Try reordering registers. */
17203 std::swap (operands[0], operands[1]);
17204 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17205 false, load))
17206 return true;
17209 if (const_store)
17211 /* If input registers are dead after this pattern, they can be
17212 reordered or replaced by other registers that are free in the
17213 current pattern. */
17214 if (!peep2_reg_dead_p (4, operands[0])
17215 || !peep2_reg_dead_p (4, operands[1]))
17216 return false;
17218 /* Try to reorder the input registers. */
17219 /* For example, the code
17220 mov r0, 0
17221 mov r1, 1
17222 str r1, [r2]
17223 str r0, [r2, #4]
17224 can be transformed into
17225 mov r1, 0
17226 mov r0, 1
17227 strd r0, r1, [r2] */
17229 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17230 false, false))
17232 std::swap (operands[0], operands[1]);
17233 return true;
17236 /* Try to find a free DI register. */
17237 CLEAR_HARD_REG_SET (regset);
17238 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17239 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17240 while (true)
17242 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17243 if (tmp == NULL_RTX)
17244 return false;
17246 /* DREG must be an even-numbered register in DImode.
17247 Split it into SI registers. */
17248 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17249 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17250 gcc_assert (operands[0] != NULL_RTX);
17251 gcc_assert (operands[1] != NULL_RTX);
17252 gcc_assert (REGNO (operands[0]) % 2 == 0);
17253 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17255 return (operands_ok_ldrd_strd (operands[0], operands[1],
17256 base, offset,
17257 false, load));
17261 return false;
17265 /* Return true if parallel execution of the two word-size accesses provided
17266 could be satisfied with a single LDRD/STRD instruction. Two word-size
17267 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17268 register operands and OPERANDS[2,3] are the corresponding memory operands. */
17270 bool
17271 valid_operands_ldrd_strd (rtx *operands, bool load)
17273 int nops = 2;
17274 HOST_WIDE_INT offsets[2], offset, align[2];
17275 rtx base = NULL_RTX;
17276 rtx cur_base, cur_offset;
17277 int i, gap;
17279 /* Check that the memory references are immediate offsets from the
17280 same base register. Extract the base register, the destination
17281 registers, and the corresponding memory offsets. */
17282 for (i = 0; i < nops; i++)
17284 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17285 &align[i]))
17286 return false;
17288 if (i == 0)
17289 base = cur_base;
17290 else if (REGNO (base) != REGNO (cur_base))
17291 return false;
17293 offsets[i] = INTVAL (cur_offset);
17294 if (GET_CODE (operands[i]) == SUBREG)
17295 return false;
17298 if (offsets[0] > offsets[1])
17299 return false;
17301 gap = offsets[1] - offsets[0];
17302 offset = offsets[0];
17304 /* Make sure accesses are to consecutive memory locations. */
17305 if (gap != GET_MODE_SIZE (SImode))
17306 return false;
17308 if (!align_ok_ldrd_strd (align[0], offset))
17309 return false;
17311 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17312 false, load);
17316 /* Print a symbolic form of X to the debug file, F. */
17317 static void
17318 arm_print_value (FILE *f, rtx x)
17320 switch (GET_CODE (x))
17322 case CONST_INT:
17323 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17324 return;
17326 case CONST_DOUBLE:
17328 char fpstr[20];
17329 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17330 sizeof (fpstr), 0, 1);
17331 fputs (fpstr, f);
17333 return;
17335 case CONST_VECTOR:
17337 int i;
17339 fprintf (f, "<");
17340 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17342 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17343 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17344 fputc (',', f);
17346 fprintf (f, ">");
17348 return;
17350 case CONST_STRING:
17351 fprintf (f, "\"%s\"", XSTR (x, 0));
17352 return;
17354 case SYMBOL_REF:
17355 fprintf (f, "`%s'", XSTR (x, 0));
17356 return;
17358 case LABEL_REF:
17359 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17360 return;
17362 case CONST:
17363 arm_print_value (f, XEXP (x, 0));
17364 return;
17366 case PLUS:
17367 arm_print_value (f, XEXP (x, 0));
17368 fprintf (f, "+");
17369 arm_print_value (f, XEXP (x, 1));
17370 return;
17372 case PC:
17373 fprintf (f, "pc");
17374 return;
17376 default:
17377 fprintf (f, "????");
17378 return;
17382 /* Routines for manipulation of the constant pool. */
17384 /* Arm instructions cannot load a large constant directly into a
17385 register; they have to come from a pc relative load. The constant
17386 must therefore be placed in the addressable range of the pc
17387 relative load. Depending on the precise pc relative load
17388 instruction the range is somewhere between 256 bytes and 4k. This
17389 means that we often have to dump a constant inside a function, and
17390 generate code to branch around it.
17392 It is important to minimize this, since the branches will slow
17393 things down and make the code larger.
17395 Normally we can hide the table after an existing unconditional
17396 branch so that there is no interruption of the flow, but in the
17397 worst case the code looks like this:
17399 ldr rn, L1
17401 b L2
17402 align
17403 L1: .long value
17407 ldr rn, L3
17409 b L4
17410 align
17411 L3: .long value
17415 We fix this by performing a scan after scheduling, which notices
17416 which instructions need to have their operands fetched from the
17417 constant table and builds the table.
17419 The algorithm starts by building a table of all the constants that
17420 need fixing up and all the natural barriers in the function (places
17421 where a constant table can be dropped without breaking the flow).
17422 For each fixup we note how far the pc-relative replacement will be
17423 able to reach and the offset of the instruction into the function.
17425 Having built the table we then group the fixes together to form
17426 tables that are as large as possible (subject to addressing
17427 constraints) and emit each table of constants after the last
17428 barrier that is within range of all the instructions in the group.
17429 If a group does not contain a barrier, then we forcibly create one
17430 by inserting a jump instruction into the flow. Once the table has
17431 been inserted, the insns are then modified to reference the
17432 relevant entry in the pool.
17434 Possible enhancements to the algorithm (not implemented) are:
17436 1) For some processors and object formats, there may be benefit in
17437 aligning the pools to the start of cache lines; this alignment
17438 would need to be taken into account when calculating addressability
17439 of a pool. */
17441 /* These typedefs are located at the start of this file, so that
17442 they can be used in the prototypes there. This comment is to
17443 remind readers of that fact so that the following structures
17444 can be understood more easily.
17446 typedef struct minipool_node Mnode;
17447 typedef struct minipool_fixup Mfix; */
17449 struct minipool_node
17451 /* Doubly linked chain of entries. */
17452 Mnode * next;
17453 Mnode * prev;
17454 /* The maximum offset into the code at which this entry can be placed. While
17455 pushing fixes for forward references, all entries are sorted in order
17456 of increasing max_address. */
17457 HOST_WIDE_INT max_address;
17458 /* Similarly for an entry inserted for a backwards ref. */
17459 HOST_WIDE_INT min_address;
17460 /* The number of fixes referencing this entry. This can become zero
17461 if we "unpush" an entry. In this case we ignore the entry when we
17462 come to emit the code. */
17463 int refcount;
17464 /* The offset from the start of the minipool. */
17465 HOST_WIDE_INT offset;
17466 /* The value in the table. */
17467 rtx value;
17468 /* The mode of value. */
17469 machine_mode mode;
17470 /* The size of the value. With iWMMXt enabled
17471 sizes > 4 also imply an alignment of 8 bytes. */
17472 int fix_size;
17475 struct minipool_fixup
17477 Mfix * next;
17478 rtx_insn * insn;
17479 HOST_WIDE_INT address;
17480 rtx * loc;
17481 machine_mode mode;
17482 int fix_size;
17483 rtx value;
17484 Mnode * minipool;
17485 HOST_WIDE_INT forwards;
17486 HOST_WIDE_INT backwards;
17489 /* Fixes less than a word need padding out to a word boundary. */
17490 #define MINIPOOL_FIX_SIZE(mode) \
17491 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
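/* So, for example, a QImode or HImode fix still occupies 4 bytes in the
   pool, while DImode and DFmode fixes occupy 8.  */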
17493 static Mnode * minipool_vector_head;
17494 static Mnode * minipool_vector_tail;
17495 static rtx_code_label *minipool_vector_label;
17496 static int minipool_pad;
17498 /* The linked list of all minipool fixes required for this function. */
17499 Mfix * minipool_fix_head;
17500 Mfix * minipool_fix_tail;
17501 /* The fix entry for the current minipool, once it has been placed. */
17502 Mfix * minipool_barrier;
17504 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17505 #define JUMP_TABLES_IN_TEXT_SECTION 0
17506 #endif
17508 static HOST_WIDE_INT
17509 get_jump_table_size (rtx_jump_table_data *insn)
17511 /* ADDR_VECs only take room if read-only data goes into the text
17512 section. */
17513 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17515 rtx body = PATTERN (insn);
17516 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17517 HOST_WIDE_INT size;
17518 HOST_WIDE_INT modesize;
17520 modesize = GET_MODE_SIZE (GET_MODE (body));
17521 size = modesize * XVECLEN (body, elt);
17522 switch (modesize)
17524 case 1:
17525 /* Round up size of TBB table to a halfword boundary. */
17526 size = (size + 1) & ~HOST_WIDE_INT_1;
17527 break;
17528 case 2:
17529 /* No padding necessary for TBH. */
17530 break;
17531 case 4:
17532 /* Add two bytes for alignment on Thumb. */
17533 if (TARGET_THUMB)
17534 size += 2;
17535 break;
17536 default:
17537 gcc_unreachable ();
17539 return size;
17542 return 0;
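/* Worked example: a five-entry ADDR_DIFF_VEC in QImode (a TBB table)
   occupies 5 bytes, rounded up to 6 for halfword alignment; the same
   table in HImode (TBH) occupies 10 bytes; a five-entry SImode table
   in Thumb code occupies 20 bytes plus 2 bytes of alignment padding.  */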
17545 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17546 function descriptor) into a register and the GOT address into the
17547 FDPIC register, returning an rtx for the register holding the
17548 function address. */
17551 arm_load_function_descriptor (rtx funcdesc)
17553 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17554 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17555 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17556 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17558 emit_move_insn (fnaddr_reg, fnaddr);
17560 /* The ABI requires the entry point address to be loaded first, but
17561 since we cannot support lazy binding for lack of atomic load of
17562 two 32-bits values, we do not need to bother to prevent the
17563 previous load from being moved after that of the GOT address. */
17564 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17566 return fnaddr_reg;
17569 /* Return the maximum amount of padding that will be inserted before
17570 label LABEL. */
17571 static HOST_WIDE_INT
17572 get_label_padding (rtx label)
17574 HOST_WIDE_INT align, min_insn_size;
17576 align = 1 << label_to_alignment (label).levels[0].log;
17577 min_insn_size = TARGET_THUMB ? 2 : 4;
17578 return align > min_insn_size ? align - min_insn_size : 0;
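/* For example, a label aligned to 8 bytes in Thumb code (where the
   minimum instruction size is 2) may be preceded by up to 6 bytes of
   padding; if the alignment does not exceed the minimum instruction
   size, no padding can be inserted and 0 is returned.  */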
17581 /* Move a minipool fix MP from its current location to before MAX_MP.
17582 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17583 constraints may need updating. */
17584 static Mnode *
17585 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17586 HOST_WIDE_INT max_address)
17588 /* The code below assumes these are different. */
17589 gcc_assert (mp != max_mp);
17591 if (max_mp == NULL)
17593 if (max_address < mp->max_address)
17594 mp->max_address = max_address;
17596 else
17598 if (max_address > max_mp->max_address - mp->fix_size)
17599 mp->max_address = max_mp->max_address - mp->fix_size;
17600 else
17601 mp->max_address = max_address;
17603 /* Unlink MP from its current position. Since max_mp is non-null,
17604 mp->prev must be non-null. */
17605 mp->prev->next = mp->next;
17606 if (mp->next != NULL)
17607 mp->next->prev = mp->prev;
17608 else
17609 minipool_vector_tail = mp->prev;
17611 /* Re-insert it before MAX_MP. */
17612 mp->next = max_mp;
17613 mp->prev = max_mp->prev;
17614 max_mp->prev = mp;
17616 if (mp->prev != NULL)
17617 mp->prev->next = mp;
17618 else
17619 minipool_vector_head = mp;
17622 /* Save the new entry. */
17623 max_mp = mp;
17625 /* Scan over the preceding entries and adjust their addresses as
17626 required. */
17627 while (mp->prev != NULL
17628 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17630 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17631 mp = mp->prev;
17634 return max_mp;
17637 /* Add a constant to the minipool for a forward reference. Returns the
17638 node added or NULL if the constant will not fit in this pool. */
17639 static Mnode *
17640 add_minipool_forward_ref (Mfix *fix)
17642 /* If set, max_mp is the first pool_entry that has a lower
17643 constraint than the one we are trying to add. */
17644 Mnode * max_mp = NULL;
17645 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17646 Mnode * mp;
17648 /* If the minipool starts before the end of FIX->INSN then this FIX
17649 cannot be placed into the current pool. Furthermore, adding the
17650 new constant pool entry may cause the pool to start FIX_SIZE bytes
17651 earlier. */
17652 if (minipool_vector_head &&
17653 (fix->address + get_attr_length (fix->insn)
17654 >= minipool_vector_head->max_address - fix->fix_size))
17655 return NULL;
17657 /* Scan the pool to see if a constant with the same value has
17658 already been added. While we are doing this, also note the
17659 location where we must insert the constant if it doesn't already
17660 exist. */
17661 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17663 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17664 && fix->mode == mp->mode
17665 && (!LABEL_P (fix->value)
17666 || (CODE_LABEL_NUMBER (fix->value)
17667 == CODE_LABEL_NUMBER (mp->value)))
17668 && rtx_equal_p (fix->value, mp->value))
17670 /* More than one fix references this entry. */
17671 mp->refcount++;
17672 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17675 /* Note the insertion point if necessary. */
17676 if (max_mp == NULL
17677 && mp->max_address > max_address)
17678 max_mp = mp;
17680 /* If we are inserting an 8-byte aligned quantity and
17681 we have not already found an insertion point, then
17682 make sure that all such 8-byte aligned quantities are
17683 placed at the start of the pool. */
17684 if (ARM_DOUBLEWORD_ALIGN
17685 && max_mp == NULL
17686 && fix->fix_size >= 8
17687 && mp->fix_size < 8)
17689 max_mp = mp;
17690 max_address = mp->max_address;
17694 /* The value is not currently in the minipool, so we need to create
17695 a new entry for it. If MAX_MP is NULL, the entry will be put on
17696 the end of the list since the placement is less constrained than
17697 any existing entry. Otherwise, we insert the new fix before
17698 MAX_MP and, if necessary, adjust the constraints on the other
17699 entries. */
17700 mp = XNEW (Mnode);
17701 mp->fix_size = fix->fix_size;
17702 mp->mode = fix->mode;
17703 mp->value = fix->value;
17704 mp->refcount = 1;
17705 /* Not yet required for a backwards ref. */
17706 mp->min_address = -65536;
17708 if (max_mp == NULL)
17710 mp->max_address = max_address;
17711 mp->next = NULL;
17712 mp->prev = minipool_vector_tail;
17714 if (mp->prev == NULL)
17716 minipool_vector_head = mp;
17717 minipool_vector_label = gen_label_rtx ();
17719 else
17720 mp->prev->next = mp;
17722 minipool_vector_tail = mp;
17724 else
17726 if (max_address > max_mp->max_address - mp->fix_size)
17727 mp->max_address = max_mp->max_address - mp->fix_size;
17728 else
17729 mp->max_address = max_address;
17731 mp->next = max_mp;
17732 mp->prev = max_mp->prev;
17733 max_mp->prev = mp;
17734 if (mp->prev != NULL)
17735 mp->prev->next = mp;
17736 else
17737 minipool_vector_head = mp;
17740 /* Save the new entry. */
17741 max_mp = mp;
17743 /* Scan over the preceding entries and adjust their addresses as
17744 required. */
17745 while (mp->prev != NULL
17746 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17748 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17749 mp = mp->prev;
17752 return max_mp;
17755 static Mnode *
17756 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17757 HOST_WIDE_INT min_address)
17759 HOST_WIDE_INT offset;
17761 /* The code below assumes these are different. */
17762 gcc_assert (mp != min_mp);
17764 if (min_mp == NULL)
17766 if (min_address > mp->min_address)
17767 mp->min_address = min_address;
17769 else
17771 /* We will adjust this below if it is too loose. */
17772 mp->min_address = min_address;
17774 /* Unlink MP from its current position. Since min_mp is non-null,
17775 mp->next must be non-null. */
17776 mp->next->prev = mp->prev;
17777 if (mp->prev != NULL)
17778 mp->prev->next = mp->next;
17779 else
17780 minipool_vector_head = mp->next;
17782 /* Reinsert it after MIN_MP. */
17783 mp->prev = min_mp;
17784 mp->next = min_mp->next;
17785 min_mp->next = mp;
17786 if (mp->next != NULL)
17787 mp->next->prev = mp;
17788 else
17789 minipool_vector_tail = mp;
17792 min_mp = mp;
17794 offset = 0;
17795 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17797 mp->offset = offset;
17798 if (mp->refcount > 0)
17799 offset += mp->fix_size;
17801 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17802 mp->next->min_address = mp->min_address + mp->fix_size;
17805 return min_mp;
17808 /* Add a constant to the minipool for a backward reference. Returns the
17809 node added or NULL if the constant will not fit in this pool.
17811 Note that the code for insertion for a backwards reference can be
17812 somewhat confusing because the calculated offsets for each fix do
17813 not take into account the size of the pool (which is still under
17814 construction). */
17815 static Mnode *
17816 add_minipool_backward_ref (Mfix *fix)
17818 /* If set, min_mp is the last pool_entry that has a lower constraint
17819 than the one we are trying to add. */
17820 Mnode *min_mp = NULL;
17821 /* This can be negative, since it is only a constraint. */
17822 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17823 Mnode *mp;
17825 /* If we can't reach the current pool from this insn, or if we can't
17826 insert this entry at the end of the pool without pushing other
17827 fixes out of range, then we don't try. This ensures that we
17828 can't fail later on. */
17829 if (min_address >= minipool_barrier->address
17830 || (minipool_vector_tail->min_address + fix->fix_size
17831 >= minipool_barrier->address))
17832 return NULL;
17834 /* Scan the pool to see if a constant with the same value has
17835 already been added. While we are doing this, also note the
17836 location where we must insert the constant if it doesn't already
17837 exist. */
17838 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17840 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17841 && fix->mode == mp->mode
17842 && (!LABEL_P (fix->value)
17843 || (CODE_LABEL_NUMBER (fix->value)
17844 == CODE_LABEL_NUMBER (mp->value)))
17845 && rtx_equal_p (fix->value, mp->value)
17846 /* Check that there is enough slack to move this entry to the
17847 end of the table (this is conservative). */
17848 && (mp->max_address
17849 > (minipool_barrier->address
17850 + minipool_vector_tail->offset
17851 + minipool_vector_tail->fix_size)))
17853 mp->refcount++;
17854 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17857 if (min_mp != NULL)
17858 mp->min_address += fix->fix_size;
17859 else
17861 /* Note the insertion point if necessary. */
17862 if (mp->min_address < min_address)
17864 /* For now, we do not allow the insertion of nodes that require 8-byte
17865 alignment anywhere but at the start of the pool. */
17866 if (ARM_DOUBLEWORD_ALIGN
17867 && fix->fix_size >= 8 && mp->fix_size < 8)
17868 return NULL;
17869 else
17870 min_mp = mp;
17872 else if (mp->max_address
17873 < minipool_barrier->address + mp->offset + fix->fix_size)
17875 /* Inserting before this entry would push the fix beyond
17876 its maximum address (which can happen if we have
17877 re-located a forwards fix); force the new fix to come
17878 after it. */
17879 if (ARM_DOUBLEWORD_ALIGN
17880 && fix->fix_size >= 8 && mp->fix_size < 8)
17881 return NULL;
17882 else
17884 min_mp = mp;
17885 min_address = mp->min_address + fix->fix_size;
17888 /* Do not insert a non-8-byte aligned quantity before 8-byte
17889 aligned quantities. */
17890 else if (ARM_DOUBLEWORD_ALIGN
17891 && fix->fix_size < 8
17892 && mp->fix_size >= 8)
17894 min_mp = mp;
17895 min_address = mp->min_address + fix->fix_size;
17900 /* We need to create a new entry. */
17901 mp = XNEW (Mnode);
17902 mp->fix_size = fix->fix_size;
17903 mp->mode = fix->mode;
17904 mp->value = fix->value;
17905 mp->refcount = 1;
17906 mp->max_address = minipool_barrier->address + 65536;
17908 mp->min_address = min_address;
17910 if (min_mp == NULL)
17912 mp->prev = NULL;
17913 mp->next = minipool_vector_head;
17915 if (mp->next == NULL)
17917 minipool_vector_tail = mp;
17918 minipool_vector_label = gen_label_rtx ();
17920 else
17921 mp->next->prev = mp;
17923 minipool_vector_head = mp;
17925 else
17927 mp->next = min_mp->next;
17928 mp->prev = min_mp;
17929 min_mp->next = mp;
17931 if (mp->next != NULL)
17932 mp->next->prev = mp;
17933 else
17934 minipool_vector_tail = mp;
17937 /* Save the new entry. */
17938 min_mp = mp;
17940 if (mp->prev)
17941 mp = mp->prev;
17942 else
17943 mp->offset = 0;
17945 /* Scan over the following entries and adjust their offsets. */
17946 while (mp->next != NULL)
17948 if (mp->next->min_address < mp->min_address + mp->fix_size)
17949 mp->next->min_address = mp->min_address + mp->fix_size;
17951 if (mp->refcount)
17952 mp->next->offset = mp->offset + mp->fix_size;
17953 else
17954 mp->next->offset = mp->offset;
17956 mp = mp->next;
17959 return min_mp;
17962 static void
17963 assign_minipool_offsets (Mfix *barrier)
17965 HOST_WIDE_INT offset = 0;
17966 Mnode *mp;
17968 minipool_barrier = barrier;
17970 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17972 mp->offset = offset;
17974 if (mp->refcount > 0)
17975 offset += mp->fix_size;
17979 /* Output the literal table */
17980 static void
17981 dump_minipool (rtx_insn *scan)
17983 Mnode * mp;
17984 Mnode * nmp;
17985 int align64 = 0;
17987 if (ARM_DOUBLEWORD_ALIGN)
17988 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17989 if (mp->refcount > 0 && mp->fix_size >= 8)
17991 align64 = 1;
17992 break;
17995 if (dump_file)
17996 fprintf (dump_file,
17997 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17998 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18000 scan = emit_label_after (gen_label_rtx (), scan);
18001 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18002 scan = emit_label_after (minipool_vector_label, scan);
18004 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18006 if (mp->refcount > 0)
18008 if (dump_file)
18010 fprintf (dump_file,
18011 ";; Offset %u, min %ld, max %ld ",
18012 (unsigned) mp->offset, (unsigned long) mp->min_address,
18013 (unsigned long) mp->max_address);
18014 arm_print_value (dump_file, mp->value);
18015 fputc ('\n', dump_file);
18018 rtx val = copy_rtx (mp->value);
18020 switch (GET_MODE_SIZE (mp->mode))
18022 #ifdef HAVE_consttable_1
18023 case 1:
18024 scan = emit_insn_after (gen_consttable_1 (val), scan);
18025 break;
18027 #endif
18028 #ifdef HAVE_consttable_2
18029 case 2:
18030 scan = emit_insn_after (gen_consttable_2 (val), scan);
18031 break;
18033 #endif
18034 #ifdef HAVE_consttable_4
18035 case 4:
18036 scan = emit_insn_after (gen_consttable_4 (val), scan);
18037 break;
18039 #endif
18040 #ifdef HAVE_consttable_8
18041 case 8:
18042 scan = emit_insn_after (gen_consttable_8 (val), scan);
18043 break;
18045 #endif
18046 #ifdef HAVE_consttable_16
18047 case 16:
18048 scan = emit_insn_after (gen_consttable_16 (val), scan);
18049 break;
18051 #endif
18052 default:
18053 gcc_unreachable ();
18057 nmp = mp->next;
18058 free (mp);
18061 minipool_vector_head = minipool_vector_tail = NULL;
18062 scan = emit_insn_after (gen_consttable_end (), scan);
18063 scan = emit_barrier_after (scan);
18066 /* Return the cost of forcibly inserting a barrier after INSN. */
18067 static int
18068 arm_barrier_cost (rtx_insn *insn)
18070 /* Basing the location of the pool on the loop depth is preferable,
18071 but at the moment, the basic block information seems to be
18072 corrupt by this stage of the compilation. */
18073 int base_cost = 50;
18074 rtx_insn *next = next_nonnote_insn (insn);
18076 if (next != NULL && LABEL_P (next))
18077 base_cost -= 20;
18079 switch (GET_CODE (insn))
18081 case CODE_LABEL:
18082 /* It will always be better to place the table before the label, rather
18083 than after it. */
18084 return 50;
18086 case INSN:
18087 case CALL_INSN:
18088 return base_cost;
18090 case JUMP_INSN:
18091 return base_cost - 10;
18093 default:
18094 return base_cost + 10;
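/* With the costs above a plain INSN or CALL_INSN scores 50, a JUMP_INSN
   scores 40 (the flow already breaks there, so it is a good place to
   drop a pool), and any insn immediately followed by a label has a
   further 20 subtracted; create_fix_barrier below prefers the latest
   position within range that has the lowest cost.  */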
18098 /* Find the best place in the insn stream in the range
18099 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18100 Create the barrier by inserting a jump and add a new fix entry for
18101 it. */
18102 static Mfix *
18103 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18105 HOST_WIDE_INT count = 0;
18106 rtx_barrier *barrier;
18107 rtx_insn *from = fix->insn;
18108 /* The instruction after which we will insert the jump. */
18109 rtx_insn *selected = NULL;
18110 int selected_cost;
18111 /* The address at which the jump instruction will be placed. */
18112 HOST_WIDE_INT selected_address;
18113 Mfix * new_fix;
18114 HOST_WIDE_INT max_count = max_address - fix->address;
18115 rtx_code_label *label = gen_label_rtx ();
18117 selected_cost = arm_barrier_cost (from);
18118 selected_address = fix->address;
18120 while (from && count < max_count)
18122 rtx_jump_table_data *tmp;
18123 int new_cost;
18125 /* This code shouldn't have been called if there was a natural barrier
18126 within range. */
18127 gcc_assert (!BARRIER_P (from));
18129 /* Count the length of this insn. This must stay in sync with the
18130 code that pushes minipool fixes. */
18131 if (LABEL_P (from))
18132 count += get_label_padding (from);
18133 else
18134 count += get_attr_length (from);
18136 /* If there is a jump table, add its length. */
18137 if (tablejump_p (from, NULL, &tmp))
18139 count += get_jump_table_size (tmp);
18141 /* Jump tables aren't in a basic block, so base the cost on
18142 the dispatch insn. If we select this location, we will
18143 still put the pool after the table. */
18144 new_cost = arm_barrier_cost (from);
18146 if (count < max_count
18147 && (!selected || new_cost <= selected_cost))
18149 selected = tmp;
18150 selected_cost = new_cost;
18151 selected_address = fix->address + count;
18154 /* Continue after the dispatch table. */
18155 from = NEXT_INSN (tmp);
18156 continue;
18159 new_cost = arm_barrier_cost (from);
18161 if (count < max_count
18162 && (!selected || new_cost <= selected_cost))
18164 selected = from;
18165 selected_cost = new_cost;
18166 selected_address = fix->address + count;
18169 from = NEXT_INSN (from);
18172 /* Make sure that we found a place to insert the jump. */
18173 gcc_assert (selected);
18175 /* Create a new JUMP_INSN that branches around a barrier. */
18176 from = emit_jump_insn_after (gen_jump (label), selected);
18177 JUMP_LABEL (from) = label;
18178 barrier = emit_barrier_after (from);
18179 emit_label_after (label, barrier);
18181 /* Create a minipool barrier entry for the new barrier. */
18182 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18183 new_fix->insn = barrier;
18184 new_fix->address = selected_address;
18185 new_fix->next = fix->next;
18186 fix->next = new_fix;
18188 return new_fix;
18191 /* Record that there is a natural barrier in the insn stream at
18192 ADDRESS. */
18193 static void
18194 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18196 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18198 fix->insn = insn;
18199 fix->address = address;
18201 fix->next = NULL;
18202 if (minipool_fix_head != NULL)
18203 minipool_fix_tail->next = fix;
18204 else
18205 minipool_fix_head = fix;
18207 minipool_fix_tail = fix;
18210 /* Record INSN, which will need fixing up to load a value from the
18211 minipool. ADDRESS is the offset of the insn from the start of the
18212 function; LOC is a pointer to the part of the insn which requires
18213 fixing; VALUE is the constant that must be loaded, which is of type
18214 MODE. */
18215 static void
18216 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18217 machine_mode mode, rtx value)
18219 gcc_assert (!arm_disable_literal_pool);
18220 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18222 fix->insn = insn;
18223 fix->address = address;
18224 fix->loc = loc;
18225 fix->mode = mode;
18226 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18227 fix->value = value;
18228 fix->forwards = get_attr_pool_range (insn);
18229 fix->backwards = get_attr_neg_pool_range (insn);
18230 fix->minipool = NULL;
18232 /* If an insn doesn't have a range defined for it, then it isn't
18233 expecting to be reworked by this code. Better to stop now than
18234 to generate duff assembly code. */
18235 gcc_assert (fix->forwards || fix->backwards);
18237 /* If an entry requires 8-byte alignment then assume all constant pools
18238 require 4 bytes of padding. Trying to do this later on a per-pool
18239 basis is awkward because existing pool entries have to be modified. */
18240 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18241 minipool_pad = 4;
18243 if (dump_file)
18245 fprintf (dump_file,
18246 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18247 GET_MODE_NAME (mode),
18248 INSN_UID (insn), (unsigned long) address,
18249 -1 * (long)fix->backwards, (long)fix->forwards);
18250 arm_print_value (dump_file, fix->value);
18251 fprintf (dump_file, "\n");
18254 /* Add it to the chain of fixes. */
18255 fix->next = NULL;
18257 if (minipool_fix_head != NULL)
18258 minipool_fix_tail->next = fix;
18259 else
18260 minipool_fix_head = fix;
18262 minipool_fix_tail = fix;
18265 /* Return the maximum cost, in insns, that we are prepared to pay to
18266 synthesize a 64-bit constant inline rather than loading it from
18267 memory. */
18269 arm_max_const_double_inline_cost ()
18271 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18274 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18275 Returns the number of insns needed, or 99 if we don't know how to
18276 do it. */
18278 arm_const_double_inline_cost (rtx val)
18280 rtx lowpart, highpart;
18281 machine_mode mode;
18283 mode = GET_MODE (val);
18285 if (mode == VOIDmode)
18286 mode = DImode;
18288 gcc_assert (GET_MODE_SIZE (mode) == 8);
18290 lowpart = gen_lowpart (SImode, val);
18291 highpart = gen_highpart_mode (SImode, mode, val);
18293 gcc_assert (CONST_INT_P (lowpart));
18294 gcc_assert (CONST_INT_P (highpart));
18296 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18297 NULL_RTX, NULL_RTX, 0, 0)
18298 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18299 NULL_RTX, NULL_RTX, 0, 0));
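/* For illustration: 0x0000000100004000 splits into a high part of 1 and
   a low part of 0x4000, each a valid immediate for a single MOV, so the
   cost is 2 and the constant can be built inline.  A value whose two
   halves each need three or more instructions exceeds
   arm_max_const_double_inline_cost and is better loaded from the
   constant pool.  */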
18302 /* Cost of loading a SImode constant. */
18303 static inline int
18304 arm_const_inline_cost (enum rtx_code code, rtx val)
18306 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18307 NULL_RTX, NULL_RTX, 1, 0);
18310 /* Return true if it is worthwhile to split a 64-bit constant into two
18311 32-bit operations. This is the case if optimizing for size, or
18312 if we have load delay slots, or if one 32-bit part can be done with
18313 a single data operation. */
18314 bool
18315 arm_const_double_by_parts (rtx val)
18317 machine_mode mode = GET_MODE (val);
18318 rtx part;
18320 if (optimize_size || arm_ld_sched)
18321 return true;
18323 if (mode == VOIDmode)
18324 mode = DImode;
18326 part = gen_highpart_mode (SImode, mode, val);
18328 gcc_assert (CONST_INT_P (part));
18330 if (const_ok_for_arm (INTVAL (part))
18331 || const_ok_for_arm (~INTVAL (part)))
18332 return true;
18334 part = gen_lowpart (SImode, val);
18336 gcc_assert (CONST_INT_P (part));
18338 if (const_ok_for_arm (INTVAL (part))
18339 || const_ok_for_arm (~INTVAL (part)))
18340 return true;
18342 return false;
18345 /* Return true if it is possible to inline both the high and low parts
18346 of a 64-bit constant into 32-bit data processing instructions. */
18347 bool
18348 arm_const_double_by_immediates (rtx val)
18350 machine_mode mode = GET_MODE (val);
18351 rtx part;
18353 if (mode == VOIDmode)
18354 mode = DImode;
18356 part = gen_highpart_mode (SImode, mode, val);
18358 gcc_assert (CONST_INT_P (part));
18360 if (!const_ok_for_arm (INTVAL (part)))
18361 return false;
18363 part = gen_lowpart (SImode, val);
18365 gcc_assert (CONST_INT_P (part));
18367 if (!const_ok_for_arm (INTVAL (part)))
18368 return false;
18370 return true;
18373 /* Scan INSN and note any of its operands that need fixing.
18374 If DO_PUSHES is false we do not actually push any of the fixups
18375 needed. */
18376 static void
18377 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18379 int opno;
18381 extract_constrain_insn (insn);
18383 if (recog_data.n_alternatives == 0)
18384 return;
18386 /* Fill in recog_op_alt with information about the constraints of
18387 this insn. */
18388 preprocess_constraints (insn);
18390 const operand_alternative *op_alt = which_op_alt ();
18391 for (opno = 0; opno < recog_data.n_operands; opno++)
18393 /* Things we need to fix can only occur in inputs. */
18394 if (recog_data.operand_type[opno] != OP_IN)
18395 continue;
18397 /* If this alternative is a memory reference, then any mention
18398 of constants in this alternative is really to fool reload
18399 into allowing us to accept one there. We need to fix them up
18400 now so that we output the right code. */
18401 if (op_alt[opno].memory_ok)
18403 rtx op = recog_data.operand[opno];
18405 if (CONSTANT_P (op))
18407 if (do_pushes)
18408 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18409 recog_data.operand_mode[opno], op);
18411 else if (MEM_P (op)
18412 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18413 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18415 if (do_pushes)
18417 rtx cop = avoid_constant_pool_reference (op);
18419 /* Casting the address of something to a mode narrower
18420 than a word can cause avoid_constant_pool_reference()
18421 to return the pool reference itself. That's no good to
18422 us here. Let's just hope that we can use the
18423 constant pool value directly. */
18424 if (op == cop)
18425 cop = get_pool_constant (XEXP (op, 0));
18427 push_minipool_fix (insn, address,
18428 recog_data.operand_loc[opno],
18429 recog_data.operand_mode[opno], cop);
18436 return;
18439 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18440 and unions in the context of ARMv8-M Security Extensions. It is used as a
18441 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18442 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18443 or four masks, depending on whether it is being computed for a
18444 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18445 respectively. The tree for the type of the argument or a field within an
18446 argument is passed in ARG_TYPE, the current register this argument or field
18447 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18448 argument or field starts at is passed in STARTING_BIT and the last used bit
18449 is kept in LAST_USED_BIT which is also updated accordingly. */
18451 static unsigned HOST_WIDE_INT
18452 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18453 uint32_t * padding_bits_to_clear,
18454 unsigned starting_bit, int * last_used_bit)
18457 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18459 if (TREE_CODE (arg_type) == RECORD_TYPE)
18461 unsigned current_bit = starting_bit;
18462 tree field;
18463 long int offset, size;
18466 field = TYPE_FIELDS (arg_type);
18467 while (field)
18469 /* The offset within a structure is always an offset from
18470 the start of that structure. Make sure we take that into the
18471 calculation of the register based offset that we use here. */
18472 offset = starting_bit;
18473 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18474 offset %= 32;
18476 /* This is the actual size of the field, for bitfields this is the
18477 bitfield width and not the container size. */
18478 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18480 if (*last_used_bit != offset)
18482 if (offset < *last_used_bit)
18484 /* This field's offset is before the 'last_used_bit', that
18485 means this field goes on the next register. So we need to
18486 pad the rest of the current register and increase the
18487 register number. */
18488 uint32_t mask;
18489 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18490 mask++;
18492 padding_bits_to_clear[*regno] |= mask;
18493 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18494 (*regno)++;
18496 else
18498 /* Otherwise we pad the bits between the last field's end and
18499 the start of the new field. */
18500 uint32_t mask;
18502 mask = ((uint32_t)-1) >> (32 - offset);
18503 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18504 padding_bits_to_clear[*regno] |= mask;
18506 current_bit = offset;
18509 /* Calculate further padding bits for inner structs/unions too. */
18510 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18512 *last_used_bit = current_bit;
18513 not_to_clear_reg_mask
18514 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18515 padding_bits_to_clear, offset,
18516 last_used_bit);
18518 else
18520 /* Update 'current_bit' with this field's size. If the
18521 'current_bit' lies in a subsequent register, update 'regno' and
18522 reset 'current_bit' to point to the current bit in that new
18523 register. */
18524 current_bit += size;
18525 while (current_bit >= 32)
18527 current_bit-=32;
18528 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18529 (*regno)++;
18531 *last_used_bit = current_bit;
18534 field = TREE_CHAIN (field);
18536 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18538 else if (TREE_CODE (arg_type) == UNION_TYPE)
18540 tree field, field_t;
18541 int i, regno_t, field_size;
18542 int max_reg = -1;
18543 int max_bit = -1;
18544 uint32_t mask;
18545 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18546 = {-1, -1, -1, -1};
18548 /* To compute the padding bits in a union we only consider bits as
18549 padding bits if they are always either a padding bit or fall outside a
18550 fields size for all fields in the union. */
18551 field = TYPE_FIELDS (arg_type);
18552 while (field)
18554 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18555 = {0U, 0U, 0U, 0U};
18556 int last_used_bit_t = *last_used_bit;
18557 regno_t = *regno;
18558 field_t = TREE_TYPE (field);
18560 /* If the field's type is either a record or a union make sure to
18561 compute their padding bits too. */
18562 if (RECORD_OR_UNION_TYPE_P (field_t))
18563 not_to_clear_reg_mask
18564 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18565 &padding_bits_to_clear_t[0],
18566 starting_bit, &last_used_bit_t);
18567 else
18569 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18570 regno_t = (field_size / 32) + *regno;
18571 last_used_bit_t = (starting_bit + field_size) % 32;
18574 for (i = *regno; i < regno_t; i++)
18576 /* For all but the last register used by this field only keep the
18577 padding bits that were padding bits in this field. */
18578 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18581 /* For the last register, keep all padding bits that were padding
18582 bits in this field and any padding bits that are still valid
18583 as padding bits but fall outside of this field's size. */
18584 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18585 padding_bits_to_clear_res[regno_t]
18586 &= padding_bits_to_clear_t[regno_t] | mask;
18588 /* Update the maximum size of the fields in terms of registers used
18589 ('max_reg') and the 'last_used_bit' in said register. */
18590 if (max_reg < regno_t)
18592 max_reg = regno_t;
18593 max_bit = last_used_bit_t;
18595 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18596 max_bit = last_used_bit_t;
18598 field = TREE_CHAIN (field);
18601 /* Update the current padding_bits_to_clear using the intersection of the
18602 padding bits of all the fields. */
18603 for (i=*regno; i < max_reg; i++)
18604 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18606 /* Do not keep trailing padding bits, we do not know yet whether this
18607 is the end of the argument. */
18608 mask = ((uint32_t) 1 << max_bit) - 1;
18609 padding_bits_to_clear[max_reg]
18610 |= padding_bits_to_clear_res[max_reg] & mask;
18612 *regno = max_reg;
18613 *last_used_bit = max_bit;
18615 else
18616 /* This function should only be used for structs and unions. */
18617 gcc_unreachable ();
18619 return not_to_clear_reg_mask;
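/* As an illustration, for a (hypothetical) argument of type
	struct { unsigned char a; unsigned short b; };
   AAPCS places A in bits 0-7 of r0 and B in bits 16-31, leaving bits
   8-15 as padding, so padding_bits_to_clear[0] ends up as 0x0000ff00
   and the bit for r0 is set in the returned mask because that register
   carries argument data.  */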
18622 /* In the context of ARMv8-M Security Extensions, this function is used for both
18623 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18624 registers are used when returning or passing arguments, which is then
18625 returned as a mask. It will also compute a mask to indicate padding/unused
18626 bits for each of these registers, and passes this through the
18627 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18628 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18629 the starting register used to pass this argument or return value is passed
18630 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18631 for struct and union types. */
18633 static unsigned HOST_WIDE_INT
18634 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18635 uint32_t * padding_bits_to_clear)
18638 int last_used_bit = 0;
18639 unsigned HOST_WIDE_INT not_to_clear_mask;
18641 if (RECORD_OR_UNION_TYPE_P (arg_type))
18643 not_to_clear_mask
18644 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18645 padding_bits_to_clear, 0,
18646 &last_used_bit);
18649 /* If the 'last_used_bit' is not zero, that means we are still using a
18650 part of the last 'regno'. In such cases we must clear the trailing
18651 bits. Otherwise we are not using regno and we should mark it as to
18652 clear. */
18653 if (last_used_bit != 0)
18654 padding_bits_to_clear[regno]
18655 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18656 else
18657 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18659 else
18661 not_to_clear_mask = 0;
18662 /* We are not dealing with structs nor unions. So these arguments may be
18663 passed in floating point registers too. In some cases a BLKmode is
18664 used when returning or passing arguments in multiple VFP registers. */
18665 if (GET_MODE (arg_rtx) == BLKmode)
18667 int i, arg_regs;
18668 rtx reg;
18670 /* This should really only occur when dealing with the hard-float
18671 ABI. */
18672 gcc_assert (TARGET_HARD_FLOAT_ABI);
18674 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18676 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18677 gcc_assert (REG_P (reg));
18679 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18681 /* If we are dealing with DF mode, make sure we don't
18682 clear either of the registers it addresses. */
18683 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18684 if (arg_regs > 1)
18686 unsigned HOST_WIDE_INT mask;
18687 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18688 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18689 not_to_clear_mask |= mask;
18693 else
18695 /* Otherwise we can rely on the MODE to determine how many registers
18696 are being used by this argument. */
18697 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18698 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18699 if (arg_regs > 1)
18701 unsigned HOST_WIDE_INT
18702 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18703 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18704 not_to_clear_mask |= mask;
18709 return not_to_clear_mask;
18712 /* Clear any secrets from registers before doing a cmse_nonsecure_call or returning from
18713 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18714 are to be fully cleared, using the value in register CLEARING_REG if more
18715 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18716 the bits that need to be cleared in caller-saved core registers, with
18717 SCRATCH_REG used as a scratch register for that clearing.
18719 NOTE: one of the three following conditions must hold:
18720 - SCRATCH_REG is a low register
18721 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18722 in TO_CLEAR_BITMAP)
18723 - CLEARING_REG is a low register. */
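/* As a rough sketch of what the padding-clearing loop below emits, assume
   padding_bits_to_clear[0] == 0x0000ff00 and SCRATCH_REG is ip (values are
   purely illustrative).  The negated mask 0xffff00ff is built 16 bits at a
   time and then ANDed into r0, giving approximately:

       movw    ip, #0x00ff      @ low half of ~padding_bits_to_clear[0]
       movt    ip, #0xffff      @ high half, via the ZERO_EXTRACT set
       ands    r0, r0, ip       @ scrub only the padding bits

   so the data bits of the argument register are preserved.  */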
18725 static void
18726 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18727 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18729 bool saved_clearing = false;
18730 rtx saved_clearing_reg = NULL_RTX;
18731 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18733 gcc_assert (arm_arch_cmse);
18735 if (!bitmap_empty_p (to_clear_bitmap))
18737 minregno = bitmap_first_set_bit (to_clear_bitmap);
18738 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18740 clearing_regno = REGNO (clearing_reg);
18742 /* Clear padding bits. */
18743 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18744 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18746 uint64_t mask;
18747 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18749 if (padding_bits_to_clear[i] == 0)
18750 continue;
18752 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18753 CLEARING_REG as scratch. */
18754 if (TARGET_THUMB1
18755 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18757 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18758 such that we can use clearing_reg to clear the unused bits in the
18759 arguments. */
18760 if ((clearing_regno > maxregno
18761 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18762 && !saved_clearing)
18764 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18765 emit_move_insn (scratch_reg, clearing_reg);
18766 saved_clearing = true;
18767 saved_clearing_reg = scratch_reg;
18769 scratch_reg = clearing_reg;
18772 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18773 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18774 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18776 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18777 mask = (~padding_bits_to_clear[i]) >> 16;
18778 rtx16 = gen_int_mode (16, SImode);
18779 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18780 if (mask)
18781 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18783 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18785 if (saved_clearing)
18786 emit_move_insn (clearing_reg, saved_clearing_reg);
18789 /* Clear full registers. */
18791 if (TARGET_HAVE_FPCXT_CMSE)
18793 rtvec vunspec_vec;
18794 int i, j, k, nb_regs;
18795 rtx use_seq, par, reg, set, vunspec;
18796 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18797 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18798 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18800 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18802 /* Find next register to clear and exit if none. */
18803 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18804 if (i > maxregno)
18805 break;
18807 /* Compute number of consecutive registers to clear. */
18808 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18809 j++);
18810 nb_regs = j - i;
18812 /* Create VSCCLRM RTX pattern. */
18813 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18814 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18815 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18816 VUNSPEC_VSCCLRM_VPR);
18817 XVECEXP (par, 0, 0) = vunspec;
18819 /* Insert VFP register clearing RTX in the pattern. */
18820 start_sequence ();
18821 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18823 if (!bitmap_bit_p (to_clear_bitmap, j))
18824 continue;
18826 reg = gen_rtx_REG (SFmode, j);
18827 set = gen_rtx_SET (reg, const0_rtx);
18828 XVECEXP (par, 0, k++) = set;
18829 emit_use (reg);
18831 use_seq = get_insns ();
18832 end_sequence ();
18834 emit_insn_after (use_seq, emit_insn (par));
18837 /* Get set of core registers to clear. */
18838 bitmap_clear (core_regs_bitmap);
18839 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18840 IP_REGNUM - R0_REGNUM + 1);
18841 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18842 core_regs_bitmap);
18843 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18845 if (bitmap_empty_p (to_clear_core_bitmap))
18846 return;
18848 /* Create clrm RTX pattern. */
18849 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18850 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18852 /* Insert core register clearing RTX in the pattern. */
18853 start_sequence ();
18854 for (j = 0, i = minregno; j < nb_regs; i++)
18856 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18857 continue;
18859 reg = gen_rtx_REG (SImode, i);
18860 set = gen_rtx_SET (reg, const0_rtx);
18861 XVECEXP (par, 0, j++) = set;
18862 emit_use (reg);
18865 /* Insert APSR register clearing RTX in the pattern
18866 * along with clobbering CC. */
18867 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18868 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18869 VUNSPEC_CLRM_APSR);
18871 XVECEXP (par, 0, j++) = vunspec;
18873 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18874 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18875 XVECEXP (par, 0, j) = clobber;
18877 use_seq = get_insns ();
18878 end_sequence ();
18880 emit_insn_after (use_seq, emit_insn (par));
18882 else
18884 /* If not marked for clearing, clearing_reg already does not contain
18885 any secret. */
18886 if (clearing_regno <= maxregno
18887 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18889 emit_move_insn (clearing_reg, const0_rtx);
18890 emit_use (clearing_reg);
18891 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18894 for (regno = minregno; regno <= maxregno; regno++)
18896 if (!bitmap_bit_p (to_clear_bitmap, regno))
18897 continue;
18899 if (IS_VFP_REGNUM (regno))
18901 /* If regno is an even vfp register and its successor is also to
18902 be cleared, use vmov. */
18903 if (TARGET_VFP_DOUBLE
18904 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18905 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18907 emit_move_insn (gen_rtx_REG (DFmode, regno),
18908 CONST1_RTX (DFmode));
18909 emit_use (gen_rtx_REG (DFmode, regno));
18910 regno++;
18912 else
18914 emit_move_insn (gen_rtx_REG (SFmode, regno),
18915 CONST1_RTX (SFmode));
18916 emit_use (gen_rtx_REG (SFmode, regno));
18919 else
18921 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18922 emit_use (gen_rtx_REG (SImode, regno));
18928 /* Clear core and caller-saved VFP registers not used to pass arguments before
18929 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18930 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18931 libgcc/config/arm/cmse_nonsecure_call.S. */
18933 static void
18934 cmse_nonsecure_call_inline_register_clear (void)
18936 basic_block bb;
18938 FOR_EACH_BB_FN (bb, cfun)
18940 rtx_insn *insn;
18942 FOR_BB_INSNS (bb, insn)
18944 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18945 /* frame = VFP regs + FPSCR + VPR. */
18946 unsigned lazy_store_stack_frame_size
18947 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18948 unsigned long callee_saved_mask
18949 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18950 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18951 unsigned address_regnum, regno;
18952 unsigned max_int_regno
18953 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18954 unsigned max_fp_regno
18955 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18956 unsigned maxregno
18957 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18958 auto_sbitmap to_clear_bitmap (maxregno + 1);
18959 rtx_insn *seq;
18960 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18961 rtx address;
18962 CUMULATIVE_ARGS args_so_far_v;
18963 cumulative_args_t args_so_far;
18964 tree arg_type, fntype;
18965 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18966 function_args_iterator args_iter;
18967 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18969 if (!NONDEBUG_INSN_P (insn))
18970 continue;
18972 if (!CALL_P (insn))
18973 continue;
18975 pat = PATTERN (insn);
18976 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18977 call = XVECEXP (pat, 0, 0);
18979 /* Get the real call RTX if the insn sets a value, ie. returns. */
18980 if (GET_CODE (call) == SET)
18981 call = SET_SRC (call);
18983 /* Check if it is a cmse_nonsecure_call. */
18984 unspec = XEXP (call, 0);
18985 if (GET_CODE (unspec) != UNSPEC
18986 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18987 continue;
18989 /* Mark registers that need to be cleared. Those that hold a
18990 parameter are removed from the set further below. */
18991 bitmap_clear (to_clear_bitmap);
18992 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18993 max_int_regno - R0_REGNUM + 1);
18995 /* Only look at the caller-saved floating point registers in case of
18996 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18997 lazy store and loads which clear both caller- and callee-saved
18998 registers. */
18999 if (!lazy_fpclear)
19001 auto_sbitmap float_bitmap (maxregno + 1);
19003 bitmap_clear (float_bitmap);
19004 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19005 max_fp_regno - FIRST_VFP_REGNUM + 1);
19006 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19009 /* Make sure the register used to hold the function address is not
19010 cleared. */
19011 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19012 gcc_assert (MEM_P (address));
19013 gcc_assert (REG_P (XEXP (address, 0)));
19014 address_regnum = REGNO (XEXP (address, 0));
19015 if (address_regnum <= max_int_regno)
19016 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19018 /* Set basic block of call insn so that df rescan is performed on
19019 insns inserted here. */
19020 set_block_for_insn (insn, bb);
19021 df_set_flags (DF_DEFER_INSN_RESCAN);
19022 start_sequence ();
19024 /* Make sure the scheduler doesn't schedule other insns beyond
19025 here. */
19026 emit_insn (gen_blockage ());
19028 /* Walk through all arguments and clear registers appropriately. */
19030 fntype = TREE_TYPE (MEM_EXPR (address));
19031 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19032 NULL_TREE);
19033 args_so_far = pack_cumulative_args (&args_so_far_v);
19034 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19036 rtx arg_rtx;
19037 uint64_t to_clear_args_mask;
19039 if (VOID_TYPE_P (arg_type))
19040 continue;
19042 function_arg_info arg (arg_type, /*named=*/true);
19043 if (!first_param)
19044 /* ??? We should advance after processing the argument and pass
19045 the argument we're advancing past. */
19046 arm_function_arg_advance (args_so_far, arg);
19048 arg_rtx = arm_function_arg (args_so_far, arg);
19049 gcc_assert (REG_P (arg_rtx));
19050 to_clear_args_mask
19051 = compute_not_to_clear_mask (arg_type, arg_rtx,
19052 REGNO (arg_rtx),
19053 &padding_bits_to_clear[0]);
19054 if (to_clear_args_mask)
19056 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19058 if (to_clear_args_mask & (1ULL << regno))
19059 bitmap_clear_bit (to_clear_bitmap, regno);
19063 first_param = false;
19066 /* We use right shift and left shift to clear the LSB of the address
19067 we jump to instead of using bic, to avoid having to use an extra
19068 register on Thumb-1. */
19069 clearing_reg = XEXP (address, 0);
19070 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19071 emit_insn (gen_rtx_SET (clearing_reg, shift));
19072 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19073 emit_insn (gen_rtx_SET (clearing_reg, shift));
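/* A hedged illustration of the two SETs just emitted: for a call through r4
   (register choice illustrative only) this corresponds roughly to

       lsrs    r4, r4, #1
       lsls    r4, r4, #1

   which clears the Thumb bit of the target address without needing BIC or
   an extra register.  */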
19075 if (clear_callee_saved)
19077 rtx push_insn =
19078 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19079 /* Disable frame debug info in push because it needs to be
19080 disabled for pop (see below). */
19081 RTX_FRAME_RELATED_P (push_insn) = 0;
19083 /* Lazy store multiple. */
19084 if (lazy_fpclear)
19086 rtx imm;
19087 rtx_insn *add_insn;
19089 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19090 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19091 stack_pointer_rtx, imm));
19092 /* If we have the frame pointer, then it will be the
19093 CFA reg. Otherwise, the stack pointer is the CFA
19094 reg, so we need to emit a CFA adjust. */
19095 if (!frame_pointer_needed)
19096 arm_add_cfa_adjust_cfa_note (add_insn,
19097 - lazy_store_stack_frame_size,
19098 stack_pointer_rtx,
19099 stack_pointer_rtx);
19100 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19102 /* Save VFP callee-saved registers. */
19103 else
19105 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19106 (max_fp_regno - D7_VFP_REGNUM) / 2);
19107 /* Disable frame debug info in push because it needs to be
19108 disabled for vpop (see below). */
19109 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19113 /* Clear caller-saved registers that leak before doing a non-secure
19114 call. */
19115 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19116 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19117 NUM_ARG_REGS, ip_reg, clearing_reg);
19119 seq = get_insns ();
19120 end_sequence ();
19121 emit_insn_before (seq, insn);
19123 if (TARGET_HAVE_FPCXT_CMSE)
19125 rtx_insn *last, *pop_insn, *after = insn;
19127 start_sequence ();
19129 /* Lazy load multiple done as part of libcall in Armv8-M. */
19130 if (lazy_fpclear)
19132 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19133 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19134 rtx_insn *add_insn =
19135 emit_insn (gen_addsi3 (stack_pointer_rtx,
19136 stack_pointer_rtx, imm));
19137 if (!frame_pointer_needed)
19138 arm_add_cfa_adjust_cfa_note (add_insn,
19139 lazy_store_stack_frame_size,
19140 stack_pointer_rtx,
19141 stack_pointer_rtx);
19143 /* Restore VFP callee-saved registers. */
19144 else
19146 int nb_callee_saved_vfp_regs =
19147 (max_fp_regno - D7_VFP_REGNUM) / 2;
19148 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19149 nb_callee_saved_vfp_regs,
19150 stack_pointer_rtx);
19151 /* Disable frame debug info in vpop because the SP adjustment
19152 is made using a CFA adjustment note while CFA used is
19153 sometimes R7. This then causes an assert failure in the
19154 CFI note creation code. */
19155 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19158 arm_emit_multi_reg_pop (callee_saved_mask);
19159 pop_insn = get_last_insn ();
19161 /* Disable frame debug info in pop because they reset the state
19162 of popped registers to what it was at the beginning of the
19163 function, before the prologue. This leads to incorrect state
19164 when doing the pop after the nonsecure call for registers that
19165 are pushed both in prologue and before the nonsecure call.
19167 It also occasionally triggers an assert failure in CFI note
19168 creation code when there are two codepaths to the epilogue,
19169 one of which does not go through the nonsecure call.
19170 Obviously this means that debugging between the push and pop is
19171 not reliable. */
19172 RTX_FRAME_RELATED_P (pop_insn) = 0;
19174 seq = get_insns ();
19175 last = get_last_insn ();
19176 end_sequence ();
19178 emit_insn_after (seq, after);
19180 /* Skip the pop we have just inserted after the nonsecure call; we know
19181 it does not contain a nonsecure call. */
19182 insn = last;
19188 /* Rewrite move insn into subtract of 0 if the condition codes will
19189 be useful in next conditional jump insn. */
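/* A sketch of the intended effect, with registers and label chosen only for
   illustration: a Thumb-1 sequence such as

       movs    r1, r2
       cmp     r1, #0
       bne     .Lfoo

   can instead be generated as "subs r1, r2, #0" followed by the branch,
   because the flag-setting subtract makes the explicit compare against zero
   redundant.  */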
19191 static void
19192 thumb1_reorg (void)
19194 basic_block bb;
19196 FOR_EACH_BB_FN (bb, cfun)
19198 rtx dest, src;
19199 rtx cmp, op0, op1, set = NULL;
19200 rtx_insn *prev, *insn = BB_END (bb);
19201 bool insn_clobbered = false;
19203 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19204 insn = PREV_INSN (insn);
19206 /* Find the last cbranchsi4_insn in basic block BB. */
19207 if (insn == BB_HEAD (bb)
19208 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19209 continue;
19211 /* Get the register with which we are comparing. */
19212 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19213 op0 = XEXP (cmp, 0);
19214 op1 = XEXP (cmp, 1);
19216 /* Check that comparison is against ZERO. */
19217 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19218 continue;
19220 /* Find the first flag setting insn before INSN in basic block BB. */
19221 gcc_assert (insn != BB_HEAD (bb));
19222 for (prev = PREV_INSN (insn);
19223 (!insn_clobbered
19224 && prev != BB_HEAD (bb)
19225 && (NOTE_P (prev)
19226 || DEBUG_INSN_P (prev)
19227 || ((set = single_set (prev)) != NULL
19228 && get_attr_conds (prev) == CONDS_NOCOND)));
19229 prev = PREV_INSN (prev))
19231 if (reg_set_p (op0, prev))
19232 insn_clobbered = true;
19235 /* Skip if op0 is clobbered by an insn other than prev. */
19236 if (insn_clobbered)
19237 continue;
19239 if (!set)
19240 continue;
19242 dest = SET_DEST (set);
19243 src = SET_SRC (set);
19244 if (!low_register_operand (dest, SImode)
19245 || !low_register_operand (src, SImode))
19246 continue;
19248 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19249 in INSN. Both src and dest of the move insn are checked. */
19250 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19252 dest = copy_rtx (dest);
19253 src = copy_rtx (src);
19254 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19255 PATTERN (prev) = gen_rtx_SET (dest, src);
19256 INSN_CODE (prev) = -1;
19257 /* Set test register in INSN to dest. */
19258 XEXP (cmp, 0) = copy_rtx (dest);
19259 INSN_CODE (insn) = -1;
19264 /* Convert instructions to their cc-clobbering variant if possible, since
19265 that allows us to use smaller encodings. */
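/* For instance (illustrative operands only): a plain "add r0, r0, r1" needs
   a 32-bit Thumb-2 encoding, while the flag-setting "adds r0, r0, r1" has a
   16-bit encoding; when the condition codes are dead at that point,
   converting the insn to clobber CC allows the shorter encoding to be
   used.  */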
19267 static void
19268 thumb2_reorg (void)
19270 basic_block bb;
19271 regset_head live;
19273 INIT_REG_SET (&live);
19275 /* We are freeing block_for_insn in the toplev to keep compatibility
19276 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19277 compute_bb_for_insn ();
19278 df_analyze ();
19280 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19282 FOR_EACH_BB_FN (bb, cfun)
19284 if ((current_tune->disparage_flag_setting_t16_encodings
19285 == tune_params::DISPARAGE_FLAGS_ALL)
19286 && optimize_bb_for_speed_p (bb))
19287 continue;
19289 rtx_insn *insn;
19290 Convert_Action action = SKIP;
19291 Convert_Action action_for_partial_flag_setting
19292 = ((current_tune->disparage_flag_setting_t16_encodings
19293 != tune_params::DISPARAGE_FLAGS_NEITHER)
19294 && optimize_bb_for_speed_p (bb))
19295 ? SKIP : CONV;
19297 COPY_REG_SET (&live, DF_LR_OUT (bb));
19298 df_simulate_initialize_backwards (bb, &live);
19299 FOR_BB_INSNS_REVERSE (bb, insn)
19301 if (NONJUMP_INSN_P (insn)
19302 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19303 && GET_CODE (PATTERN (insn)) == SET)
19305 action = SKIP;
19306 rtx pat = PATTERN (insn);
19307 rtx dst = XEXP (pat, 0);
19308 rtx src = XEXP (pat, 1);
19309 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19311 if (UNARY_P (src) || BINARY_P (src))
19312 op0 = XEXP (src, 0);
19314 if (BINARY_P (src))
19315 op1 = XEXP (src, 1);
19317 if (low_register_operand (dst, SImode))
19319 switch (GET_CODE (src))
19321 case PLUS:
19322 /* Adding two registers and storing the result
19323 in the first source is already a 16-bit
19324 operation. */
19325 if (rtx_equal_p (dst, op0)
19326 && register_operand (op1, SImode))
19327 break;
19329 if (low_register_operand (op0, SImode))
19331 /* ADDS <Rd>,<Rn>,<Rm> */
19332 if (low_register_operand (op1, SImode))
19333 action = CONV;
19334 /* ADDS <Rdn>,#<imm8> */
19335 /* SUBS <Rdn>,#<imm8> */
19336 else if (rtx_equal_p (dst, op0)
19337 && CONST_INT_P (op1)
19338 && IN_RANGE (INTVAL (op1), -255, 255))
19339 action = CONV;
19340 /* ADDS <Rd>,<Rn>,#<imm3> */
19341 /* SUBS <Rd>,<Rn>,#<imm3> */
19342 else if (CONST_INT_P (op1)
19343 && IN_RANGE (INTVAL (op1), -7, 7))
19344 action = CONV;
19346 /* ADCS <Rd>, <Rn> */
19347 else if (GET_CODE (XEXP (src, 0)) == PLUS
19348 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19349 && low_register_operand (XEXP (XEXP (src, 0), 1),
19350 SImode)
19351 && COMPARISON_P (op1)
19352 && cc_register (XEXP (op1, 0), VOIDmode)
19353 && maybe_get_arm_condition_code (op1) == ARM_CS
19354 && XEXP (op1, 1) == const0_rtx)
19355 action = CONV;
19356 break;
19358 case MINUS:
19359 /* RSBS <Rd>,<Rn>,#0
19360 Not handled here: see NEG below. */
19361 /* SUBS <Rd>,<Rn>,#<imm3>
19362 SUBS <Rdn>,#<imm8>
19363 Not handled here: see PLUS above. */
19364 /* SUBS <Rd>,<Rn>,<Rm> */
19365 if (low_register_operand (op0, SImode)
19366 && low_register_operand (op1, SImode))
19367 action = CONV;
19368 break;
19370 case MULT:
19371 /* MULS <Rdm>,<Rn>,<Rdm>
19372 As an exception to the rule, this is only used
19373 when optimizing for size since MULS is slow on all
19374 known implementations. We do not even want to use
19375 MULS in cold code, if optimizing for speed, so we
19376 test the global flag here. */
19377 if (!optimize_size)
19378 break;
19379 /* Fall through. */
19380 case AND:
19381 case IOR:
19382 case XOR:
19383 /* ANDS <Rdn>,<Rm> */
19384 if (rtx_equal_p (dst, op0)
19385 && low_register_operand (op1, SImode))
19386 action = action_for_partial_flag_setting;
19387 else if (rtx_equal_p (dst, op1)
19388 && low_register_operand (op0, SImode))
19389 action = action_for_partial_flag_setting == SKIP
19390 ? SKIP : SWAP_CONV;
19391 break;
19393 case ASHIFTRT:
19394 case ASHIFT:
19395 case LSHIFTRT:
19396 /* ASRS <Rdn>,<Rm> */
19397 /* LSRS <Rdn>,<Rm> */
19398 /* LSLS <Rdn>,<Rm> */
19399 if (rtx_equal_p (dst, op0)
19400 && low_register_operand (op1, SImode))
19401 action = action_for_partial_flag_setting;
19402 /* ASRS <Rd>,<Rm>,#<imm5> */
19403 /* LSRS <Rd>,<Rm>,#<imm5> */
19404 /* LSLS <Rd>,<Rm>,#<imm5> */
19405 else if (low_register_operand (op0, SImode)
19406 && CONST_INT_P (op1)
19407 && IN_RANGE (INTVAL (op1), 0, 31))
19408 action = action_for_partial_flag_setting;
19409 break;
19411 case ROTATERT:
19412 /* RORS <Rdn>,<Rm> */
19413 if (rtx_equal_p (dst, op0)
19414 && low_register_operand (op1, SImode))
19415 action = action_for_partial_flag_setting;
19416 break;
19418 case NOT:
19419 /* MVNS <Rd>,<Rm> */
19420 if (low_register_operand (op0, SImode))
19421 action = action_for_partial_flag_setting;
19422 break;
19424 case NEG:
19425 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19426 if (low_register_operand (op0, SImode))
19427 action = CONV;
19428 break;
19430 case CONST_INT:
19431 /* MOVS <Rd>,#<imm8> */
19432 if (CONST_INT_P (src)
19433 && IN_RANGE (INTVAL (src), 0, 255))
19434 action = action_for_partial_flag_setting;
19435 break;
19437 case REG:
19438 /* MOVS and MOV<c> with registers have different
19439 encodings, so are not relevant here. */
19440 break;
19442 default:
19443 break;
19447 if (action != SKIP)
19449 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19450 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19451 rtvec vec;
19453 if (action == SWAP_CONV)
19455 src = copy_rtx (src);
19456 XEXP (src, 0) = op1;
19457 XEXP (src, 1) = op0;
19458 pat = gen_rtx_SET (dst, src);
19459 vec = gen_rtvec (2, pat, clobber);
19461 else /* action == CONV */
19462 vec = gen_rtvec (2, pat, clobber);
19464 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19465 INSN_CODE (insn) = -1;
19469 if (NONDEBUG_INSN_P (insn))
19470 df_simulate_one_insn_backwards (bb, insn, &live);
19474 CLEAR_REG_SET (&live);
19477 /* Gcc puts the pool in the wrong place for ARM, since we can only
19478 load addresses a limited distance around the pc. We do some
19479 special munging to move the constant pool values to the correct
19480 point in the code. */
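/* Sketch of the problem being solved (addresses and constants made up): a
   constant that cannot be encoded as an immediate is loaded PC-relative,
   e.g.

       ldr     r0, .LCP0
       ...
   .LCP0:
       .word   0x12345678

   and such a load only has limited reach (on the order of a few KB, less in
   Thumb), so the pool entry must be placed close enough to the load.  The
   code below finds or creates suitable barriers and moves the pool entries
   there.  */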
19481 static void
19482 arm_reorg (void)
19484 rtx_insn *insn;
19485 HOST_WIDE_INT address = 0;
19486 Mfix * fix;
19488 if (use_cmse)
19489 cmse_nonsecure_call_inline_register_clear ();
19491 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19492 if (cfun->is_thunk)
19494 else if (TARGET_THUMB1)
19495 thumb1_reorg ();
19496 else if (TARGET_THUMB2)
19497 thumb2_reorg ();
19499 /* Ensure all insns that must be split have been split at this point.
19500 Otherwise, the pool placement code below may compute incorrect
19501 insn lengths. Note that when optimizing, all insns have already
19502 been split at this point. */
19503 if (!optimize)
19504 split_all_insns_noflow ();
19506 /* Make sure we do not attempt to create a literal pool even though it should
19507 no longer be necessary to create any. */
19508 if (arm_disable_literal_pool)
19509 return ;
19511 minipool_fix_head = minipool_fix_tail = NULL;
19513 /* The first insn must always be a note, or the code below won't
19514 scan it properly. */
19515 insn = get_insns ();
19516 gcc_assert (NOTE_P (insn));
19517 minipool_pad = 0;
19519 /* Scan all the insns and record the operands that will need fixing. */
19520 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19522 if (BARRIER_P (insn))
19523 push_minipool_barrier (insn, address);
19524 else if (INSN_P (insn))
19526 rtx_jump_table_data *table;
19528 note_invalid_constants (insn, address, true);
19529 address += get_attr_length (insn);
19531 /* If the insn is a vector jump, add the size of the table
19532 and skip the table. */
19533 if (tablejump_p (insn, NULL, &table))
19535 address += get_jump_table_size (table);
19536 insn = table;
19539 else if (LABEL_P (insn))
19540 /* Add the worst-case padding due to alignment. We don't add
19541 the _current_ padding because the minipool insertions
19542 themselves might change it. */
19543 address += get_label_padding (insn);
19546 fix = minipool_fix_head;
19548 /* Now scan the fixups and perform the required changes. */
19549 while (fix)
19551 Mfix * ftmp;
19552 Mfix * fdel;
19553 Mfix * last_added_fix;
19554 Mfix * last_barrier = NULL;
19555 Mfix * this_fix;
19557 /* Skip any further barriers before the next fix. */
19558 while (fix && BARRIER_P (fix->insn))
19559 fix = fix->next;
19561 /* No more fixes. */
19562 if (fix == NULL)
19563 break;
19565 last_added_fix = NULL;
19567 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19569 if (BARRIER_P (ftmp->insn))
19571 if (ftmp->address >= minipool_vector_head->max_address)
19572 break;
19574 last_barrier = ftmp;
19576 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19577 break;
19579 last_added_fix = ftmp; /* Keep track of the last fix added. */
19582 /* If we found a barrier, drop back to that; any fixes that we
19583 could have reached but come after the barrier will now go in
19584 the next mini-pool. */
19585 if (last_barrier != NULL)
19587 /* Reduce the refcount for those fixes that won't go into this
19588 pool after all. */
19589 for (fdel = last_barrier->next;
19590 fdel && fdel != ftmp;
19591 fdel = fdel->next)
19593 fdel->minipool->refcount--;
19594 fdel->minipool = NULL;
19597 ftmp = last_barrier;
19599 else
19601 /* ftmp is the first fix that we can't fit into this pool and
19602 there are no natural barriers that we could use. Insert a
19603 new barrier in the code somewhere between the previous
19604 fix and this one, and arrange to jump around it. */
19605 HOST_WIDE_INT max_address;
19607 /* The last item on the list of fixes must be a barrier, so
19608 we can never run off the end of the list of fixes without
19609 last_barrier being set. */
19610 gcc_assert (ftmp);
19612 max_address = minipool_vector_head->max_address;
19613 /* Check that there isn't another fix that is in range that
19614 we couldn't fit into this pool because the pool was
19615 already too large: we need to put the pool before such an
19616 instruction. The pool itself may come just after the
19617 fix because create_fix_barrier also allows space for a
19618 jump instruction. */
19619 if (ftmp->address < max_address)
19620 max_address = ftmp->address + 1;
19622 last_barrier = create_fix_barrier (last_added_fix, max_address);
19625 assign_minipool_offsets (last_barrier);
19627 while (ftmp)
19629 if (!BARRIER_P (ftmp->insn)
19630 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19631 == NULL))
19632 break;
19634 ftmp = ftmp->next;
19637 /* Scan over the fixes we have identified for this pool, fixing them
19638 up and adding the constants to the pool itself. */
19639 for (this_fix = fix; this_fix && ftmp != this_fix;
19640 this_fix = this_fix->next)
19641 if (!BARRIER_P (this_fix->insn))
19643 rtx addr
19644 = plus_constant (Pmode,
19645 gen_rtx_LABEL_REF (VOIDmode,
19646 minipool_vector_label),
19647 this_fix->minipool->offset);
19648 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19651 dump_minipool (last_barrier->insn);
19652 fix = ftmp;
19655 /* From now on we must synthesize any constants that we can't handle
19656 directly. This can happen if the RTL gets split during final
19657 instruction generation. */
19658 cfun->machine->after_arm_reorg = 1;
19660 /* Free the minipool memory. */
19661 obstack_free (&minipool_obstack, minipool_startobj);
19664 /* Routines to output assembly language. */
19666 /* Return string representation of passed in real value. */
19667 static const char *
19668 fp_const_from_val (REAL_VALUE_TYPE *r)
19670 if (!fp_consts_inited)
19671 init_fp_table ();
19673 gcc_assert (real_equal (r, &value_fp0));
19674 return "0";
19677 /* OPERANDS[0] is the entire list of insns that constitute pop,
19678 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19679 is in the list, UPDATE is true iff the list contains explicit
19680 update of base register. */
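/* Illustrative outputs (register lists made up): for a plain epilogue with
   SP as the base and an explicit update this prints something like
   "pop {r4, r5, pc}", while returning from an interrupt handler falls back
   to "ldmfd sp!, {r4, r5, pc}^", since POP cannot be used for exception
   return.  */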
19681 void
19682 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19683 bool update)
19685 int i;
19686 char pattern[100];
19687 int offset;
19688 const char *conditional;
19689 int num_saves = XVECLEN (operands[0], 0);
19690 unsigned int regno;
19691 unsigned int regno_base = REGNO (operands[1]);
19692 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19694 offset = 0;
19695 offset += update ? 1 : 0;
19696 offset += return_pc ? 1 : 0;
19698 /* Is the base register in the list? */
19699 for (i = offset; i < num_saves; i++)
19701 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19702 /* If SP is in the list, then the base register must be SP. */
19703 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19704 /* If base register is in the list, there must be no explicit update. */
19705 if (regno == regno_base)
19706 gcc_assert (!update);
19709 conditional = reverse ? "%?%D0" : "%?%d0";
19710 /* Can't use POP if returning from an interrupt. */
19711 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19712 sprintf (pattern, "pop%s\t{", conditional);
19713 else
19715 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19716 It's just a convention, their semantics are identical. */
19717 if (regno_base == SP_REGNUM)
19718 sprintf (pattern, "ldmfd%s\t", conditional);
19719 else if (update)
19720 sprintf (pattern, "ldmia%s\t", conditional);
19721 else
19722 sprintf (pattern, "ldm%s\t", conditional);
19724 strcat (pattern, reg_names[regno_base]);
19725 if (update)
19726 strcat (pattern, "!, {");
19727 else
19728 strcat (pattern, ", {");
19731 /* Output the first destination register. */
19732 strcat (pattern,
19733 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19735 /* Output the rest of the destination registers. */
19736 for (i = offset + 1; i < num_saves; i++)
19738 strcat (pattern, ", ");
19739 strcat (pattern,
19740 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19743 strcat (pattern, "}");
19745 if (interrupt_p && return_pc)
19746 strcat (pattern, "^");
19748 output_asm_insn (pattern, &cond);
19752 /* Output the assembly for a store multiple. */
19754 const char *
19755 vfp_output_vstmd (rtx * operands)
19757 char pattern[100];
19758 int p;
19759 int base;
19760 int i;
19761 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19762 ? XEXP (operands[0], 0)
19763 : XEXP (XEXP (operands[0], 0), 0);
19764 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19766 if (push_p)
19767 strcpy (pattern, "vpush%?.64\t{%P1");
19768 else
19769 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19771 p = strlen (pattern);
19773 gcc_assert (REG_P (operands[1]));
19775 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19776 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19778 p += sprintf (&pattern[p], ", d%d", base + i);
19780 strcpy (&pattern[p], "}");
19782 output_asm_insn (pattern, operands);
19783 return "";
19787 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19788 number of bytes pushed. */
19790 static int
19791 vfp_emit_fstmd (int base_reg, int count)
19793 rtx par;
19794 rtx dwarf;
19795 rtx tmp, reg;
19796 int i;
19798 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19799 register pairs are stored by a store multiple insn. We avoid this
19800 by pushing an extra pair. */
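/* For example (purely illustrative): a request to push just {d14, d15} on an
   affected pre-v6 core would be widened to {d13, d14, d15}, so the store
   multiple never writes exactly two register pairs.  */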
19801 if (count == 2 && !arm_arch6)
19803 if (base_reg == LAST_VFP_REGNUM - 3)
19804 base_reg -= 2;
19805 count++;
19808 /* FSTMD may not store more than 16 doubleword registers at once. Split
19809 larger stores into multiple parts (up to a maximum of two, in
19810 practice). */
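/* Illustration (register numbers made up): a request for 20 D registers
   starting at d0 is split into a store of d16-d19 followed by a store of
   d0-d15, each handled by a recursive call below.  */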
19811 if (count > 16)
19813 int saved;
19814 /* NOTE: base_reg is an internal register number, so each D register
19815 counts as 2. */
19816 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19817 saved += vfp_emit_fstmd (base_reg, 16);
19818 return saved;
19821 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19822 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19824 reg = gen_rtx_REG (DFmode, base_reg);
19825 base_reg += 2;
19827 XVECEXP (par, 0, 0)
19828 = gen_rtx_SET (gen_frame_mem
19829 (BLKmode,
19830 gen_rtx_PRE_MODIFY (Pmode,
19831 stack_pointer_rtx,
19832 plus_constant
19833 (Pmode, stack_pointer_rtx,
19834 - (count * 8)))
19836 gen_rtx_UNSPEC (BLKmode,
19837 gen_rtvec (1, reg),
19838 UNSPEC_PUSH_MULT));
19840 tmp = gen_rtx_SET (stack_pointer_rtx,
19841 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19842 RTX_FRAME_RELATED_P (tmp) = 1;
19843 XVECEXP (dwarf, 0, 0) = tmp;
19845 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19846 RTX_FRAME_RELATED_P (tmp) = 1;
19847 XVECEXP (dwarf, 0, 1) = tmp;
19849 for (i = 1; i < count; i++)
19851 reg = gen_rtx_REG (DFmode, base_reg);
19852 base_reg += 2;
19853 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19855 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19856 plus_constant (Pmode,
19857 stack_pointer_rtx,
19858 i * 8)),
19859 reg);
19860 RTX_FRAME_RELATED_P (tmp) = 1;
19861 XVECEXP (dwarf, 0, i + 1) = tmp;
19864 par = emit_insn (par);
19865 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19866 RTX_FRAME_RELATED_P (par) = 1;
19868 return count * 8;
19871 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19872 has the cmse_nonsecure_call attribute; returns false otherwise. */
19874 bool
19875 detect_cmse_nonsecure_call (tree addr)
19877 if (!addr)
19878 return FALSE;
19880 tree fntype = TREE_TYPE (addr);
19881 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19882 TYPE_ATTRIBUTES (fntype)))
19883 return TRUE;
19884 return FALSE;
19888 /* Emit a call instruction with pattern PAT. ADDR is the address of
19889 the call target. */
19891 void
19892 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19894 rtx insn;
19896 insn = emit_call_insn (pat);
19898 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19899 If the call might use such an entry, add a use of the PIC register
19900 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19901 if (TARGET_VXWORKS_RTP
19902 && flag_pic
19903 && !sibcall
19904 && SYMBOL_REF_P (addr)
19905 && (SYMBOL_REF_DECL (addr)
19906 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19907 : !SYMBOL_REF_LOCAL_P (addr)))
19909 require_pic_register (NULL_RTX, false /*compute_now*/);
19910 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19913 if (TARGET_FDPIC)
19915 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19916 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19919 if (TARGET_AAPCS_BASED)
19921 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19922 linker. We need to add an IP clobber to allow setting
19923 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19924 is not needed since it's a fixed register. */
19925 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19926 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19930 /* Output a 'call' insn. */
19931 const char *
19932 output_call (rtx *operands)
19934 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19936 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19937 if (REGNO (operands[0]) == LR_REGNUM)
19939 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19940 output_asm_insn ("mov%?\t%0, %|lr", operands);
19943 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19945 if (TARGET_INTERWORK || arm_arch4t)
19946 output_asm_insn ("bx%?\t%0", operands);
19947 else
19948 output_asm_insn ("mov%?\t%|pc, %0", operands);
19950 return "";
19953 /* Output a move from arm registers to arm registers of a long double
19954 OPERANDS[0] is the destination.
19955 OPERANDS[1] is the source. */
19956 const char *
19957 output_mov_long_double_arm_from_arm (rtx *operands)
19959 /* We have to be careful here because the two might overlap. */
19960 int dest_start = REGNO (operands[0]);
19961 int src_start = REGNO (operands[1]);
19962 rtx ops[2];
19963 int i;
19965 if (dest_start < src_start)
19967 for (i = 0; i < 3; i++)
19969 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19970 ops[1] = gen_rtx_REG (SImode, src_start + i);
19971 output_asm_insn ("mov%?\t%0, %1", ops);
19974 else
19976 for (i = 2; i >= 0; i--)
19978 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19979 ops[1] = gen_rtx_REG (SImode, src_start + i);
19980 output_asm_insn ("mov%?\t%0, %1", ops);
19984 return "";
19987 void
19988 arm_emit_movpair (rtx dest, rtx src)
19990 /* If the src is an immediate, simplify it. */
19991 if (CONST_INT_P (src))
19993 HOST_WIDE_INT val = INTVAL (src);
19994 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19995 if ((val >> 16) & 0x0000ffff)
19997 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19998 GEN_INT (16)),
19999 GEN_INT ((val >> 16) & 0x0000ffff));
20000 rtx_insn *insn = get_last_insn ();
20001 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20003 return;
20005 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20006 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20007 rtx_insn *insn = get_last_insn ();
20008 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
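/* For illustration (constant chosen arbitrarily), arm_emit_movpair splits an
   immediate such as 0x12345678 into

       movw    rd, #0x5678      @ SET of the low 16 bits
       movt    rd, #0x1234      @ ZERO_EXTRACT set of the high 16 bits

   and the second insn is omitted entirely when the high half is zero.  */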
20011 /* Output a move between double words. It must be REG<-MEM
20012 or MEM<-REG. */
20013 const char *
20014 output_move_double (rtx *operands, bool emit, int *count)
20016 enum rtx_code code0 = GET_CODE (operands[0]);
20017 enum rtx_code code1 = GET_CODE (operands[1]);
20018 rtx otherops[3];
20019 if (count)
20020 *count = 1;
20022 /* The only case when this might happen is when
20023 you are looking at the length of a DImode instruction
20024 that has an invalid constant in it. */
20025 if (code0 == REG && code1 != MEM)
20027 gcc_assert (!emit);
20028 *count = 2;
20029 return "";
20032 if (code0 == REG)
20034 unsigned int reg0 = REGNO (operands[0]);
20035 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20037 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20039 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20041 switch (GET_CODE (XEXP (operands[1], 0)))
20043 case REG:
20045 if (emit)
20047 if (can_ldrd
20048 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20049 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20050 else
20051 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20053 break;
20055 case PRE_INC:
20056 gcc_assert (can_ldrd);
20057 if (emit)
20058 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20059 break;
20061 case PRE_DEC:
20062 if (emit)
20064 if (can_ldrd)
20065 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20066 else
20067 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20069 break;
20071 case POST_INC:
20072 if (emit)
20074 if (can_ldrd)
20075 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20076 else
20077 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20079 break;
20081 case POST_DEC:
20082 gcc_assert (can_ldrd);
20083 if (emit)
20084 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20085 break;
20087 case PRE_MODIFY:
20088 case POST_MODIFY:
20089 /* Autoincrement addressing modes should never have overlapping
20090 base and destination registers, and overlapping index registers
20091 are already prohibited, so this doesn't need to worry about
20092 fix_cm3_ldrd. */
20093 otherops[0] = operands[0];
20094 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20095 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20097 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20099 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20101 /* Registers overlap so split out the increment. */
20102 if (emit)
20104 gcc_assert (can_ldrd);
20105 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20106 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20108 if (count)
20109 *count = 2;
20111 else
20113 /* Use a single insn if we can.
20114 FIXME: IWMMXT allows offsets larger than ldrd can
20115 handle, fix these up with a pair of ldr. */
20116 if (can_ldrd
20117 && (TARGET_THUMB2
20118 || !CONST_INT_P (otherops[2])
20119 || (INTVAL (otherops[2]) > -256
20120 && INTVAL (otherops[2]) < 256)))
20122 if (emit)
20123 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20125 else
20127 if (emit)
20129 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20130 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20132 if (count)
20133 *count = 2;
20138 else
20140 /* Use a single insn if we can.
20141 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20142 fix these up with a pair of ldr. */
20143 if (can_ldrd
20144 && (TARGET_THUMB2
20145 || !CONST_INT_P (otherops[2])
20146 || (INTVAL (otherops[2]) > -256
20147 && INTVAL (otherops[2]) < 256)))
20149 if (emit)
20150 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20152 else
20154 if (emit)
20156 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20157 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20159 if (count)
20160 *count = 2;
20163 break;
20165 case LABEL_REF:
20166 case CONST:
20167 /* We might be able to use ldrd %0, %1 here. However the range is
20168 different to ldr/adr, and it is broken on some ARMv7-M
20169 implementations. */
20170 /* Use the second register of the pair to avoid problematic
20171 overlap. */
20172 otherops[1] = operands[1];
20173 if (emit)
20174 output_asm_insn ("adr%?\t%0, %1", otherops);
20175 operands[1] = otherops[0];
20176 if (emit)
20178 if (can_ldrd)
20179 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20180 else
20181 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20184 if (count)
20185 *count = 2;
20186 break;
20188 /* ??? This needs checking for thumb2. */
20189 default:
20190 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20191 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20193 otherops[0] = operands[0];
20194 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20195 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20197 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20199 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20201 switch ((int) INTVAL (otherops[2]))
20203 case -8:
20204 if (emit)
20205 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20206 return "";
20207 case -4:
20208 if (TARGET_THUMB2)
20209 break;
20210 if (emit)
20211 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20212 return "";
20213 case 4:
20214 if (TARGET_THUMB2)
20215 break;
20216 if (emit)
20217 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20218 return "";
20221 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20222 operands[1] = otherops[0];
20223 if (can_ldrd
20224 && (REG_P (otherops[2])
20225 || TARGET_THUMB2
20226 || (CONST_INT_P (otherops[2])
20227 && INTVAL (otherops[2]) > -256
20228 && INTVAL (otherops[2]) < 256)))
20230 if (reg_overlap_mentioned_p (operands[0],
20231 otherops[2]))
20233 /* Swap base and index registers over to
20234 avoid a conflict. */
20235 std::swap (otherops[1], otherops[2]);
20237 /* If both registers conflict, it will usually
20238 have been fixed by a splitter. */
20239 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20240 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20242 if (emit)
20244 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20245 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20247 if (count)
20248 *count = 2;
20250 else
20252 otherops[0] = operands[0];
20253 if (emit)
20254 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20256 return "";
20259 if (CONST_INT_P (otherops[2]))
20261 if (emit)
20263 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20264 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20265 else
20266 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20269 else
20271 if (emit)
20272 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20275 else
20277 if (emit)
20278 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20281 if (count)
20282 *count = 2;
20284 if (can_ldrd)
20285 return "ldrd%?\t%0, [%1]";
20287 return "ldmia%?\t%1, %M0";
20289 else
20291 otherops[1] = adjust_address (operands[1], SImode, 4);
20292 /* Take care of overlapping base/data reg. */
20293 if (reg_mentioned_p (operands[0], operands[1]))
20295 if (emit)
20297 output_asm_insn ("ldr%?\t%0, %1", otherops);
20298 output_asm_insn ("ldr%?\t%0, %1", operands);
20300 if (count)
20301 *count = 2;
20304 else
20306 if (emit)
20308 output_asm_insn ("ldr%?\t%0, %1", operands);
20309 output_asm_insn ("ldr%?\t%0, %1", otherops);
20311 if (count)
20312 *count = 2;
20317 else
20319 /* Constraints should ensure this. */
20320 gcc_assert (code0 == MEM && code1 == REG);
20321 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20322 || (TARGET_ARM && TARGET_LDRD));
20324 /* For TARGET_ARM the first source register of an STRD
20325 must be even. This is usually the case for double-word
20326 values but user assembly constraints can force an odd
20327 starting register. */
20328 bool allow_strd = TARGET_LDRD
20329 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20330 switch (GET_CODE (XEXP (operands[0], 0)))
20332 case REG:
20333 if (emit)
20335 if (allow_strd)
20336 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20337 else
20338 output_asm_insn ("stm%?\t%m0, %M1", operands);
20340 break;
20342 case PRE_INC:
20343 gcc_assert (allow_strd);
20344 if (emit)
20345 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20346 break;
20348 case PRE_DEC:
20349 if (emit)
20351 if (allow_strd)
20352 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20353 else
20354 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20356 break;
20358 case POST_INC:
20359 if (emit)
20361 if (allow_strd)
20362 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20363 else
20364 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20366 break;
20368 case POST_DEC:
20369 gcc_assert (allow_strd);
20370 if (emit)
20371 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20372 break;
20374 case PRE_MODIFY:
20375 case POST_MODIFY:
20376 otherops[0] = operands[1];
20377 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20378 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20380 /* IWMMXT allows offsets larger than strd can handle,
20381 fix these up with a pair of str. */
20382 if (!TARGET_THUMB2
20383 && CONST_INT_P (otherops[2])
20384 && (INTVAL(otherops[2]) <= -256
20385 || INTVAL(otherops[2]) >= 256))
20387 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20389 if (emit)
20391 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20392 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20394 if (count)
20395 *count = 2;
20397 else
20399 if (emit)
20401 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20402 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20404 if (count)
20405 *count = 2;
20408 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20410 if (emit)
20411 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20413 else
20415 if (emit)
20416 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20418 break;
20420 case PLUS:
20421 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20422 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20424 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20426 case -8:
20427 if (emit)
20428 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20429 return "";
20431 case -4:
20432 if (TARGET_THUMB2)
20433 break;
20434 if (emit)
20435 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20436 return "";
20438 case 4:
20439 if (TARGET_THUMB2)
20440 break;
20441 if (emit)
20442 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20443 return "";
20446 if (allow_strd
20447 && (REG_P (otherops[2])
20448 || TARGET_THUMB2
20449 || (CONST_INT_P (otherops[2])
20450 && INTVAL (otherops[2]) > -256
20451 && INTVAL (otherops[2]) < 256)))
20453 otherops[0] = operands[1];
20454 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20455 if (emit)
20456 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20457 return "";
20459 /* Fall through */
20461 default:
20462 otherops[0] = adjust_address (operands[0], SImode, 4);
20463 otherops[1] = operands[1];
20464 if (emit)
20466 output_asm_insn ("str%?\t%1, %0", operands);
20467 output_asm_insn ("str%?\t%H1, %0", otherops);
20469 if (count)
20470 *count = 2;
20474 return "";
20477 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20478 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20480 const char *
20481 output_move_quad (rtx *operands)
20483 if (REG_P (operands[0]))
20485 /* Load, or reg->reg move. */
20487 if (MEM_P (operands[1]))
20489 switch (GET_CODE (XEXP (operands[1], 0)))
20491 case REG:
20492 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20493 break;
20495 case LABEL_REF:
20496 case CONST:
20497 output_asm_insn ("adr%?\t%0, %1", operands);
20498 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20499 break;
20501 default:
20502 gcc_unreachable ();
20505 else
20507 rtx ops[2];
20508 int dest, src, i;
20510 gcc_assert (REG_P (operands[1]));
20512 dest = REGNO (operands[0]);
20513 src = REGNO (operands[1]);
20515 /* This seems pretty dumb, but hopefully GCC won't try to do it
20516 very often. */
20517 if (dest < src)
20518 for (i = 0; i < 4; i++)
20520 ops[0] = gen_rtx_REG (SImode, dest + i);
20521 ops[1] = gen_rtx_REG (SImode, src + i);
20522 output_asm_insn ("mov%?\t%0, %1", ops);
20524 else
20525 for (i = 3; i >= 0; i--)
20527 ops[0] = gen_rtx_REG (SImode, dest + i);
20528 ops[1] = gen_rtx_REG (SImode, src + i);
20529 output_asm_insn ("mov%?\t%0, %1", ops);
20533 else
20535 gcc_assert (MEM_P (operands[0]));
20536 gcc_assert (REG_P (operands[1]));
20537 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20539 switch (GET_CODE (XEXP (operands[0], 0)))
20541 case REG:
20542 output_asm_insn ("stm%?\t%m0, %M1", operands);
20543 break;
20545 default:
20546 gcc_unreachable ();
20550 return "";
20553 /* Output a VFP load or store instruction. */
20555 const char *
20556 output_move_vfp (rtx *operands)
20558 rtx reg, mem, addr, ops[2];
20559 int load = REG_P (operands[0]);
20560 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20561 int sp = (!TARGET_VFP_FP16INST
20562 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20563 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20564 const char *templ;
20565 char buff[50];
20566 machine_mode mode;
20568 reg = operands[!load];
20569 mem = operands[load];
20571 mode = GET_MODE (reg);
20573 gcc_assert (REG_P (reg));
20574 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20575 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20576 || mode == SFmode
20577 || mode == DFmode
20578 || mode == HImode
20579 || mode == SImode
20580 || mode == DImode
20581 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20582 gcc_assert (MEM_P (mem));
20584 addr = XEXP (mem, 0);
20586 switch (GET_CODE (addr))
20588 case PRE_DEC:
20589 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20590 ops[0] = XEXP (addr, 0);
20591 ops[1] = reg;
20592 break;
20594 case POST_INC:
20595 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20596 ops[0] = XEXP (addr, 0);
20597 ops[1] = reg;
20598 break;
20600 default:
20601 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20602 ops[0] = reg;
20603 ops[1] = mem;
20604 break;
20607 sprintf (buff, templ,
20608 load ? "ld" : "st",
20609 dp ? "64" : sp ? "32" : "16",
20610 dp ? "P" : "",
20611 integer_p ? "\t%@ int" : "");
20612 output_asm_insn (buff, ops);
20614 return "";
20617 /* Output a Neon double-word or quad-word load or store, or a load
20618 or store for larger structure modes.
20620 WARNING: The ordering of elements is weird in big-endian mode,
20621 because the EABI requires that vectors stored in memory appear
20622 as though they were stored by a VSTM instruction.
20623 GCC RTL defines element ordering based on in-memory order.
20624 This can be different from the architectural ordering of elements
20625 within a NEON register. The intrinsics defined in arm_neon.h use the
20626 NEON register element ordering, not the GCC RTL element ordering.
20628 For example, the in-memory ordering of a big-endian quadword
20629 vector with 16-bit elements when stored from register pair {d0,d1}
20630 will be (lowest address first, d0[N] is NEON register element N):
20632 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20634 When necessary, quadword registers (dN, dN+1) are moved to ARM
20635 registers from rN in the order:
20637 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20639 So that STM/LDM can be used on vectors in ARM registers, and the
20640 same memory layout will result as if VSTM/VLDM were used.
20642 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20643 possible, which allows use of appropriate alignment tags.
20644 Note that the choice of "64" is independent of the actual vector
20645 element size; this size simply ensures that the behavior is
20646 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20648 Due to limitations of those instructions, use of VST1.64/VLD1.64
20649 is not possible if:
20650 - the address contains PRE_DEC, or
20651 - the mode refers to more than 4 double-word registers
20653 In those cases, it would be possible to replace VSTM/VLDM by a
20654 sequence of instructions; this is not currently implemented since
20655 this is not certain to actually improve performance. */
20657 const char *
20658 output_move_neon (rtx *operands)
20660 rtx reg, mem, addr, ops[2];
20661 int regno, nregs, load = REG_P (operands[0]);
20662 const char *templ;
20663 char buff[50];
20664 machine_mode mode;
20666 reg = operands[!load];
20667 mem = operands[load];
20669 mode = GET_MODE (reg);
20671 gcc_assert (REG_P (reg));
20672 regno = REGNO (reg);
20673 nregs = REG_NREGS (reg) / 2;
20674 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20675 || NEON_REGNO_OK_FOR_QUAD (regno));
20676 gcc_assert (VALID_NEON_DREG_MODE (mode)
20677 || VALID_NEON_QREG_MODE (mode)
20678 || VALID_NEON_STRUCT_MODE (mode));
20679 gcc_assert (MEM_P (mem));
20681 addr = XEXP (mem, 0);
20683 /* Strip off const from addresses like (const (plus (...))). */
20684 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20685 addr = XEXP (addr, 0);
20687 switch (GET_CODE (addr))
20689 case POST_INC:
20690 /* We have to use vldm / vstm for too-large modes. */
20691 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20693 templ = "v%smia%%?\t%%0!, %%h1";
20694 ops[0] = XEXP (addr, 0);
20696 else
20698 templ = "v%s1.64\t%%h1, %%A0";
20699 ops[0] = mem;
20701 ops[1] = reg;
20702 break;
20704 case PRE_DEC:
20705 /* We have to use vldm / vstm in this case, since there is no
20706 pre-decrement form of the vld1 / vst1 instructions. */
20707 templ = "v%smdb%%?\t%%0!, %%h1";
20708 ops[0] = XEXP (addr, 0);
20709 ops[1] = reg;
20710 break;
20712 case POST_MODIFY:
20713 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20714 gcc_unreachable ();
20716 case REG:
20717 /* We have to use vldm / vstm for too-large modes. */
20718 if (nregs > 1)
20720 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20721 templ = "v%smia%%?\t%%m0, %%h1";
20722 else
20723 templ = "v%s1.64\t%%h1, %%A0";
20725 ops[0] = mem;
20726 ops[1] = reg;
20727 break;
20729 /* Fall through. */
20730 case PLUS:
20731 if (GET_CODE (addr) == PLUS)
20732 addr = XEXP (addr, 0);
20733 /* Fall through. */
20734 case LABEL_REF:
20736 int i;
20737 int overlap = -1;
20738 for (i = 0; i < nregs; i++)
20740 /* We're only using DImode here because it's a convenient
20741 size. */
20742 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20743 ops[1] = adjust_address (mem, DImode, 8 * i);
20744 if (reg_overlap_mentioned_p (ops[0], mem))
20746 gcc_assert (overlap == -1);
20747 overlap = i;
20749 else
20751 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20752 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20753 else
20754 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20755 output_asm_insn (buff, ops);
20758 if (overlap != -1)
20760 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20761 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20762 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20763 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20764 else
20765 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20766 output_asm_insn (buff, ops);
20769 return "";
20772 default:
20773 gcc_unreachable ();
20776 sprintf (buff, templ, load ? "ld" : "st");
20777 output_asm_insn (buff, ops);
20779 return "";
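/* A rough illustration of what the routine above can emit (register
   numbers are hypothetical and alignment annotations on the address
   operand are omitted): loading a quad-word vector held in {d0,d1}
   from a plain register address uses the VLD1.64 form, something
   along the lines of

	vld1.64	{d0, d1}, [r0]

   while a structure mode occupying more than four double-word
   registers falls back to the VLDM form, e.g.

	vldmia	r0, {d0-d7}

   The store cases are symmetrical, using vst1.64 / vstmia.  */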
20782 /* Compute and return the length of neon_mov<mode>, where <mode> is
20783 one of VSTRUCT modes: EI, OI, CI or XI. */
20785 arm_attr_length_move_neon (rtx_insn *insn)
20787 rtx reg, mem, addr;
20788 int load;
20789 machine_mode mode;
20791 extract_insn_cached (insn);
20793 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20795 mode = GET_MODE (recog_data.operand[0]);
20796 switch (mode)
20798 case E_EImode:
20799 case E_OImode:
20800 return 8;
20801 case E_CImode:
20802 return 12;
20803 case E_XImode:
20804 return 16;
20805 default:
20806 gcc_unreachable ();
20810 load = REG_P (recog_data.operand[0]);
20811 reg = recog_data.operand[!load];
20812 mem = recog_data.operand[load];
20814 gcc_assert (MEM_P (mem));
20816 addr = XEXP (mem, 0);
20818 /* Strip off const from addresses like (const (plus (...))). */
20819 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20820 addr = XEXP (addr, 0);
20822 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20824 int insns = REG_NREGS (reg) / 2;
20825 return insns * 4;
20827 else
20828 return 4;
20831 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20832 return zero. */
20835 arm_address_offset_is_imm (rtx_insn *insn)
20837 rtx mem, addr;
20839 extract_insn_cached (insn);
20841 if (REG_P (recog_data.operand[0]))
20842 return 0;
20844 mem = recog_data.operand[0];
20846 gcc_assert (MEM_P (mem));
20848 addr = XEXP (mem, 0);
20850 if (REG_P (addr)
20851 || (GET_CODE (addr) == PLUS
20852 && REG_P (XEXP (addr, 0))
20853 && CONST_INT_P (XEXP (addr, 1))))
20854 return 1;
20855 else
20856 return 0;
20859 /* Output an ADD r, s, #n where n may be too big for one instruction.
20860 If adding zero to one register, output nothing. */
20861 const char *
20862 output_add_immediate (rtx *operands)
20864 HOST_WIDE_INT n = INTVAL (operands[2]);
20866 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20868 if (n < 0)
20869 output_multi_immediate (operands,
20870 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20871 -n);
20872 else
20873 output_multi_immediate (operands,
20874 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20878 return "";
20881 /* Output a multiple immediate operation.
20882 OPERANDS is the vector of operands referred to in the output patterns.
20883 INSTR1 is the output pattern to use for the first constant.
20884 INSTR2 is the output pattern to use for subsequent constants.
20885 IMMED_OP is the index of the constant slot in OPERANDS.
20886 N is the constant value. */
20887 static const char *
20888 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20889 int immed_op, HOST_WIDE_INT n)
20891 #if HOST_BITS_PER_WIDE_INT > 32
20892 n &= 0xffffffff;
20893 #endif
20895 if (n == 0)
20897 /* Quick and easy output. */
20898 operands[immed_op] = const0_rtx;
20899 output_asm_insn (instr1, operands);
20901 else
20903 int i;
20904 const char * instr = instr1;
20906 /* Note that n is never zero here (which would give no output). */
20907 for (i = 0; i < 32; i += 2)
20909 if (n & (3 << i))
20911 operands[immed_op] = GEN_INT (n & (255 << i));
20912 output_asm_insn (instr, operands);
20913 instr = instr2;
20914 i += 6;
20919 return "";
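/* Worked example for the two routines above (purely illustrative;
   the registers r0/r1 are hypothetical).  Adding the constant
   0x12340 to a value in r1 with the result in r0: 0x12340 is not a
   valid rotated 8-bit immediate, but the chunking loop finds two
   chunks, 0x2340 (bits 6-13) and 0x10000 (bit 16), each of which is
   a valid ARM immediate, so the output is roughly

	add	r0, r1, #9024
	add	r0, r0, #65536

   i.e. #0x2340 followed by #0x10000, which sum to the original
   constant.  */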
20922 /* Return the name of a shifter operation. */
20923 static const char *
20924 arm_shift_nmem(enum rtx_code code)
20926 switch (code)
20928 case ASHIFT:
20929 return ARM_LSL_NAME;
20931 case ASHIFTRT:
20932 return "asr";
20934 case LSHIFTRT:
20935 return "lsr";
20937 case ROTATERT:
20938 return "ror";
20940 default:
20941 abort();
20945 /* Return the appropriate ARM instruction for the operation code.
20946 The returned result should not be overwritten. OP is the rtx of the
20947 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20948 was shifted. */
20949 const char *
20950 arithmetic_instr (rtx op, int shift_first_arg)
20952 switch (GET_CODE (op))
20954 case PLUS:
20955 return "add";
20957 case MINUS:
20958 return shift_first_arg ? "rsb" : "sub";
20960 case IOR:
20961 return "orr";
20963 case XOR:
20964 return "eor";
20966 case AND:
20967 return "and";
20969 case ASHIFT:
20970 case ASHIFTRT:
20971 case LSHIFTRT:
20972 case ROTATERT:
20973 return arm_shift_nmem(GET_CODE(op));
20975 default:
20976 gcc_unreachable ();
20980 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20981 for the operation code. The returned result should not be overwritten.
20982 OP is the rtx of the shift.
20983 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
20984 holds the constant shift amount. */
20985 static const char *
20986 shift_op (rtx op, HOST_WIDE_INT *amountp)
20988 const char * mnem;
20989 enum rtx_code code = GET_CODE (op);
20991 switch (code)
20993 case ROTATE:
20994 if (!CONST_INT_P (XEXP (op, 1)))
20996 output_operand_lossage ("invalid shift operand");
20997 return NULL;
21000 code = ROTATERT;
21001 *amountp = 32 - INTVAL (XEXP (op, 1));
21002 mnem = "ror";
21003 break;
21005 case ASHIFT:
21006 case ASHIFTRT:
21007 case LSHIFTRT:
21008 case ROTATERT:
21009 mnem = arm_shift_nmem(code);
21010 if (CONST_INT_P (XEXP (op, 1)))
21012 *amountp = INTVAL (XEXP (op, 1));
21014 else if (REG_P (XEXP (op, 1)))
21016 *amountp = -1;
21017 return mnem;
21019 else
21021 output_operand_lossage ("invalid shift operand");
21022 return NULL;
21024 break;
21026 case MULT:
21027 /* We never have to worry about the amount being other than a
21028 power of 2, since this case can never be reloaded from a reg. */
21029 if (!CONST_INT_P (XEXP (op, 1)))
21031 output_operand_lossage ("invalid shift operand");
21032 return NULL;
21035 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21037 /* Amount must be a power of two. */
21038 if (*amountp & (*amountp - 1))
21040 output_operand_lossage ("invalid shift operand");
21041 return NULL;
21044 *amountp = exact_log2 (*amountp);
21045 gcc_assert (IN_RANGE (*amountp, 0, 31));
21046 return ARM_LSL_NAME;
21048 default:
21049 output_operand_lossage ("invalid shift operand");
21050 return NULL;
21053 /* This is not 100% correct, but follows from the desire to merge
21054 multiplication by a power of 2 with the recognizer for a
21055 shift. >=32 is not a valid shift for "lsl", so we must try and
21056 output a shift that produces the correct arithmetical result.
21057 Using lsr #32 is identical except for the fact that the carry bit
21058 is not set correctly if we set the flags; but we never use the
21059 carry bit from such an operation, so we can ignore that. */
21060 if (code == ROTATERT)
21061 /* Rotate is just modulo 32. */
21062 *amountp &= 31;
21063 else if (*amountp != (*amountp & 31))
21065 if (code == ASHIFT)
21066 mnem = "lsr";
21067 *amountp = 32;
21070 /* Shifts of 0 are no-ops. */
21071 if (*amountp == 0)
21072 return NULL;
21074 return mnem;
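/* Two small examples of how the routine above reports shifts (purely
   illustrative; X stands for an arbitrary register operand):

     (ashiftrt X (const_int 2))  ->  mnemonic "asr", *AMOUNTP == 2
     (mult X (const_int 8))      ->  mnemonic "lsl", *AMOUNTP == 3

   i.e. a multiplication by a power of two is reported as the
   equivalent left shift, as described in the comment above.  */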
21077 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21078 because /bin/as is horribly restrictive. The judgement about
21079 whether or not each character is 'printable' (and can be output as
21080 is) or not (and must be printed with an octal escape) must be made
21081 with reference to the *host* character set -- the situation is
21082 similar to that discussed in the comments above pp_c_char in
21083 c-pretty-print.cc. */
21085 #define MAX_ASCII_LEN 51
21087 void
21088 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21090 int i;
21091 int len_so_far = 0;
21093 fputs ("\t.ascii\t\"", stream);
21095 for (i = 0; i < len; i++)
21097 int c = p[i];
21099 if (len_so_far >= MAX_ASCII_LEN)
21101 fputs ("\"\n\t.ascii\t\"", stream);
21102 len_so_far = 0;
21105 if (ISPRINT (c))
21107 if (c == '\\' || c == '\"')
21109 putc ('\\', stream);
21110 len_so_far++;
21112 putc (c, stream);
21113 len_so_far++;
21115 else
21117 fprintf (stream, "\\%03o", c);
21118 len_so_far += 4;
21122 fputs ("\"\n", stream);
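/* For illustration, feeding the five bytes 'a', '"', 'b', '\n', '\0'
   through the routine above produces approximately

	.ascii	"a\"b\012\000"

   printable characters are emitted as-is (with backslash and double
   quote escaped), everything else as a three-digit octal escape, and
   the string is broken onto a fresh .ascii directive once
   MAX_ASCII_LEN characters have been written.  */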
21126 /* Compute the register save mask for registers 0 through 12
21127 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21129 static unsigned long
21130 arm_compute_save_reg0_reg12_mask (void)
21132 unsigned long func_type = arm_current_func_type ();
21133 unsigned long save_reg_mask = 0;
21134 unsigned int reg;
21136 if (IS_INTERRUPT (func_type))
21138 unsigned int max_reg;
21139 /* Interrupt functions must not corrupt any registers,
21140 even call clobbered ones. If this is a leaf function
21141 we can just examine the registers used by the RTL, but
21142 otherwise we have to assume that whatever function is
21143 called might clobber anything, and so we have to save
21144 all the call-clobbered registers as well. */
21145 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21146 /* FIQ handlers have registers r8 - r12 banked, so
21147 we only need to check r0 - r7. Normal ISRs only
21148 bank r14 and r15, so we must check up to r12.
21149 r13 is the stack pointer which is always preserved,
21150 so we do not need to consider it here. */
21151 max_reg = 7;
21152 else
21153 max_reg = 12;
21155 for (reg = 0; reg <= max_reg; reg++)
21156 if (reg_needs_saving_p (reg))
21157 save_reg_mask |= (1 << reg);
21159 /* Also save the pic base register if necessary. */
21160 if (PIC_REGISTER_MAY_NEED_SAVING
21161 && crtl->uses_pic_offset_table)
21162 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21164 else if (IS_VOLATILE(func_type))
21166 /* For noreturn functions we historically omitted register saves
21167 altogether. However this really messes up debugging. As a
21168 compromise save just the frame pointers. Combined with the link
21169 register saved elsewhere this should be sufficient to get
21170 a backtrace. */
21171 if (frame_pointer_needed)
21172 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21173 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21174 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21175 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21176 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21178 else
21180 /* In the normal case we only need to save those registers
21181 which are call saved and which are used by this function. */
21182 for (reg = 0; reg <= 11; reg++)
21183 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21184 save_reg_mask |= (1 << reg);
21186 /* Handle the frame pointer as a special case. */
21187 if (frame_pointer_needed)
21188 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21190 /* If we aren't loading the PIC register,
21191 don't stack it even though it may be live. */
21192 if (PIC_REGISTER_MAY_NEED_SAVING
21193 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21194 || crtl->uses_pic_offset_table))
21195 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21197 /* The prologue will copy SP into R0, so save it. */
21198 if (IS_STACKALIGN (func_type))
21199 save_reg_mask |= 1;
21202 /* Save registers so the exception handler can modify them. */
21203 if (crtl->calls_eh_return)
21205 unsigned int i;
21207 for (i = 0; ; i++)
21209 reg = EH_RETURN_DATA_REGNO (i);
21210 if (reg == INVALID_REGNUM)
21211 break;
21212 save_reg_mask |= 1 << reg;
21216 return save_reg_mask;
21219 /* Return true if r3 is live at the start of the function. */
21221 static bool
21222 arm_r3_live_at_start_p (void)
21224 /* Just look at cfg info, which is still close enough to correct at this
21225 point. This gives false positives for broken functions that might use
21226 uninitialized data that happens to be allocated in r3, but who cares? */
21227 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21230 /* Compute the number of bytes used to store the static chain register on the
21231 stack, above the stack frame. We need to know this accurately to get the
21232 alignment of the rest of the stack frame correct. */
21234 static int
21235 arm_compute_static_chain_stack_bytes (void)
21237 /* Once the value is updated from the init value of -1, do not
21238 re-compute. */
21239 if (cfun->machine->static_chain_stack_bytes != -1)
21240 return cfun->machine->static_chain_stack_bytes;
21242 /* See the defining assertion in arm_expand_prologue. */
21243 if (IS_NESTED (arm_current_func_type ())
21244 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21245 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21246 || flag_stack_clash_protection)
21247 && !df_regs_ever_live_p (LR_REGNUM)))
21248 && arm_r3_live_at_start_p ()
21249 && crtl->args.pretend_args_size == 0)
21250 return 4;
21252 return 0;
21255 /* Compute a bit mask of which core registers need to be
21256 saved on the stack for the current function.
21257 This is used by arm_compute_frame_layout, which may add extra registers. */
21259 static unsigned long
21260 arm_compute_save_core_reg_mask (void)
21262 unsigned int save_reg_mask = 0;
21263 unsigned long func_type = arm_current_func_type ();
21264 unsigned int reg;
21266 if (IS_NAKED (func_type))
21267 /* This should never really happen. */
21268 return 0;
21270 /* If we are creating a stack frame, then we must save the frame pointer,
21271 IP (which will hold the old stack pointer), LR and the PC. */
21272 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21273 save_reg_mask |=
21274 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21275 | (1 << IP_REGNUM)
21276 | (1 << LR_REGNUM)
21277 | (1 << PC_REGNUM);
21279 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21281 if (arm_current_function_pac_enabled_p ())
21282 save_reg_mask |= 1 << IP_REGNUM;
21284 /* Decide if we need to save the link register.
21285 Interrupt routines have their own banked link register,
21286 so they never need to save it.
21287 Otherwise if we do not use the link register we do not need to save
21288 it. If we are pushing other registers onto the stack however, we
21289 can save an instruction in the epilogue by pushing the link register
21290 now and then popping it back into the PC. This incurs extra memory
21291 accesses though, so we only do it when optimizing for size, and only
21292 if we know that we will not need a fancy return sequence. */
21293 if (df_regs_ever_live_p (LR_REGNUM)
21294 || (save_reg_mask
21295 && optimize_size
21296 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21297 && !crtl->tail_call_emit
21298 && !crtl->calls_eh_return))
21299 save_reg_mask |= 1 << LR_REGNUM;
21301 if (cfun->machine->lr_save_eliminated)
21302 save_reg_mask &= ~ (1 << LR_REGNUM);
21304 if (TARGET_REALLY_IWMMXT
21305 && ((bit_count (save_reg_mask)
21306 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21307 arm_compute_static_chain_stack_bytes())
21308 ) % 2) != 0)
21310 /* The total number of registers that are going to be pushed
21311 onto the stack is odd. We need to ensure that the stack
21312 is 64-bit aligned before we start to save iWMMXt registers,
21313 and also before we start to create locals. (A local variable
21314 might be a double or long long which we will load/store using
21315 an iWMMXt instruction). Therefore we need to push another
21316 ARM register, so that the stack will be 64-bit aligned. We
21317 try to avoid using the arg registers (r0 - r3) as they might be
21318 used to pass values in a tail call. */
21319 for (reg = 4; reg <= 12; reg++)
21320 if ((save_reg_mask & (1 << reg)) == 0)
21321 break;
21323 if (reg <= 12)
21324 save_reg_mask |= (1 << reg);
21325 else
21327 cfun->machine->sibcall_blocked = 1;
21328 save_reg_mask |= (1 << 3);
21332 /* We may need to push an additional register for use initializing the
21333 PIC base register. */
21334 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21335 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21337 reg = thumb_find_work_register (1 << 4);
21338 if (!call_used_or_fixed_reg_p (reg))
21339 save_reg_mask |= (1 << reg);
21342 return save_reg_mask;
21345 /* Compute a bit mask of which core registers need to be
21346 saved on the stack for the current function. */
21347 static unsigned long
21348 thumb1_compute_save_core_reg_mask (void)
21350 unsigned long mask;
21351 unsigned reg;
21353 mask = 0;
21354 for (reg = 0; reg < 12; reg ++)
21355 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21356 mask |= 1 << reg;
21358 /* Handle the frame pointer as a special case. */
21359 if (frame_pointer_needed)
21360 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21362 if (flag_pic
21363 && !TARGET_SINGLE_PIC_BASE
21364 && arm_pic_register != INVALID_REGNUM
21365 && crtl->uses_pic_offset_table)
21366 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21368 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21369 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21370 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21372 /* LR will also be pushed if any lo regs are pushed. */
21373 if (mask & 0xff || thumb_force_lr_save ())
21374 mask |= (1 << LR_REGNUM);
21376 bool call_clobbered_scratch
21377 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21378 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21380 /* Make sure we have a low work register if we need one. We will
21381 need one if we are going to push a high register, but we are not
21382 currently intending to push a low register. However if both the
21383 prologue and epilogue have a spare call-clobbered low register,
21384 then we won't need to find an additional work register. It does
21385 not need to be the same register in the prologue and
21386 epilogue. */
21387 if ((mask & 0xff) == 0
21388 && !call_clobbered_scratch
21389 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21391 /* Use thumb_find_work_register to choose which register
21392 we will use. If the register is live then we will
21393 have to push it. Use LAST_LO_REGNUM as our fallback
21394 choice for the register to select. */
21395 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21396 /* Make sure the register returned by thumb_find_work_register is
21397 not part of the return value. */
21398 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21399 reg = LAST_LO_REGNUM;
21401 if (callee_saved_reg_p (reg))
21402 mask |= 1 << reg;
21405 /* The 504 below is 8 bytes less than 512 because there are two possible
21406 alignment words. We can't tell here if they will be present or not so we
21407 have to play it safe and assume that they are. */
21408 if ((CALLER_INTERWORKING_SLOT_SIZE +
21409 ROUND_UP_WORD (get_frame_size ()) +
21410 crtl->outgoing_args_size) >= 504)
21412 /* This is the same as the code in thumb1_expand_prologue() which
21413 determines which register to use for stack decrement. */
21414 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21415 if (mask & (1 << reg))
21416 break;
21418 if (reg > LAST_LO_REGNUM)
21420 /* Make sure we have a register available for stack decrement. */
21421 mask |= 1 << LAST_LO_REGNUM;
21425 return mask;
21428 /* Return the number of bytes required to save VFP registers. */
21429 static int
21430 arm_get_vfp_saved_size (void)
21432 unsigned int regno;
21433 int count;
21434 int saved;
21436 saved = 0;
21437 /* Space for saved VFP registers. */
21438 if (TARGET_VFP_BASE)
21440 count = 0;
21441 for (regno = FIRST_VFP_REGNUM;
21442 regno < LAST_VFP_REGNUM;
21443 regno += 2)
21445 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21447 if (count > 0)
21449 /* Workaround ARM10 VFPr1 bug. */
21450 if (count == 2 && !arm_arch6)
21451 count++;
21452 saved += count * 8;
21454 count = 0;
21456 else
21457 count++;
21459 if (count > 0)
21461 if (count == 2 && !arm_arch6)
21462 count++;
21463 saved += count * 8;
21466 return saved;
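/* For example (hypothetical register usage): if only d8 and d9 need
   saving, the loop above finds one contiguous run of two D registers.
   On arm_arch6 and later that costs 2 * 8 = 16 bytes of stack; on
   earlier cores the ARM10 VFPr1 workaround bumps the count from 2 to
   3, so 24 bytes are reserved instead.  */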
21470 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21471 everything bar the final return instruction. If simple_return is true,
21472 then do not output epilogue, because it has already been emitted in RTL.
21474 Note: do not forget to update length attribute of corresponding insn pattern
21475 when changing assembly output (eg. length attribute of
21476 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21477 register clearing sequences). */
21478 const char *
21479 output_return_instruction (rtx operand, bool really_return, bool reverse,
21480 bool simple_return)
21482 char conditional[10];
21483 char instr[100];
21484 unsigned reg;
21485 unsigned long live_regs_mask;
21486 unsigned long func_type;
21487 arm_stack_offsets *offsets;
21489 func_type = arm_current_func_type ();
21491 if (IS_NAKED (func_type))
21492 return "";
21494 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21496 /* If this function was declared non-returning, and we have
21497 found a tail call, then we have to trust that the called
21498 function won't return. */
21499 if (really_return)
21501 rtx ops[2];
21503 /* Otherwise, trap an attempted return by aborting. */
21504 ops[0] = operand;
21505 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21506 : "abort");
21507 assemble_external_libcall (ops[1]);
21508 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21511 return "";
21514 gcc_assert (!cfun->calls_alloca || really_return);
21516 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21518 cfun->machine->return_used_this_function = 1;
21520 offsets = arm_get_frame_offsets ();
21521 live_regs_mask = offsets->saved_regs_mask;
21523 if (!simple_return && live_regs_mask)
21525 const char * return_reg;
21527 /* If we do not have any special requirements for function exit
21528 (e.g. interworking) then we can load the return address
21529 directly into the PC. Otherwise we must load it into LR. */
21530 if (really_return
21531 && !IS_CMSE_ENTRY (func_type)
21532 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21533 return_reg = reg_names[PC_REGNUM];
21534 else
21535 return_reg = reg_names[LR_REGNUM];
21537 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21539 /* There are three possible reasons for the IP register
21540 being saved. 1) a stack frame was created, in which case
21541 IP contains the old stack pointer, or 2) an ISR routine
21542 corrupted it, or 3) it was saved to align the stack on
21543 iWMMXt. In case 1, restore IP into SP, otherwise just
21544 restore IP. */
21545 if (frame_pointer_needed)
21547 live_regs_mask &= ~ (1 << IP_REGNUM);
21548 live_regs_mask |= (1 << SP_REGNUM);
21550 else
21551 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21554 /* On some ARM architectures it is faster to use LDR rather than
21555 LDM to load a single register. On other architectures, the
21556 cost is the same. In 26 bit mode, or for exception handlers,
21557 we have to use LDM to load the PC so that the CPSR is also
21558 restored. */
21559 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21560 if (live_regs_mask == (1U << reg))
21561 break;
21563 if (reg <= LAST_ARM_REGNUM
21564 && (reg != LR_REGNUM
21565 || ! really_return
21566 || ! IS_INTERRUPT (func_type)))
21568 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21569 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21571 else
21573 char *p;
21574 int first = 1;
21576 /* Generate the load multiple instruction to restore the
21577 registers. Note we can get here, even if
21578 frame_pointer_needed is true, but only if sp already
21579 points to the base of the saved core registers. */
21580 if (live_regs_mask & (1 << SP_REGNUM))
21582 unsigned HOST_WIDE_INT stack_adjust;
21584 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21585 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21587 if (stack_adjust && arm_arch5t && TARGET_ARM)
21588 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21589 else
21591 /* If we can't use ldmib (SA110 bug),
21592 then try to pop r3 instead. */
21593 if (stack_adjust)
21594 live_regs_mask |= 1 << 3;
21596 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21599 /* For interrupt returns we have to use an LDM rather than
21600 a POP so that we can use the exception return variant. */
21601 else if (IS_INTERRUPT (func_type))
21602 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21603 else
21604 sprintf (instr, "pop%s\t{", conditional);
21606 p = instr + strlen (instr);
21608 for (reg = 0; reg <= SP_REGNUM; reg++)
21609 if (live_regs_mask & (1 << reg))
21611 int l = strlen (reg_names[reg]);
21613 if (first)
21614 first = 0;
21615 else
21617 memcpy (p, ", ", 2);
21618 p += 2;
21621 memcpy (p, "%|", 2);
21622 memcpy (p + 2, reg_names[reg], l);
21623 p += l + 2;
21626 if (live_regs_mask & (1 << LR_REGNUM))
21628 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21629 /* If returning from an interrupt, restore the CPSR. */
21630 if (IS_INTERRUPT (func_type))
21631 strcat (p, "^");
21633 else
21634 strcpy (p, "}");
21637 output_asm_insn (instr, & operand);
21639 /* See if we need to generate an extra instruction to
21640 perform the actual function return. */
21641 if (really_return
21642 && func_type != ARM_FT_INTERWORKED
21643 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21645 /* The return has already been handled
21646 by loading the LR into the PC. */
21647 return "";
21651 if (really_return)
21653 switch ((int) ARM_FUNC_TYPE (func_type))
21655 case ARM_FT_ISR:
21656 case ARM_FT_FIQ:
21657 /* ??? This is wrong for unified assembly syntax. */
21658 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21659 break;
21661 case ARM_FT_INTERWORKED:
21662 gcc_assert (arm_arch5t || arm_arch4t);
21663 sprintf (instr, "bx%s\t%%|lr", conditional);
21664 break;
21666 case ARM_FT_EXCEPTION:
21667 /* ??? This is wrong for unified assembly syntax. */
21668 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21669 break;
21671 default:
21672 if (IS_CMSE_ENTRY (func_type))
21674 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21675 emitted by cmse_nonsecure_entry_clear_before_return () and the
21676 VSTR/VLDR instructions in the prologue and epilogue. */
21677 if (!TARGET_HAVE_FPCXT_CMSE)
21679 /* Check if we have to clear the 'GE bits' which is only used if
21680 parallel add and subtraction instructions are available. */
21681 if (TARGET_INT_SIMD)
21682 snprintf (instr, sizeof (instr),
21683 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21684 else
21685 snprintf (instr, sizeof (instr),
21686 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21688 output_asm_insn (instr, & operand);
21689 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21690 care of it. */
21691 if (TARGET_HARD_FLOAT)
21693 /* Clear the cumulative exception-status bits (0-4,7) and
21694 the condition code bits (28-31) of the FPSCR. We need
21695 to remember to clear the first scratch register used
21696 (IP) and save and restore the second (r4).
21698 Important note: the length of the
21699 thumb2_cmse_entry_return insn pattern must account for
21700 the size of the below instructions. */
21701 output_asm_insn ("push\t{%|r4}", & operand);
21702 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21703 output_asm_insn ("movw\t%|r4, #65376", & operand);
21704 output_asm_insn ("movt\t%|r4, #4095", & operand);
21705 output_asm_insn ("and\t%|ip, %|r4", & operand);
21706 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21707 output_asm_insn ("pop\t{%|r4}", & operand);
21708 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21711 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21713 /* Use bx if it's available. */
21714 else if (arm_arch5t || arm_arch4t)
21715 sprintf (instr, "bx%s\t%%|lr", conditional);
21716 else
21717 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21718 break;
21721 output_asm_insn (instr, & operand);
21724 return "";
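/* A concrete (but hypothetical) illustration of the strings built by
   the routine above: for a normal ARM-mode function whose saved core
   registers are r4, r5 and lr, with no interworking or CMSE
   requirements, the unconditional return comes out simply as

	pop	{r4, r5, pc}

   An IRQ handler instead restores its registers with an LDMFD; if lr
   was saved, the PC is loaded directly using the CPSR-restoring form
   (a trailing '^'), otherwise the handler finishes with
   "subs pc, lr, #4".  */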
21727 /* Output in FILE asm statements needed to declare the NAME of the function
21728 defined by its DECL node. */
21730 void
21731 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21733 size_t cmse_name_len;
21734 char *cmse_name = 0;
21735 char cmse_prefix[] = "__acle_se_";
21737 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21738 extra function label for each function with the 'cmse_nonsecure_entry'
21739 attribute. This extra function label should be prepended with
21740 '__acle_se_', telling the linker that it needs to create secure gateway
21741 veneers for this function. */
21742 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21743 DECL_ATTRIBUTES (decl)))
21745 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21746 cmse_name = XALLOCAVEC (char, cmse_name_len);
21747 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21748 targetm.asm_out.globalize_label (file, cmse_name);
21750 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21751 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21754 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21755 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21756 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21757 ASM_OUTPUT_LABEL (file, name);
21759 if (cmse_name)
21760 ASM_OUTPUT_LABEL (file, cmse_name);
21762 ARM_OUTPUT_FN_UNWIND (file, TRUE);
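/* For a hypothetical cmse_nonsecure_entry function called "foo", the
   directives emitted above come out roughly as (target-specific
   directives such as .thumb_func and the unwinding markers omitted):

	.globl	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create a secure gateway veneer named after
   the __acle_se_ label.  */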
21765 /* Write the function name into the code section, directly preceding
21766 the function prologue.
21768 Code will be output similar to this:
21770 .ascii "arm_poke_function_name", 0
21771 .align
21773 .word 0xff000000 + (t1 - t0)
21774 arm_poke_function_name
21775 mov ip, sp
21776 stmfd sp!, {fp, ip, lr, pc}
21777 sub fp, ip, #4
21779 When performing a stack backtrace, code can inspect the value
21780 of 'pc' stored at 'fp' + 0. If the trace function then looks
21781 at location pc - 12 and the top 8 bits are set, then we know
21782 that there is a function name embedded immediately preceding this
21783 location, whose length is ((pc[-3]) & ~0xff000000).
21785 We assume that pc is declared as a pointer to an unsigned long.
21787 It is of no benefit to output the function name if we are assembling
21788 a leaf function. These function types will not contain a stack
21789 backtrace structure, therefore it is not possible to determine the
21790 function name. */
21791 void
21792 arm_poke_function_name (FILE *stream, const char *name)
21794 unsigned long alignlength;
21795 unsigned long length;
21796 rtx x;
21798 length = strlen (name) + 1;
21799 alignlength = ROUND_UP_WORD (length);
21801 ASM_OUTPUT_ASCII (stream, name, length);
21802 ASM_OUTPUT_ALIGN (stream, 2);
21803 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21804 assemble_aligned_integer (UNITS_PER_WORD, x);
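/* Worked example (the function name is hypothetical): for a function
   called "fact" the name occupies 5 bytes including the terminating
   NUL, which rounds up to 8, so the routine above emits approximately

	.ascii	"fact\000"
	.align	2
	.word	0xff000008

   immediately before the function label.  A backtracer that finds the
   0xff marker in the top byte can then recover the (word-rounded)
   name length from the low bits of that word.  */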
21807 /* Place some comments into the assembler stream
21808 describing the current function. */
21809 static void
21810 arm_output_function_prologue (FILE *f)
21812 unsigned long func_type;
21814 /* Sanity check. */
21815 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21817 func_type = arm_current_func_type ();
21819 switch ((int) ARM_FUNC_TYPE (func_type))
21821 default:
21822 case ARM_FT_NORMAL:
21823 break;
21824 case ARM_FT_INTERWORKED:
21825 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21826 break;
21827 case ARM_FT_ISR:
21828 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21829 break;
21830 case ARM_FT_FIQ:
21831 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21832 break;
21833 case ARM_FT_EXCEPTION:
21834 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21835 break;
21838 if (IS_NAKED (func_type))
21839 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21841 if (IS_VOLATILE (func_type))
21842 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21844 if (IS_NESTED (func_type))
21845 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21846 if (IS_STACKALIGN (func_type))
21847 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21848 if (IS_CMSE_ENTRY (func_type))
21849 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21851 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21852 (HOST_WIDE_INT) crtl->args.size,
21853 crtl->args.pretend_args_size,
21854 (HOST_WIDE_INT) get_frame_size ());
21856 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21857 frame_pointer_needed,
21858 cfun->machine->uses_anonymous_args);
21860 if (cfun->machine->lr_save_eliminated)
21861 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21863 if (crtl->calls_eh_return)
21864 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21868 static void
21869 arm_output_function_epilogue (FILE *)
21871 arm_stack_offsets *offsets;
21873 if (TARGET_THUMB1)
21875 int regno;
21877 /* Emit any call-via-reg trampolines that are needed for v4t support
21878 of call_reg and call_value_reg type insns. */
21879 for (regno = 0; regno < LR_REGNUM; regno++)
21881 rtx label = cfun->machine->call_via[regno];
21883 if (label != NULL)
21885 switch_to_section (function_section (current_function_decl));
21886 targetm.asm_out.internal_label (asm_out_file, "L",
21887 CODE_LABEL_NUMBER (label));
21888 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21892 /* ??? Probably not safe to set this here, since it assumes that a
21893 function will be emitted as assembly immediately after we generate
21894 RTL for it. This does not happen for inline functions. */
21895 cfun->machine->return_used_this_function = 0;
21897 else /* TARGET_32BIT */
21899 /* We need to take into account any stack-frame rounding. */
21900 offsets = arm_get_frame_offsets ();
21902 gcc_assert (!use_return_insn (FALSE, NULL)
21903 || (cfun->machine->return_used_this_function != 0)
21904 || offsets->saved_regs == offsets->outgoing_args
21905 || frame_pointer_needed);
21909 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21910 STR and STRD. If an even number of registers is being pushed, an
21911 STRD pattern is created for each register pair. If an
21912 odd number of registers are pushed, emit an initial STR followed by
21913 as many STRD instructions as are needed. This works best when the
21914 stack is initially 64-bit aligned (the normal case), since it
21915 ensures that each STRD is also 64-bit aligned. */
21916 static void
21917 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21919 int num_regs = 0;
21920 int i;
21921 int regno;
21922 rtx par = NULL_RTX;
21923 rtx dwarf = NULL_RTX;
21924 rtx tmp;
21925 bool first = true;
21927 num_regs = bit_count (saved_regs_mask);
21929 /* Must be at least one register to save, and can't save SP or PC. */
21930 gcc_assert (num_regs > 0 && num_regs <= 14);
21931 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21932 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21934 /* Create sequence for DWARF info. All the frame-related data for
21935 debugging is held in this wrapper. */
21936 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21938 /* Describe the stack adjustment. */
21939 tmp = gen_rtx_SET (stack_pointer_rtx,
21940 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21941 RTX_FRAME_RELATED_P (tmp) = 1;
21942 XVECEXP (dwarf, 0, 0) = tmp;
21944 /* Find the first register. */
21945 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21948 i = 0;
21950 /* If there's an odd number of registers to push. Start off by
21951 pushing a single register. This ensures that subsequent strd
21952 operations are dword aligned (assuming that SP was originally
21953 64-bit aligned). */
21954 if ((num_regs & 1) != 0)
21956 rtx reg, mem, insn;
21958 reg = gen_rtx_REG (SImode, regno);
21959 if (num_regs == 1)
21960 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21961 stack_pointer_rtx));
21962 else
21963 mem = gen_frame_mem (Pmode,
21964 gen_rtx_PRE_MODIFY
21965 (Pmode, stack_pointer_rtx,
21966 plus_constant (Pmode, stack_pointer_rtx,
21967 -4 * num_regs)));
21969 tmp = gen_rtx_SET (mem, reg);
21970 RTX_FRAME_RELATED_P (tmp) = 1;
21971 insn = emit_insn (tmp);
21972 RTX_FRAME_RELATED_P (insn) = 1;
21973 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21974 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21975 RTX_FRAME_RELATED_P (tmp) = 1;
21976 i++;
21977 regno++;
21978 XVECEXP (dwarf, 0, i) = tmp;
21979 first = false;
21982 while (i < num_regs)
21983 if (saved_regs_mask & (1 << regno))
21985 rtx reg1, reg2, mem1, mem2;
21986 rtx tmp0, tmp1, tmp2;
21987 int regno2;
21989 /* Find the register to pair with this one. */
21990 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21991 regno2++)
21994 reg1 = gen_rtx_REG (SImode, regno);
21995 reg2 = gen_rtx_REG (SImode, regno2);
21997 if (first)
21999 rtx insn;
22001 first = false;
22002 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22003 stack_pointer_rtx,
22004 -4 * num_regs));
22005 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22006 stack_pointer_rtx,
22007 -4 * (num_regs - 1)));
22008 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22009 plus_constant (Pmode, stack_pointer_rtx,
22010 -4 * (num_regs)));
22011 tmp1 = gen_rtx_SET (mem1, reg1);
22012 tmp2 = gen_rtx_SET (mem2, reg2);
22013 RTX_FRAME_RELATED_P (tmp0) = 1;
22014 RTX_FRAME_RELATED_P (tmp1) = 1;
22015 RTX_FRAME_RELATED_P (tmp2) = 1;
22016 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22017 XVECEXP (par, 0, 0) = tmp0;
22018 XVECEXP (par, 0, 1) = tmp1;
22019 XVECEXP (par, 0, 2) = tmp2;
22020 insn = emit_insn (par);
22021 RTX_FRAME_RELATED_P (insn) = 1;
22022 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22024 else
22026 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22027 stack_pointer_rtx,
22028 4 * i));
22029 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22030 stack_pointer_rtx,
22031 4 * (i + 1)));
22032 tmp1 = gen_rtx_SET (mem1, reg1);
22033 tmp2 = gen_rtx_SET (mem2, reg2);
22034 RTX_FRAME_RELATED_P (tmp1) = 1;
22035 RTX_FRAME_RELATED_P (tmp2) = 1;
22036 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22037 XVECEXP (par, 0, 0) = tmp1;
22038 XVECEXP (par, 0, 1) = tmp2;
22039 emit_insn (par);
22042 /* Create unwind information. This is an approximation. */
22043 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22044 plus_constant (Pmode,
22045 stack_pointer_rtx,
22046 4 * i)),
22047 reg1);
22048 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22049 plus_constant (Pmode,
22050 stack_pointer_rtx,
22051 4 * (i + 1))),
22052 reg2);
22054 RTX_FRAME_RELATED_P (tmp1) = 1;
22055 RTX_FRAME_RELATED_P (tmp2) = 1;
22056 XVECEXP (dwarf, 0, i + 1) = tmp1;
22057 XVECEXP (dwarf, 0, i + 2) = tmp2;
22058 i += 2;
22059 regno = regno2 + 1;
22061 else
22062 regno++;
22064 return;
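/* Sketch of the sequence this produces for a hypothetical mask
   containing r4, r5 and r6 (an odd count, so a single STR leads):

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   The first store performs the whole 12-byte stack allocation via
   writeback, keeping the following STRD doubleword-aligned.  */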
22067 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22068 whenever possible, otherwise it emits single-word stores. The first store
22069 also allocates stack space for all saved registers, using writeback with
22070 post-addressing mode. All other stores use offset addressing. If no STRD
22071 can be emitted, this function emits a sequence of single-word stores,
22072 and not an STM as before, because single-word stores give the scheduler
22073 more freedom and can be turned into an STM by peephole optimizations. */
22074 static void
22075 arm_emit_strd_push (unsigned long saved_regs_mask)
22077 int num_regs = 0;
22078 int i, j, dwarf_index = 0;
22079 int offset = 0;
22080 rtx dwarf = NULL_RTX;
22081 rtx insn = NULL_RTX;
22082 rtx tmp, mem;
22084 /* TODO: More efficient code can be emitted by changing the
22085 layout, e.g., first push all pairs that can use STRD to keep the
22086 stack aligned, and then push all other registers. */
22087 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22088 if (saved_regs_mask & (1 << i))
22089 num_regs++;
22091 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22092 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22093 gcc_assert (num_regs > 0);
22095 /* Create sequence for DWARF info. */
22096 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22098 /* For dwarf info, we generate explicit stack update. */
22099 tmp = gen_rtx_SET (stack_pointer_rtx,
22100 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22101 RTX_FRAME_RELATED_P (tmp) = 1;
22102 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22104 /* Save registers. */
22105 offset = - 4 * num_regs;
22106 j = 0;
22107 while (j <= LAST_ARM_REGNUM)
22108 if (saved_regs_mask & (1 << j))
22110 if ((j % 2 == 0)
22111 && (saved_regs_mask & (1 << (j + 1))))
22113 /* The current register and the next register form a register pair
22114 for which STRD can be generated. */
22115 if (offset < 0)
22117 /* Allocate stack space for all saved registers. */
22118 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22119 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22120 mem = gen_frame_mem (DImode, tmp);
22121 offset = 0;
22123 else if (offset > 0)
22124 mem = gen_frame_mem (DImode,
22125 plus_constant (Pmode,
22126 stack_pointer_rtx,
22127 offset));
22128 else
22129 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22131 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22132 RTX_FRAME_RELATED_P (tmp) = 1;
22133 tmp = emit_insn (tmp);
22135 /* Record the first store insn. */
22136 if (dwarf_index == 1)
22137 insn = tmp;
22139 /* Generate dwarf info. */
22140 mem = gen_frame_mem (SImode,
22141 plus_constant (Pmode,
22142 stack_pointer_rtx,
22143 offset));
22144 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22145 RTX_FRAME_RELATED_P (tmp) = 1;
22146 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22148 mem = gen_frame_mem (SImode,
22149 plus_constant (Pmode,
22150 stack_pointer_rtx,
22151 offset + 4));
22152 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22153 RTX_FRAME_RELATED_P (tmp) = 1;
22154 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22156 offset += 8;
22157 j += 2;
22159 else
22161 /* Emit a single word store. */
22162 if (offset < 0)
22164 /* Allocate stack space for all saved registers. */
22165 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22166 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22167 mem = gen_frame_mem (SImode, tmp);
22168 offset = 0;
22170 else if (offset > 0)
22171 mem = gen_frame_mem (SImode,
22172 plus_constant (Pmode,
22173 stack_pointer_rtx,
22174 offset));
22175 else
22176 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22178 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22179 RTX_FRAME_RELATED_P (tmp) = 1;
22180 tmp = emit_insn (tmp);
22182 /* Record the first store insn. */
22183 if (dwarf_index == 1)
22184 insn = tmp;
22186 /* Generate dwarf info. */
22187 mem = gen_frame_mem (SImode,
22188 plus_constant(Pmode,
22189 stack_pointer_rtx,
22190 offset));
22191 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22192 RTX_FRAME_RELATED_P (tmp) = 1;
22193 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22195 offset += 4;
22196 j += 1;
22199 else
22200 j++;
22202 /* Attach dwarf info to the first insn we generate. */
22203 gcc_assert (insn != NULL_RTX);
22204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22205 RTX_FRAME_RELATED_P (insn) = 1;
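/* Sketch for ARM mode with a hypothetical mask of r4, r5 and r7: r4/r5
   form an even-numbered consecutive pair and get an STRD whose
   writeback also allocates the full 12 bytes, while r7 has no partner
   and falls back to a single-word store:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]  */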
22208 /* Generate and emit an insn that we will recognize as a push_multi.
22209 Unfortunately, since this insn does not reflect very well the actual
22210 semantics of the operation, we need to annotate the insn for the benefit
22211 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22212 MASK for registers that should be annotated for DWARF2 frame unwind
22213 information. */
22214 static rtx
22215 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22217 int num_regs = 0;
22218 int num_dwarf_regs = 0;
22219 int i, j;
22220 rtx par;
22221 rtx dwarf;
22222 int dwarf_par_index;
22223 rtx tmp, reg;
22225 /* We don't record the PC in the dwarf frame information. */
22226 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22228 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22230 if (mask & (1 << i))
22231 num_regs++;
22232 if (dwarf_regs_mask & (1 << i))
22233 num_dwarf_regs++;
22236 gcc_assert (num_regs && num_regs <= 16);
22237 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22239 /* For the body of the insn we are going to generate an UNSPEC in
22240 parallel with several USEs. This allows the insn to be recognized
22241 by the push_multi pattern in the arm.md file.
22243 The body of the insn looks something like this:
22245 (parallel [
22246 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22247 (const_int:SI <num>)))
22248 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22249 (use (reg:SI XX))
22250 (use (reg:SI YY))
22254 For the frame note however, we try to be more explicit and actually
22255 show each register being stored into the stack frame, plus a (single)
22256 decrement of the stack pointer. We do it this way in order to be
22257 friendly to the stack unwinding code, which only wants to see a single
22258 stack decrement per instruction. The RTL we generate for the note looks
22259 something like this:
22261 (sequence [
22262 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22263 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22264 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22265 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22269 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22270 instead we'd have a parallel expression detailing all
22271 the stores to the various memory addresses so that debug
22272 information is more up-to-date. Remember however while writing
22273 this to take care of the constraints with the push instruction.
22275 Note also that this has to be taken care of for the VFP registers.
22277 For more see PR43399. */
22279 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22280 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22281 dwarf_par_index = 1;
22283 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22285 if (mask & (1 << i))
22287 /* NOTE: The DWARF code emitter handles reg-reg copies correctly; in the
22288 following example the reg-reg copy of SP to the IP register is handled
22289 through the .cfi_def_cfa_register directive and the .cfi_offset
22290 directive for the IP register is skipped by the DWARF code emitter.
22291 Example:
22292 mov ip, sp
22293 .cfi_def_cfa_register 12
22294 push {fp, ip, lr, pc}
22295 .cfi_offset 11, -16
22296 .cfi_offset 13, -12
22297 .cfi_offset 14, -8
22299 The Arm-specific .save directive handling, by contrast, differs from
22300 the DWARF code emitter in that it doesn't consider reg-reg copies while
22301 updating the register list. When PACBTI is enabled we manually
22302 update the .save directive register list to use "ra_auth_code"
22303 (pseudo register 143) instead of the IP register, as shown in the
22304 following pseudo code.
22305 Example:
22306 pacbti ip, lr, sp
22307 .cfi_register 143, 12
22308 push {r3, r7, ip, lr}
22309 .save {r3, r7, ra_auth_code, lr}
22311 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22312 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22313 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22315 XVECEXP (par, 0, 0)
22316 = gen_rtx_SET (gen_frame_mem
22317 (BLKmode,
22318 gen_rtx_PRE_MODIFY (Pmode,
22319 stack_pointer_rtx,
22320 plus_constant
22321 (Pmode, stack_pointer_rtx,
22322 -4 * num_regs))
22324 gen_rtx_UNSPEC (BLKmode,
22325 gen_rtvec (1, reg),
22326 UNSPEC_PUSH_MULT));
22328 if (dwarf_regs_mask & (1 << i))
22330 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22331 dwarf_reg);
22332 RTX_FRAME_RELATED_P (tmp) = 1;
22333 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22336 break;
22340 for (j = 1, i++; j < num_regs; i++)
22342 if (mask & (1 << i))
22344 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22345 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22346 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22348 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22350 if (dwarf_regs_mask & (1 << i))
22353 = gen_rtx_SET (gen_frame_mem
22354 (SImode,
22355 plus_constant (Pmode, stack_pointer_rtx,
22356 4 * j)),
22357 dwarf_reg);
22358 RTX_FRAME_RELATED_P (tmp) = 1;
22359 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22362 j++;
22366 par = emit_insn (par);
22368 tmp = gen_rtx_SET (stack_pointer_rtx,
22369 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22370 RTX_FRAME_RELATED_P (tmp) = 1;
22371 XVECEXP (dwarf, 0, 0) = tmp;
22373 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22375 return par;
22378 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22379 SIZE is the offset to be adjusted.
22380 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22381 static void
22382 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22384 rtx dwarf;
22386 RTX_FRAME_RELATED_P (insn) = 1;
22387 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22388 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22391 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22392 SAVED_REGS_MASK shows which registers need to be restored.
22394 Unfortunately, since this insn does not reflect very well the actual
22395 semantics of the operation, we need to annotate the insn for the benefit
22396 of DWARF2 frame unwind information. */
22397 static void
22398 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22400 int num_regs = 0;
22401 int i, j;
22402 rtx par;
22403 rtx dwarf = NULL_RTX;
22404 rtx tmp, reg;
22405 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22406 int offset_adj;
22407 int emit_update;
22409 offset_adj = return_in_pc ? 1 : 0;
22410 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22411 if (saved_regs_mask & (1 << i))
22412 num_regs++;
22414 gcc_assert (num_regs && num_regs <= 16);
22416 /* If SP is in reglist, then we don't emit SP update insn. */
22417 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22419 /* The parallel needs to hold num_regs SETs
22420 and one SET for the stack update. */
22421 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22423 if (return_in_pc)
22424 XVECEXP (par, 0, 0) = ret_rtx;
22426 if (emit_update)
22428 /* Increment the stack pointer, based on there being
22429 num_regs 4-byte registers to restore. */
22430 tmp = gen_rtx_SET (stack_pointer_rtx,
22431 plus_constant (Pmode,
22432 stack_pointer_rtx,
22433 4 * num_regs));
22434 RTX_FRAME_RELATED_P (tmp) = 1;
22435 XVECEXP (par, 0, offset_adj) = tmp;
22438 /* Now restore every reg, which may include PC. */
22439 for (j = 0, i = 0; j < num_regs; i++)
22440 if (saved_regs_mask & (1 << i))
22442 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22443 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22444 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22445 if ((num_regs == 1) && emit_update && !return_in_pc)
22447 /* Emit single load with writeback. */
22448 tmp = gen_frame_mem (SImode,
22449 gen_rtx_POST_INC (Pmode,
22450 stack_pointer_rtx));
22451 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22452 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22453 dwarf);
22454 return;
22457 tmp = gen_rtx_SET (reg,
22458 gen_frame_mem
22459 (SImode,
22460 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22461 RTX_FRAME_RELATED_P (tmp) = 1;
22462 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22464 /* We need to maintain a sequence for DWARF info too. As dwarf info
22465 should not have PC, skip PC. */
22466 if (i != PC_REGNUM)
22467 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22469 j++;
22472 if (return_in_pc)
22473 par = emit_jump_insn (par);
22474 else
22475 par = emit_insn (par);
22477 REG_NOTES (par) = dwarf;
22478 if (!return_in_pc)
22479 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22480 stack_pointer_rtx, stack_pointer_rtx);
22483 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22484 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22486 Unfortunately, since this insn does not reflect very well the actual
22487 semantics of the operation, we need to annotate the insn for the benefit
22488 of DWARF2 frame unwind information. */
22489 static void
22490 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22492 int i, j;
22493 rtx par;
22494 rtx dwarf = NULL_RTX;
22495 rtx tmp, reg;
22497 gcc_assert (num_regs && num_regs <= 32);
22499 /* Workaround ARM10 VFPr1 bug. */
22500 if (num_regs == 2 && !arm_arch6)
22502 if (first_reg == 15)
22503 first_reg--;
22505 num_regs++;
22508 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22509 there could be up to 32 D-registers to restore.
22510 If there are more than 16 D-registers, make two recursive calls,
22511 each of which emits one pop_multi instruction. */
22512 if (num_regs > 16)
22514 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22515 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22516 return;
22519 /* The parallel needs to hold num_regs SETs
22520 and one SET for the stack update. */
22521 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22523 /* Increment the stack pointer, based on there being
22524 num_regs 8-byte registers to restore. */
22525 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22526 RTX_FRAME_RELATED_P (tmp) = 1;
22527 XVECEXP (par, 0, 0) = tmp;
22529 /* Now show every reg that will be restored, using a SET for each. */
22530 for (j = 0, i=first_reg; j < num_regs; i += 2)
22532 reg = gen_rtx_REG (DFmode, i);
22534 tmp = gen_rtx_SET (reg,
22535 gen_frame_mem
22536 (DFmode,
22537 plus_constant (Pmode, base_reg, 8 * j)));
22538 RTX_FRAME_RELATED_P (tmp) = 1;
22539 XVECEXP (par, 0, j + 1) = tmp;
22541 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22543 j++;
22546 par = emit_insn (par);
22547 REG_NOTES (par) = dwarf;
22549 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
22550 if (REGNO (base_reg) == IP_REGNUM)
22552 RTX_FRAME_RELATED_P (par) = 1;
22553 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22555 else
22556 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22557 base_reg, base_reg);
22560 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22561 even number of registers is being popped, LDRD patterns are created for
22562 all register pairs. If an odd number of registers is popped, the last register
22563 is loaded using an LDR pattern. */
22564 static void
22565 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22567 int num_regs = 0;
22568 int i, j;
22569 rtx par = NULL_RTX;
22570 rtx dwarf = NULL_RTX;
22571 rtx tmp, reg, tmp1;
22572 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22574 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22575 if (saved_regs_mask & (1 << i))
22576 num_regs++;
22578 gcc_assert (num_regs && num_regs <= 16);
22580 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22581 to be popped. So, if num_regs is even, now it will become odd,
22582 and we can generate pop with PC. If num_regs is odd, it will be
22583 even now, and ldr with return can be generated for PC. */
22584 if (return_in_pc)
22585 num_regs--;
22587 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22589 /* Var j iterates over all the registers to gather all the registers in
22590 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
22591 A PARALLEL RTX of a register pair is created here, so that the pattern for
22592 LDRD can be matched. As PC is always the last register to be popped, and
22593 we have already decremented num_regs if PC is present, we don't have to worry
22594 about PC in this loop. */
22595 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22596 if (saved_regs_mask & (1 << j))
22598 /* Create RTX for memory load. */
22599 reg = gen_rtx_REG (SImode, j);
22600 tmp = gen_rtx_SET (reg,
22601 gen_frame_mem (SImode,
22602 plus_constant (Pmode,
22603 stack_pointer_rtx, 4 * i)));
22604 RTX_FRAME_RELATED_P (tmp) = 1;
22606 if (i % 2 == 0)
22608 /* When saved-register index (i) is even, the RTX to be emitted is
22609 yet to be created. Hence create it first. The LDRD pattern we
22610 are generating is :
22611 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22612 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22613 where target registers need not be consecutive. */
22614 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22615 dwarf = NULL_RTX;
22618 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22619 added as 0th element and if i is odd, reg_i is added as 1st element
22620 of LDRD pattern shown above. */
22621 XVECEXP (par, 0, (i % 2)) = tmp;
22622 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22624 if ((i % 2) == 1)
22626 /* When saved-register index (i) is odd, RTXs for both the registers
22627 to be loaded are generated in above given LDRD pattern, and the
22628 pattern can be emitted now. */
22629 par = emit_insn (par);
22630 REG_NOTES (par) = dwarf;
22631 RTX_FRAME_RELATED_P (par) = 1;
22634 i++;
22637 /* If the number of registers pushed is odd and return_in_pc is false, or the
22638 number of registers is even and return_in_pc is true, the last register is
22639 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22640 then use LDR with post-increment. */
22642 /* Increment the stack pointer, based on there being
22643 num_regs 4-byte registers to restore. */
22644 tmp = gen_rtx_SET (stack_pointer_rtx,
22645 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22646 RTX_FRAME_RELATED_P (tmp) = 1;
22647 tmp = emit_insn (tmp);
22648 if (!return_in_pc)
22650 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22651 stack_pointer_rtx, stack_pointer_rtx);
22654 dwarf = NULL_RTX;
22656 if (((num_regs % 2) == 1 && !return_in_pc)
22657 || ((num_regs % 2) == 0 && return_in_pc))
22659 /* Scan for the single register to be popped. Skip until the saved
22660 register is found. */
22661 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22663 /* Gen LDR with post increment here. */
22664 tmp1 = gen_rtx_MEM (SImode,
22665 gen_rtx_POST_INC (SImode,
22666 stack_pointer_rtx));
22667 set_mem_alias_set (tmp1, get_frame_alias_set ());
22669 reg = gen_rtx_REG (SImode, j);
22670 tmp = gen_rtx_SET (reg, tmp1);
22671 RTX_FRAME_RELATED_P (tmp) = 1;
22672 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22674 if (return_in_pc)
22676 /* If return_in_pc, j must be PC_REGNUM. */
22677 gcc_assert (j == PC_REGNUM);
22678 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22679 XVECEXP (par, 0, 0) = ret_rtx;
22680 XVECEXP (par, 0, 1) = tmp;
22681 par = emit_jump_insn (par);
22683 else
22685 par = emit_insn (tmp);
22686 REG_NOTES (par) = dwarf;
22687 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22688 stack_pointer_rtx, stack_pointer_rtx);
22692 else if ((num_regs % 2) == 1 && return_in_pc)
22694 /* There are 2 registers to be popped. So, generate the pattern
22695 pop_multiple_with_stack_update_and_return to pop in PC. */
22696 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22699 return;
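/* Editorial sketch (not part of GCC): the pairing arithmetic used above, on a
   plain core-register mask.  Bit 15 is PC; a "leftover" of 1 means one
   trailing LDR with post-increment (possibly into PC), a "leftover" of 2
   means a two-register pop_multi that also returns.  The helper name and the
   printf output are hypothetical.  */
#if 0
#include <stdio.h>

static void
describe_thumb2_ldrd_pop (unsigned long mask)
{
  const int pc_regnum = 15;
  int return_in_pc = (mask >> pc_regnum) & 1;
  int num_regs = __builtin_popcountl (mask & 0xffff);

  /* PC is never popped by LDRD, so it does not take part in the pairing.  */
  if (return_in_pc)
    num_regs--;

  int paired = num_regs - (num_regs % 2);
  int leftover = (num_regs % 2) + return_in_pc;

  printf ("%d registers restored with LDRD, %d left for the final load%s\n",
          paired, leftover, return_in_pc ? " (return in PC)" : "");
}
#endif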
22702 /* LDRD in ARM mode needs consecutive registers as operands. This function
22703 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22705 offset addressing and then generates one separate stack update. This provides
22705 more scheduling freedom, compared to writeback on every load. However,
22706 if the function returns using load into PC directly
22707 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22708 before the last load. TODO: Add a peephole optimization to recognize
22709 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22710 peephole optimization to merge the load at stack-offset zero
22711 with the stack update instruction using load with writeback
22712 in post-index addressing mode. */
22713 static void
22714 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22716 int j = 0;
22717 int offset = 0;
22718 rtx par = NULL_RTX;
22719 rtx dwarf = NULL_RTX;
22720 rtx tmp, mem;
22722 /* Restore saved registers. */
22723 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22724 j = 0;
22725 while (j <= LAST_ARM_REGNUM)
22726 if (saved_regs_mask & (1 << j))
22728 if ((j % 2) == 0
22729 && (saved_regs_mask & (1 << (j + 1)))
22730 && (j + 1) != PC_REGNUM)
22732 /* Current register and next register form register pair for which
22733 LDRD can be generated. PC is always the last register popped, and
22734 we handle it separately. */
22735 if (offset > 0)
22736 mem = gen_frame_mem (DImode,
22737 plus_constant (Pmode,
22738 stack_pointer_rtx,
22739 offset));
22740 else
22741 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22743 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22744 tmp = emit_insn (tmp);
22745 RTX_FRAME_RELATED_P (tmp) = 1;
22747 /* Generate dwarf info. */
22749 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22750 gen_rtx_REG (SImode, j),
22751 NULL_RTX);
22752 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22753 gen_rtx_REG (SImode, j + 1),
22754 dwarf);
22756 REG_NOTES (tmp) = dwarf;
22758 offset += 8;
22759 j += 2;
22761 else if (j != PC_REGNUM)
22763 /* Emit a single word load. */
22764 if (offset > 0)
22765 mem = gen_frame_mem (SImode,
22766 plus_constant (Pmode,
22767 stack_pointer_rtx,
22768 offset));
22769 else
22770 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22772 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22773 tmp = emit_insn (tmp);
22774 RTX_FRAME_RELATED_P (tmp) = 1;
22776 /* Generate dwarf info. */
22777 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22778 gen_rtx_REG (SImode, j),
22779 NULL_RTX);
22781 offset += 4;
22782 j += 1;
22784 else /* j == PC_REGNUM */
22785 j++;
22787 else
22788 j++;
22790 /* Update the stack. */
22791 if (offset > 0)
22793 tmp = gen_rtx_SET (stack_pointer_rtx,
22794 plus_constant (Pmode,
22795 stack_pointer_rtx,
22796 offset));
22797 tmp = emit_insn (tmp);
22798 arm_add_cfa_adjust_cfa_note (tmp, offset,
22799 stack_pointer_rtx, stack_pointer_rtx);
22800 offset = 0;
22803 if (saved_regs_mask & (1 << PC_REGNUM))
22805 /* Only PC is to be popped. */
22806 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22807 XVECEXP (par, 0, 0) = ret_rtx;
22808 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22809 gen_frame_mem (SImode,
22810 gen_rtx_POST_INC (SImode,
22811 stack_pointer_rtx)));
22812 RTX_FRAME_RELATED_P (tmp) = 1;
22813 XVECEXP (par, 0, 1) = tmp;
22814 par = emit_jump_insn (par);
22816 /* Generate dwarf info. */
22817 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22818 gen_rtx_REG (SImode, PC_REGNUM),
22819 NULL_RTX);
22820 REG_NOTES (par) = dwarf;
22821 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22822 stack_pointer_rtx, stack_pointer_rtx);
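/* Editorial sketch (not part of GCC): the pairing walk above on a plain
   register mask.  ARM-mode LDRD needs an even-numbered register followed by
   its odd partner, so only (j, j + 1) pairs with even j qualify; PC is left
   for the final post-increment load.  Helper name and output format are
   hypothetical.  */
#if 0
#include <stdio.h>

static void
plan_arm_ldrd_pop (unsigned long mask)
{
  const int pc_regnum = 15;
  int offset = 0;
  int j = 0;

  while (j <= pc_regnum)
    {
      if (!(mask & (1UL << j)))
        {
          j++;
          continue;
        }
      if ((j % 2) == 0 && (mask & (1UL << (j + 1))) && (j + 1) != pc_regnum)
        {
          printf ("ldrd r%d, r%d, [sp, #%d]\n", j, j + 1, offset);
          offset += 8;
          j += 2;
        }
      else if (j != pc_regnum)
        {
          printf ("ldr r%d, [sp, #%d]\n", j, offset);
          offset += 4;
          j++;
        }
      else
        j++;                    /* PC is popped after the stack update.  */
    }

  if (offset > 0)
    printf ("add sp, sp, #%d\n", offset);
  if (mask & (1UL << pc_regnum))
    printf ("ldr pc, [sp], #4\n");
}
#endif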
22826 /* Calculate the size of the return value that is passed in registers. */
22827 static unsigned
22828 arm_size_return_regs (void)
22830 machine_mode mode;
22832 if (crtl->return_rtx != 0)
22833 mode = GET_MODE (crtl->return_rtx);
22834 else
22835 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22837 return GET_MODE_SIZE (mode);
22840 /* Return true if the current function needs to save/restore LR. */
22841 static bool
22842 thumb_force_lr_save (void)
22844 return !cfun->machine->lr_save_eliminated
22845 && (!crtl->is_leaf
22846 || thumb_far_jump_used_p ()
22847 || df_regs_ever_live_p (LR_REGNUM));
22850 /* We do not know whether r3 will be available, because
22851 an indirect tailcall is happening in this
22852 particular case. */
22853 static bool
22854 is_indirect_tailcall_p (rtx call)
22856 rtx pat = PATTERN (call);
22858 /* Indirect tail call. */
22859 pat = XVECEXP (pat, 0, 0);
22860 if (GET_CODE (pat) == SET)
22861 pat = SET_SRC (pat);
22863 pat = XEXP (XEXP (pat, 0), 0);
22864 return REG_P (pat);
22867 /* Return true if r3 is used by any of the tail call insns in the
22868 current function. */
22869 static bool
22870 any_sibcall_could_use_r3 (void)
22872 edge_iterator ei;
22873 edge e;
22875 if (!crtl->tail_call_emit)
22876 return false;
22877 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22878 if (e->flags & EDGE_SIBCALL)
22880 rtx_insn *call = BB_END (e->src);
22881 if (!CALL_P (call))
22882 call = prev_nonnote_nondebug_insn (call);
22883 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22884 if (find_regno_fusage (call, USE, 3)
22885 || is_indirect_tailcall_p (call))
22886 return true;
22888 return false;
22892 /* Compute the distance from register FROM to register TO.
22893 These can be the arg pointer (26), the soft frame pointer (25),
22894 the stack pointer (13) or the hard frame pointer (11).
22895 In thumb mode r7 is used as the soft frame pointer, if needed.
22896 Typical stack layout looks like this:
22898 old stack pointer -> | |
22899 ----
22900 | | \
22901 | | saved arguments for
22902 | | vararg functions
22903 | | /
22905 hard FP & arg pointer -> | | \
22906 | | stack
22907 | | frame
22908 | | /
22910 | | \
22911 | | call saved
22912 | | registers
22913 soft frame pointer -> | | /
22915 | | \
22916 | | local
22917 | | variables
22918 locals base pointer -> | | /
22920 | | \
22921 | | outgoing
22922 | | arguments
22923 current stack pointer -> | | /
22926 For a given function some or all of these stack components
22927 may not be needed, giving rise to the possibility of
22928 eliminating some of the registers.
22930 The values returned by this function must reflect the behavior
22931 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22933 The sign of the number returned reflects the direction of stack
22934 growth, so the values are positive for all eliminations except
22935 from the soft frame pointer to the hard frame pointer.
22937 SFP may point just inside the local variables block to ensure correct
22938 alignment. */
22941 /* Return cached stack offsets. */
22943 static arm_stack_offsets *
22944 arm_get_frame_offsets (void)
22946 struct arm_stack_offsets *offsets;
22948 offsets = &cfun->machine->stack_offsets;
22950 return offsets;
22954 /* Calculate stack offsets. These are used to calculate register elimination
22955 offsets and in prologue/epilogue code. Also calculates which registers
22956 should be saved. */
22958 static void
22959 arm_compute_frame_layout (void)
22961 struct arm_stack_offsets *offsets;
22962 unsigned long func_type;
22963 int saved;
22964 int core_saved;
22965 HOST_WIDE_INT frame_size;
22966 int i;
22968 offsets = &cfun->machine->stack_offsets;
22970 /* Initially this is the size of the local variables. It will be translated
22971 into an offset once we have determined the size of preceding data. */
22972 frame_size = ROUND_UP_WORD (get_frame_size ());
22974 /* Space for variadic functions. */
22975 offsets->saved_args = crtl->args.pretend_args_size;
22977 /* In Thumb mode this is incorrect, but never used. */
22978 offsets->frame
22979 = (offsets->saved_args
22980 + arm_compute_static_chain_stack_bytes ()
22981 + (frame_pointer_needed ? 4 : 0));
22983 if (TARGET_32BIT)
22985 unsigned int regno;
22987 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22988 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22989 saved = core_saved;
22991 /* We know that SP will be doubleword aligned on entry, and we must
22992 preserve that condition at any subroutine call. We also require the
22993 soft frame pointer to be doubleword aligned. */
22995 if (TARGET_REALLY_IWMMXT)
22997 /* Check for the call-saved iWMMXt registers. */
22998 for (regno = FIRST_IWMMXT_REGNUM;
22999 regno <= LAST_IWMMXT_REGNUM;
23000 regno++)
23001 if (reg_needs_saving_p (regno))
23002 saved += 8;
23005 func_type = arm_current_func_type ();
23006 /* Space for saved VFP registers. */
23007 if (! IS_VOLATILE (func_type)
23008 && TARGET_VFP_BASE)
23009 saved += arm_get_vfp_saved_size ();
23011 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23012 nonsecure entry functions with VSTR/VLDR. */
23013 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23014 saved += 4;
23016 else /* TARGET_THUMB1 */
23018 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23019 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23020 saved = core_saved;
23021 if (TARGET_BACKTRACE)
23022 saved += 16;
23025 /* Saved registers include the stack frame. */
23026 offsets->saved_regs
23027 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23028 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23030 /* A leaf function does not need any stack alignment if it has nothing
23031 on the stack. */
23032 if (crtl->is_leaf && frame_size == 0
23033 /* However if it calls alloca(), we have a dynamically allocated
23034 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23035 && ! cfun->calls_alloca)
23037 offsets->outgoing_args = offsets->soft_frame;
23038 offsets->locals_base = offsets->soft_frame;
23039 return;
23042 /* Ensure SFP has the correct alignment. */
23043 if (ARM_DOUBLEWORD_ALIGN
23044 && (offsets->soft_frame & 7))
23046 offsets->soft_frame += 4;
23047 /* Try to align stack by pushing an extra reg. Don't bother doing this
23048 when there is a stack frame as the alignment will be rolled into
23049 the normal stack adjustment. */
23050 if (frame_size + crtl->outgoing_args_size == 0)
23052 int reg = -1;
23054 /* Register r3 is caller-saved. Normally it does not need to be
23055 saved on entry by the prologue. However if we choose to save
23056 it for padding then we may confuse the compiler into thinking
23057 a prologue sequence is required when in fact it is not. This
23058 will occur when shrink-wrapping if r3 is used as a scratch
23059 register and there are no other callee-saved writes.
23061 This situation can be avoided when other callee-saved registers
23062 are available and r3 is not mandatory if we choose a callee-saved
23063 register for padding. */
23064 bool prefer_callee_reg_p = false;
23066 /* If it is safe to use r3, then do so. This sometimes
23067 generates better code on Thumb-2 by avoiding the need to
23068 use 32-bit push/pop instructions. */
23069 if (! any_sibcall_could_use_r3 ()
23070 && arm_size_return_regs () <= 12
23071 && (offsets->saved_regs_mask & (1 << 3)) == 0
23072 && (TARGET_THUMB2
23073 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23075 reg = 3;
23076 if (!TARGET_THUMB2)
23077 prefer_callee_reg_p = true;
23079 if (reg == -1
23080 || prefer_callee_reg_p)
23082 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23084 /* Avoid fixed registers; they may be changed at
23085 arbitrary times so it's unsafe to restore them
23086 during the epilogue. */
23087 if (!fixed_regs[i]
23088 && (offsets->saved_regs_mask & (1 << i)) == 0)
23090 reg = i;
23091 break;
23096 if (reg != -1)
23098 offsets->saved_regs += 4;
23099 offsets->saved_regs_mask |= (1 << reg);
23104 offsets->locals_base = offsets->soft_frame + frame_size;
23105 offsets->outgoing_args = (offsets->locals_base
23106 + crtl->outgoing_args_size);
23108 if (ARM_DOUBLEWORD_ALIGN)
23110 /* Ensure SP remains doubleword aligned. */
23111 if (offsets->outgoing_args & 7)
23112 offsets->outgoing_args += 4;
23113 gcc_assert (!(offsets->outgoing_args & 7));
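/* Editorial sketch (not part of GCC): the offset bookkeeping above reduced to
   its arithmetic skeleton, assuming a 32-bit target, no iWMMXt/VFP/CMSE saves,
   no static chain, a CALLER_INTERWORKING_SLOT_SIZE of zero and no padding
   register being pushed.  Field names mirror arm_stack_offsets; everything
   else is hypothetical.  */
#if 0
struct frame_sketch
{
  int saved_args;       /* size of the pretend (vararg) argument save area */
  int saved_regs;       /* ... plus the core registers saved by the prologue */
  int soft_frame;       /* ... kept doubleword aligned for the soft FP */
  int locals_base;      /* ... plus the local variables */
  int outgoing_args;    /* ... plus outgoing arguments, doubleword aligned */
};

static struct frame_sketch
sketch_frame_layout (int pretend_args, int core_saved_bytes,
                     int frame_size, int outgoing_size)
{
  struct frame_sketch o;
  o.saved_args = pretend_args;
  o.saved_regs = o.saved_args + core_saved_bytes;
  o.soft_frame = o.saved_regs;
  if (o.soft_frame & 7)
    o.soft_frame += 4;          /* keep the soft frame pointer 8-byte aligned */
  o.locals_base = o.soft_frame + frame_size;
  o.outgoing_args = o.locals_base + outgoing_size;
  if (o.outgoing_args & 7)
    o.outgoing_args += 4;       /* keep SP 8-byte aligned at call sites */
  return o;
}
#endif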
23118 /* Calculate the relative offsets for the different stack pointers. Positive
23119 offsets are in the direction of stack growth. */
23121 HOST_WIDE_INT
23122 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23124 arm_stack_offsets *offsets;
23126 offsets = arm_get_frame_offsets ();
23128 /* OK, now we have enough information to compute the distances.
23129 There must be an entry in these switch tables for each pair
23130 of registers in ELIMINABLE_REGS, even if some of the entries
23131 seem to be redundant or useless. */
23132 switch (from)
23134 case ARG_POINTER_REGNUM:
23135 switch (to)
23137 case THUMB_HARD_FRAME_POINTER_REGNUM:
23138 return 0;
23140 case FRAME_POINTER_REGNUM:
23141 /* This is the reverse of the soft frame pointer
23142 to hard frame pointer elimination below. */
23143 return offsets->soft_frame - offsets->saved_args;
23145 case ARM_HARD_FRAME_POINTER_REGNUM:
23146 /* This is only non-zero in the case where the static chain register
23147 is stored above the frame. */
23148 return offsets->frame - offsets->saved_args - 4;
23150 case STACK_POINTER_REGNUM:
23151 /* If nothing has been pushed on the stack at all
23152 then this will return -4. This *is* correct! */
23153 return offsets->outgoing_args - (offsets->saved_args + 4);
23155 default:
23156 gcc_unreachable ();
23158 gcc_unreachable ();
23160 case FRAME_POINTER_REGNUM:
23161 switch (to)
23163 case THUMB_HARD_FRAME_POINTER_REGNUM:
23164 return 0;
23166 case ARM_HARD_FRAME_POINTER_REGNUM:
23167 /* The hard frame pointer points to the top entry in the
23168 stack frame. The soft frame pointer to the bottom entry
23169 in the stack frame. If there is no stack frame at all,
23170 then they are identical. */
23172 return offsets->frame - offsets->soft_frame;
23174 case STACK_POINTER_REGNUM:
23175 return offsets->outgoing_args - offsets->soft_frame;
23177 default:
23178 gcc_unreachable ();
23180 gcc_unreachable ();
23182 default:
23183 /* You cannot eliminate from the stack pointer.
23184 In theory you could eliminate from the hard frame
23185 pointer to the stack pointer, but this will never
23186 happen, since if a stack frame is not needed the
23187 hard frame pointer will never be used. */
23188 gcc_unreachable ();
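/* Editorial worked example (hypothetical numbers): with saved_args = 0, four
   core registers saved (saved_regs = 16), no locals and no outgoing arguments,
   soft_frame and outgoing_args are both 16.  Eliminating ARG_POINTER into
   STACK_POINTER then yields 16 - (0 + 4) = 12, and FRAME_POINTER into
   STACK_POINTER yields 16 - 16 = 0.  */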
23192 /* Given FROM and TO register numbers, say whether this elimination is
23193 allowed. Frame pointer elimination is automatically handled.
23195 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23196 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23197 pointer, we must eliminate FRAME_POINTER_REGNUM into
23198 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23199 ARG_POINTER_REGNUM. */
23201 bool
23202 arm_can_eliminate (const int from, const int to)
23204 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23205 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23206 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23207 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23208 true);
23211 /* Emit RTL to save coprocessor registers on function entry. Returns the
23212 number of bytes pushed. */
23214 static int
23215 arm_save_coproc_regs(void)
23217 int saved_size = 0;
23218 unsigned reg;
23219 unsigned start_reg;
23220 rtx insn;
23222 if (TARGET_REALLY_IWMMXT)
23223 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23224 if (reg_needs_saving_p (reg))
23226 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23227 insn = gen_rtx_MEM (V2SImode, insn);
23228 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23229 RTX_FRAME_RELATED_P (insn) = 1;
23230 saved_size += 8;
23233 if (TARGET_VFP_BASE)
23235 start_reg = FIRST_VFP_REGNUM;
23237 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23239 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23241 if (start_reg != reg)
23242 saved_size += vfp_emit_fstmd (start_reg,
23243 (reg - start_reg) / 2);
23244 start_reg = reg + 2;
23247 if (start_reg != reg)
23248 saved_size += vfp_emit_fstmd (start_reg,
23249 (reg - start_reg) / 2);
23251 return saved_size;
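/* Editorial sketch (not part of GCC): the run-finding scan above, expressed
   over D-register indices.  "report_run" is a hypothetical stand-in for
   vfp_emit_fstmd and simply reports one contiguous run of D-registers that
   would be stored with a single store-multiple.  */
#if 0
#include <stdio.h>

static void
report_run (int first_d, int count)
{
  printf ("store-multiple d%d-d%d (%d bytes)\n",
          first_d, first_d + count - 1, 8 * count);
}

static void
plan_vfp_saves (const int needs_saving[32])
{
  int start = 0;
  int d;

  for (d = 0; d < 32; d++)
    if (!needs_saving[d])
      {
        if (start != d)
          report_run (start, d - start);
        start = d + 1;          /* restart the run after the gap */
      }
  if (start != 32)
    report_run (start, 32 - start);
}
#endif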
23255 /* Set the Thumb frame pointer from the stack pointer. */
23257 static void
23258 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23260 HOST_WIDE_INT amount;
23261 rtx insn, dwarf;
23263 amount = offsets->outgoing_args - offsets->locals_base;
23264 if (amount < 1024)
23265 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23266 stack_pointer_rtx, GEN_INT (amount)));
23267 else
23269 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23270 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23271 expects the first two operands to be the same. */
23272 if (TARGET_THUMB2)
23274 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23275 stack_pointer_rtx,
23276 hard_frame_pointer_rtx));
23278 else
23280 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23281 hard_frame_pointer_rtx,
23282 stack_pointer_rtx));
23284 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23285 plus_constant (Pmode, stack_pointer_rtx, amount));
23286 RTX_FRAME_RELATED_P (dwarf) = 1;
23287 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23290 RTX_FRAME_RELATED_P (insn) = 1;
23293 struct scratch_reg {
23294 rtx reg;
23295 bool saved;
23298 /* Return a short-lived scratch register for use as a 2nd scratch register on
23299 function entry after the registers are saved in the prologue. This register
23300 must be released by means of release_scratch_register_on_entry. IP is not
23301 considered since it is always used as the 1st scratch register if available.
23303 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23304 mask of live registers. */
23306 static void
23307 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23308 unsigned long live_regs)
23310 int regno = -1;
23312 sr->saved = false;
23314 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23315 regno = LR_REGNUM;
23316 else
23318 unsigned int i;
23320 for (i = 4; i < 11; i++)
23321 if (regno1 != i && (live_regs & (1 << i)) != 0)
23323 regno = i;
23324 break;
23327 if (regno < 0)
23329 /* If IP is used as the 1st scratch register for a nested function,
23330 then either r3 wasn't available or is used to preserve IP. */
23331 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23332 regno1 = 3;
23333 regno = (regno1 == 3 ? 2 : 3);
23334 sr->saved
23335 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23336 regno);
23340 sr->reg = gen_rtx_REG (SImode, regno);
23341 if (sr->saved)
23343 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23344 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23345 rtx x = gen_rtx_SET (stack_pointer_rtx,
23346 plus_constant (Pmode, stack_pointer_rtx, -4));
23347 RTX_FRAME_RELATED_P (insn) = 1;
23348 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23352 /* Release a scratch register obtained from the preceding function. */
23354 static void
23355 release_scratch_register_on_entry (struct scratch_reg *sr)
23357 if (sr->saved)
23359 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23360 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23361 rtx x = gen_rtx_SET (stack_pointer_rtx,
23362 plus_constant (Pmode, stack_pointer_rtx, 4));
23363 RTX_FRAME_RELATED_P (insn) = 1;
23364 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
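/* Editorial note: when the chosen scratch register is live, the two helpers
   above bracket its use with a spill and a reload, conceptually
   "str rN, [sp, #-4]!" on entry and "ldr rN, [sp], #4" on release, each
   annotated with the matching 4-byte SP adjustment for the unwinder.  The
   register name is illustrative.  */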
23368 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23370 #if PROBE_INTERVAL > 4096
23371 #error Cannot use indexed addressing mode for stack probing
23372 #endif
23374 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23375 inclusive. These are offsets from the current stack pointer. REGNO1
23376 is the index number of the 1st scratch register and LIVE_REGS is the
23377 mask of live registers. */
23379 static void
23380 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23381 unsigned int regno1, unsigned long live_regs)
23383 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23385 /* See if we have a constant small number of probes to generate. If so,
23386 that's the easy case. */
23387 if (size <= PROBE_INTERVAL)
23389 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23390 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23391 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23394 /* The run-time loop is made up of 10 insns in the generic case while the
23395 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23396 else if (size <= 5 * PROBE_INTERVAL)
23398 HOST_WIDE_INT i, rem;
23400 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23401 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23402 emit_stack_probe (reg1);
23404 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23405 it exceeds SIZE. If only two probes are needed, this will not
23406 generate any code. Then probe at FIRST + SIZE. */
23407 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23409 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23410 emit_stack_probe (reg1);
23413 rem = size - (i - PROBE_INTERVAL);
23414 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23416 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23417 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23419 else
23420 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23423 /* Otherwise, do the same as above, but in a loop. Note that we must be
23424 extra careful with variables wrapping around because we might be at
23425 the very top (or the very bottom) of the address space and we have
23426 to be able to handle this case properly; in particular, we use an
23427 equality test for the loop condition. */
23428 else
23430 HOST_WIDE_INT rounded_size;
23431 struct scratch_reg sr;
23433 get_scratch_register_on_entry (&sr, regno1, live_regs);
23435 emit_move_insn (reg1, GEN_INT (first));
23438 /* Step 1: round SIZE to the previous multiple of the interval. */
23440 rounded_size = size & -PROBE_INTERVAL;
23441 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23444 /* Step 2: compute initial and final value of the loop counter. */
23446 /* TEST_ADDR = SP + FIRST. */
23447 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23449 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23450 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23453 /* Step 3: the loop
23457 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23458 probe at TEST_ADDR
23460 while (TEST_ADDR != LAST_ADDR)
23462 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23463 until it is equal to ROUNDED_SIZE. */
23465 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23468 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23469 that SIZE is equal to ROUNDED_SIZE. */
23471 if (size != rounded_size)
23473 HOST_WIDE_INT rem = size - rounded_size;
23475 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23477 emit_set_insn (sr.reg,
23478 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23479 emit_stack_probe (plus_constant (Pmode, sr.reg,
23480 PROBE_INTERVAL - rem));
23482 else
23483 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23486 release_scratch_register_on_entry (&sr);
23489 /* Make sure nothing is scheduled before we are done. */
23490 emit_insn (gen_blockage ());
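/* Editorial sketch (not part of GCC): the probe placement chosen above,
   expressed as the byte offsets below the incoming stack pointer that end up
   being probed.  PROBE_INTERVAL is assumed to be 4096 here; the helper and
   its output format are hypothetical.  */
#if 0
#include <stdio.h>

#define SKETCH_INTERVAL 4096

static void
plan_probes (long first, long size)
{
  if (size <= SKETCH_INTERVAL)
    /* One probe at the far end of the region.  */
    printf ("probe sp-%ld\n", first + size);
  else if (size <= 5 * SKETCH_INTERVAL)
    {
      long i;
      /* One probe per interval, then one at FIRST + SIZE.  */
      for (i = SKETCH_INTERVAL; i < size; i += SKETCH_INTERVAL)
        printf ("probe sp-%ld\n", first + i);
      printf ("probe sp-%ld\n", first + size);
    }
  else
    {
      long rounded = size & -SKETCH_INTERVAL;
      /* A run-time loop probes every interval down to FIRST + ROUNDED_SIZE,
         then one more probe covers any remainder.  */
      printf ("loop: probe sp-%ld .. sp-%ld, step %d\n",
              first + SKETCH_INTERVAL, first + rounded, SKETCH_INTERVAL);
      if (size != rounded)
        printf ("probe sp-%ld\n", first + size);
    }
}
#endif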
23493 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23494 absolute addresses. */
23496 const char *
23497 output_probe_stack_range (rtx reg1, rtx reg2)
23499 static int labelno = 0;
23500 char loop_lab[32];
23501 rtx xops[2];
23503 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23505 /* Loop. */
23506 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23508 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23509 xops[0] = reg1;
23510 xops[1] = GEN_INT (PROBE_INTERVAL);
23511 output_asm_insn ("sub\t%0, %0, %1", xops);
23513 /* Probe at TEST_ADDR. */
23514 output_asm_insn ("str\tr0, [%0, #0]", xops);
23516 /* Test if TEST_ADDR == LAST_ADDR. */
23517 xops[1] = reg2;
23518 output_asm_insn ("cmp\t%0, %1", xops);
23520 /* Branch. */
23521 fputs ("\tbne\t", asm_out_file);
23522 assemble_name_raw (asm_out_file, loop_lab);
23523 fputc ('\n', asm_out_file);
23525 return "";
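/* Editorial note: with PROBE_INTERVAL == 4096 the loop emitted above comes out
   as roughly the following, where the label and the two operand registers are
   illustrative:

        .LPSRL0:
        sub     r4, r4, #4096
        str     r0, [r4, #0]
        cmp     r4, r5
        bne     .LPSRL0
*/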
23528 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23529 function. */
23530 void
23531 arm_expand_prologue (void)
23533 rtx amount;
23534 rtx insn;
23535 rtx ip_rtx;
23536 unsigned long live_regs_mask;
23537 unsigned long func_type;
23538 int fp_offset = 0;
23539 int saved_pretend_args = 0;
23540 int saved_regs = 0;
23541 unsigned HOST_WIDE_INT args_to_push;
23542 HOST_WIDE_INT size;
23543 arm_stack_offsets *offsets;
23544 bool clobber_ip;
23546 func_type = arm_current_func_type ();
23548 /* Naked functions don't have prologues. */
23549 if (IS_NAKED (func_type))
23551 if (flag_stack_usage_info)
23552 current_function_static_stack_size = 0;
23553 return;
23556 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23557 args_to_push = crtl->args.pretend_args_size;
23559 /* Compute which register we will have to save onto the stack. */
23560 offsets = arm_get_frame_offsets ();
23561 live_regs_mask = offsets->saved_regs_mask;
23563 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23565 if (IS_STACKALIGN (func_type))
23567 rtx r0, r1;
23569 /* Handle a word-aligned stack pointer. We generate the following:
23571 mov r0, sp
23572 bic r1, r0, #7
23573 mov sp, r1
23574 <save and restore r0 in normal prologue/epilogue>
23575 mov sp, r0
23576 bx lr
23578 The unwinder doesn't need to know about the stack realignment.
23579 Just tell it we saved SP in r0. */
23580 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23582 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23583 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23585 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23586 RTX_FRAME_RELATED_P (insn) = 1;
23587 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23589 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23591 /* ??? The CFA changes here, which may cause GDB to conclude that it
23592 has entered a different function. That said, the unwind info is
23593 correct, individually, before and after this instruction because
23594 we've described the save of SP, which will override the default
23595 handling of SP as restoring from the CFA. */
23596 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23599 /* Let's compute the static_chain_stack_bytes required and store it. Right
23600 now the value must be -1 as stored by arm_init_machine_status (). */
23601 cfun->machine->static_chain_stack_bytes
23602 = arm_compute_static_chain_stack_bytes ();
23604 /* The static chain register is the same as the IP register. If it is
23605 clobbered when creating the frame, we need to save and restore it. */
23606 clobber_ip = (IS_NESTED (func_type)
23607 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23608 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23609 || flag_stack_clash_protection)
23610 && !df_regs_ever_live_p (LR_REGNUM)
23611 && arm_r3_live_at_start_p ()))
23612 || arm_current_function_pac_enabled_p ()));
23614 /* Find somewhere to store IP whilst the frame is being created.
23615 We try the following places in order:
23617 1. The last argument register r3 if it is available.
23618 2. A slot on the stack above the frame if there are no
23619 arguments to push onto the stack.
23620 3. Register r3 again, after pushing the argument registers
23621 onto the stack, if this is a varargs function.
23622 4. The last slot on the stack created for the arguments to
23623 push, if this isn't a varargs function.
23625 Note - we only need to tell the dwarf2 backend about the SP
23626 adjustment in the second variant; the static chain register
23627 doesn't need to be unwound, as it doesn't contain a value
23628 inherited from the caller. */
23629 if (clobber_ip)
23631 if (!arm_r3_live_at_start_p ())
23632 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23633 else if (args_to_push == 0)
23635 rtx addr, dwarf;
23637 saved_regs += 4;
23639 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23640 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23641 fp_offset = 4;
23643 /* Just tell the dwarf backend that we adjusted SP. */
23644 dwarf = gen_rtx_SET (stack_pointer_rtx,
23645 plus_constant (Pmode, stack_pointer_rtx,
23646 -fp_offset));
23647 RTX_FRAME_RELATED_P (insn) = 1;
23648 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23649 if (arm_current_function_pac_enabled_p ())
23650 cfun->machine->pacspval_needed = 1;
23652 else
23654 /* Store the args on the stack. */
23655 if (cfun->machine->uses_anonymous_args)
23657 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23658 (0xf0 >> (args_to_push / 4)) & 0xf);
23659 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23660 saved_pretend_args = 1;
23662 else
23664 rtx addr, dwarf;
23666 if (args_to_push == 4)
23667 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23668 else
23669 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23670 plus_constant (Pmode,
23671 stack_pointer_rtx,
23672 -args_to_push));
23674 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23676 /* Just tell the dwarf backend that we adjusted SP. */
23677 dwarf = gen_rtx_SET (stack_pointer_rtx,
23678 plus_constant (Pmode, stack_pointer_rtx,
23679 -args_to_push));
23680 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23683 RTX_FRAME_RELATED_P (insn) = 1;
23684 fp_offset = args_to_push;
23685 args_to_push = 0;
23686 if (arm_current_function_pac_enabled_p ())
23687 cfun->machine->pacspval_needed = 1;
23691 if (arm_current_function_pac_enabled_p ())
23693 /* If IP was clobbered we only emit a PAC instruction as the BTI
23694 one will be added before the push of the clobbered IP (if
23695 necessary) by the bti pass. */
23696 if (aarch_bti_enabled () && !clobber_ip)
23697 insn = emit_insn (gen_pacbti_nop ());
23698 else
23699 insn = emit_insn (gen_pac_nop ());
23701 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23702 RTX_FRAME_RELATED_P (insn) = 1;
23703 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23706 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23708 if (IS_INTERRUPT (func_type))
23710 /* Interrupt functions must not corrupt any registers.
23711 Creating a frame pointer however, corrupts the IP
23712 register, so we must push it first. */
23713 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23715 /* Do not set RTX_FRAME_RELATED_P on this insn.
23716 The dwarf stack unwinding code only wants to see one
23717 stack decrement per function, and this is not it. If
23718 this instruction is labeled as being part of the frame
23719 creation sequence then dwarf2out_frame_debug_expr will
23720 die when it encounters the assignment of IP to FP
23721 later on, since the use of SP here establishes SP as
23722 the CFA register and not IP.
23724 Anyway this instruction is not really part of the stack
23725 frame creation although it is part of the prologue. */
23728 insn = emit_set_insn (ip_rtx,
23729 plus_constant (Pmode, stack_pointer_rtx,
23730 fp_offset));
23731 RTX_FRAME_RELATED_P (insn) = 1;
23734 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23735 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23737 saved_regs += 4;
23738 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23739 GEN_INT (FPCXTNS_ENUM)));
23740 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23741 plus_constant (Pmode, stack_pointer_rtx, -4));
23742 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23743 RTX_FRAME_RELATED_P (insn) = 1;
23746 if (args_to_push)
23748 /* Push the argument registers, or reserve space for them. */
23749 if (cfun->machine->uses_anonymous_args)
23750 insn = emit_multi_reg_push
23751 ((0xf0 >> (args_to_push / 4)) & 0xf,
23752 (0xf0 >> (args_to_push / 4)) & 0xf);
23753 else
23754 insn = emit_insn
23755 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23756 GEN_INT (- args_to_push)));
23757 RTX_FRAME_RELATED_P (insn) = 1;
23760 /* If this is an interrupt service routine, and the link register
23761 is going to be pushed, and we're not generating extra
23762 push of IP (needed when a frame is needed and the frame layout is APCS),
23763 subtracting four from LR now will mean that the function return
23764 can be done with a single instruction. */
23765 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23766 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23767 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23768 && TARGET_ARM)
23770 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23772 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23775 if (live_regs_mask)
23777 unsigned long dwarf_regs_mask = live_regs_mask;
23779 saved_regs += bit_count (live_regs_mask) * 4;
23780 if (optimize_size && !frame_pointer_needed
23781 && saved_regs == offsets->saved_regs - offsets->saved_args)
23783 /* If no coprocessor registers are being pushed and we don't have
23784 to worry about a frame pointer then push extra registers to
23785 create the stack frame. This is done in a way that does not
23786 alter the frame layout, so is independent of the epilogue. */
23787 int n;
23788 int frame;
23789 n = 0;
23790 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23791 n++;
23792 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23793 if (frame && n * 4 >= frame)
23795 n = frame / 4;
23796 live_regs_mask |= (1 << n) - 1;
23797 saved_regs += frame;
23801 if (TARGET_LDRD
23802 && current_tune->prefer_ldrd_strd
23803 && !optimize_function_for_size_p (cfun))
23805 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23806 if (TARGET_THUMB2)
23807 thumb2_emit_strd_push (live_regs_mask);
23808 else if (TARGET_ARM
23809 && !TARGET_APCS_FRAME
23810 && !IS_INTERRUPT (func_type))
23811 arm_emit_strd_push (live_regs_mask);
23812 else
23814 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23815 RTX_FRAME_RELATED_P (insn) = 1;
23818 else
23820 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23821 RTX_FRAME_RELATED_P (insn) = 1;
23825 if (! IS_VOLATILE (func_type))
23826 saved_regs += arm_save_coproc_regs ();
23828 if (frame_pointer_needed && TARGET_ARM)
23830 /* Create the new frame pointer. */
23831 if (TARGET_APCS_FRAME)
23833 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23834 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23835 RTX_FRAME_RELATED_P (insn) = 1;
23837 else
23839 insn = GEN_INT (saved_regs - (4 + fp_offset));
23840 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23841 stack_pointer_rtx, insn));
23842 RTX_FRAME_RELATED_P (insn) = 1;
23846 size = offsets->outgoing_args - offsets->saved_args;
23847 if (flag_stack_usage_info)
23848 current_function_static_stack_size = size;
23850 /* If this isn't an interrupt service routine and we have a frame, then do
23851 stack checking. We use IP as the first scratch register, except for the
23852 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23853 if (!IS_INTERRUPT (func_type)
23854 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23855 || flag_stack_clash_protection))
23857 unsigned int regno;
23859 if (!IS_NESTED (func_type) || clobber_ip)
23860 regno = IP_REGNUM;
23861 else if (df_regs_ever_live_p (LR_REGNUM))
23862 regno = LR_REGNUM;
23863 else
23864 regno = 3;
23866 if (crtl->is_leaf && !cfun->calls_alloca)
23868 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23869 arm_emit_probe_stack_range (get_stack_check_protect (),
23870 size - get_stack_check_protect (),
23871 regno, live_regs_mask);
23873 else if (size > 0)
23874 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23875 regno, live_regs_mask);
23878 /* Recover the static chain register. */
23879 if (clobber_ip)
23881 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23882 insn = gen_rtx_REG (SImode, 3);
23883 else
23885 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23886 insn = gen_frame_mem (SImode, insn);
23888 emit_set_insn (ip_rtx, insn);
23889 emit_insn (gen_force_register_use (ip_rtx));
23892 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23894 /* This add can produce multiple insns for a large constant, so we
23895 need to get tricky. */
23896 rtx_insn *last = get_last_insn ();
23898 amount = GEN_INT (offsets->saved_args + saved_regs
23899 - offsets->outgoing_args);
23901 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23902 amount));
23905 last = last ? NEXT_INSN (last) : get_insns ();
23906 RTX_FRAME_RELATED_P (last) = 1;
23908 while (last != insn);
23910 /* If the frame pointer is needed, emit a special barrier that
23911 will prevent the scheduler from moving stores to the frame
23912 before the stack adjustment. */
23913 if (frame_pointer_needed)
23914 emit_insn (gen_stack_tie (stack_pointer_rtx,
23915 hard_frame_pointer_rtx));
23919 if (frame_pointer_needed && TARGET_THUMB2)
23920 thumb_set_frame_pointer (offsets);
23922 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23924 unsigned long mask;
23926 mask = live_regs_mask;
23927 mask &= THUMB2_WORK_REGS;
23928 if (!IS_NESTED (func_type))
23929 mask |= (1 << IP_REGNUM);
23930 arm_load_pic_register (mask, NULL_RTX);
23933 /* If we are profiling, make sure no instructions are scheduled before
23934 the call to mcount. Similarly if the user has requested no
23935 scheduling in the prologue. Similarly if we want non-call exceptions
23936 using the EABI unwinder, to prevent faulting instructions from being
23937 swapped with a stack adjustment. */
23938 if (crtl->profile || !TARGET_SCHED_PROLOG
23939 || (arm_except_unwind_info (&global_options) == UI_TARGET
23940 && cfun->can_throw_non_call_exceptions))
23941 emit_insn (gen_blockage ());
23943 /* If the link register is being kept alive, with the return address in it,
23944 then make sure that it does not get reused by the ce2 pass. */
23945 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23946 cfun->machine->lr_save_eliminated = 1;
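/* Editorial sketch (not part of GCC): the -Os trick above that folds a small
   stack-frame allocation into the register push.  FRAME is the byte gap that
   would otherwise need a separate SP subtraction; if at least FRAME/4 of the
   low registers below the first saved one are free, pushing them creates the
   frame as a side effect.  The helper is hypothetical.  */
#if 0
static unsigned long
absorb_frame_into_push (unsigned long live_regs_mask, int frame)
{
  int n = 0;

  /* Count the free low registers r0..r(n-1) below the first saved one.  */
  while (n < 8 && (live_regs_mask & (1UL << n)) == 0)
    n++;
  if (frame && n * 4 >= frame)
    /* Push r0..r(frame/4 - 1) instead of emitting "sub sp, sp, #frame".  */
    live_regs_mask |= (1UL << (frame / 4)) - 1;
  return live_regs_mask;
}
#endif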
23949 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23950 static void
23951 arm_print_condition (FILE *stream)
23953 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23955 /* Branch conversion is not implemented for Thumb-2. */
23956 if (TARGET_THUMB)
23958 output_operand_lossage ("predicated Thumb instruction");
23959 return;
23961 if (current_insn_predicate != NULL)
23963 output_operand_lossage
23964 ("predicated instruction in conditional sequence");
23965 return;
23968 fputs (arm_condition_codes[arm_current_cc], stream);
23970 else if (current_insn_predicate)
23972 enum arm_cond_code code;
23974 if (TARGET_THUMB1)
23976 output_operand_lossage ("predicated Thumb instruction");
23977 return;
23980 code = get_arm_condition_code (current_insn_predicate);
23981 fputs (arm_condition_codes[code], stream);
23986 /* Globally reserved letters: acln
23987 Punctuation letters currently used: @_|?().!#
23988 Lower case letters currently used: bcdefhimpqtvwxyz
23989 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23990 Letters previously used, but now deprecated/obsolete: sWXYZ.
23992 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23994 If CODE is 'd', then the X is a condition operand and the instruction
23995 should only be executed if the condition is true.
23996 If CODE is 'D', then the X is a condition operand and the instruction
23997 should only be executed if the condition is false: however, if the mode
23998 of the comparison is CCFPEmode, then always execute the instruction -- we
23999 do this because in these circumstances !GE does not necessarily imply LT;
24000 in these cases the instruction pattern will take care to make sure that
24001 an instruction containing %d will follow, thereby undoing the effects of
24002 doing this instruction unconditionally.
24003 If CODE is 'N' then X is a floating point operand that must be negated
24004 before output.
24005 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24006 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24007 If CODE is 'V', then the operand must be a CONST_INT representing
24008 the bits to preserve in the modified register (Rd) of a BFI or BFC
24009 instruction: print out both the width and lsb (shift) fields. */
24010 static void
24011 arm_print_operand (FILE *stream, rtx x, int code)
24013 switch (code)
24015 case '@':
24016 fputs (ASM_COMMENT_START, stream);
24017 return;
24019 case '_':
24020 fputs (user_label_prefix, stream);
24021 return;
24023 case '|':
24024 fputs (REGISTER_PREFIX, stream);
24025 return;
24027 case '?':
24028 arm_print_condition (stream);
24029 return;
24031 case '.':
24032 /* The current condition code for a condition code setting instruction.
24033 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24034 fputc('s', stream);
24035 arm_print_condition (stream);
24036 return;
24038 case '!':
24039 /* If the instruction is conditionally executed then print
24040 the current condition code, otherwise print 's'. */
24041 gcc_assert (TARGET_THUMB2);
24042 if (current_insn_predicate)
24043 arm_print_condition (stream);
24044 else
24045 fputc('s', stream);
24046 break;
24048 /* %# is a "break" sequence. It doesn't output anything, but is used to
24049 separate e.g. operand numbers from following text, if that text consists
24050 of further digits which we don't want to be part of the operand
24051 number. */
24052 case '#':
24053 return;
24055 case 'N':
24057 REAL_VALUE_TYPE r;
24058 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24059 fprintf (stream, "%s", fp_const_from_val (&r));
24061 return;
24063 /* An integer or symbol address without a preceding # sign. */
24064 case 'c':
24065 switch (GET_CODE (x))
24067 case CONST_INT:
24068 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24069 break;
24071 case SYMBOL_REF:
24072 output_addr_const (stream, x);
24073 break;
24075 case CONST:
24076 if (GET_CODE (XEXP (x, 0)) == PLUS
24077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24079 output_addr_const (stream, x);
24080 break;
24082 /* Fall through. */
24084 default:
24085 output_operand_lossage ("Unsupported operand for code '%c'", code);
24087 return;
24089 /* An integer that we want to print in HEX. */
24090 case 'x':
24091 switch (GET_CODE (x))
24093 case CONST_INT:
24094 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24095 break;
24097 default:
24098 output_operand_lossage ("Unsupported operand for code '%c'", code);
24100 return;
24102 case 'B':
24103 if (CONST_INT_P (x))
24105 HOST_WIDE_INT val;
24106 val = ARM_SIGN_EXTEND (~INTVAL (x));
24107 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24109 else
24111 putc ('~', stream);
24112 output_addr_const (stream, x);
24114 return;
24116 case 'b':
24117 /* Print the log2 of a CONST_INT. */
24119 HOST_WIDE_INT val;
24121 if (!CONST_INT_P (x)
24122 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24123 output_operand_lossage ("Unsupported operand for code '%c'", code);
24124 else
24125 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24127 return;
24129 case 'L':
24130 /* The low 16 bits of an immediate constant. */
24131 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24132 return;
24134 case 'i':
24135 fprintf (stream, "%s", arithmetic_instr (x, 1));
24136 return;
24138 case 'I':
24139 fprintf (stream, "%s", arithmetic_instr (x, 0));
24140 return;
24142 case 'S':
24144 HOST_WIDE_INT val;
24145 const char *shift;
24147 shift = shift_op (x, &val);
24149 if (shift)
24151 fprintf (stream, ", %s ", shift);
24152 if (val == -1)
24153 arm_print_operand (stream, XEXP (x, 1), 0);
24154 else
24155 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24158 return;
24160 /* An explanation of the 'Q', 'R' and 'H' register operands:
24162 In a pair of registers containing a DI or DF value the 'Q'
24163 operand returns the register number of the register containing
24164 the least significant part of the value. The 'R' operand returns
24165 the register number of the register containing the most
24166 significant part of the value.
24168 The 'H' operand returns the higher of the two register numbers.
24169 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24170 same as the 'Q' operand, since the most significant part of the
24171 value is held in the lower number register. The reverse is true
24172 on systems where WORDS_BIG_ENDIAN is false.
24174 The purpose of these operands is to distinguish between cases
24175 where the endian-ness of the values is important (for example
24176 when they are added together), and cases where the endian-ness
24177 is irrelevant, but the order of register operations is important.
24178 For example when loading a value from memory into a register
24179 pair, the endian-ness does not matter. Provided that the value
24180 from the lower memory address is put into the lower numbered
24181 register, and the value from the higher address is put into the
24182 higher numbered register, the load will work regardless of whether
24183 the value being loaded is big-wordian or little-wordian. The
24184 order of the two register loads can matter however, if the address
24185 of the memory location is actually held in one of the registers
24186 being overwritten by the load.
24188 The 'Q' and 'R' constraints are also available for 64-bit
24189 constants. */
24190 case 'Q':
24191 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24193 rtx part = gen_lowpart (SImode, x);
24194 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24195 return;
24198 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24200 output_operand_lossage ("invalid operand for code '%c'", code);
24201 return;
24204 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24205 return;
24207 case 'R':
24208 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24210 machine_mode mode = GET_MODE (x);
24211 rtx part;
24213 if (mode == VOIDmode)
24214 mode = DImode;
24215 part = gen_highpart_mode (SImode, mode, x);
24216 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24217 return;
24220 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24222 output_operand_lossage ("invalid operand for code '%c'", code);
24223 return;
24226 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24227 return;
24229 case 'H':
24230 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24232 output_operand_lossage ("invalid operand for code '%c'", code);
24233 return;
24236 asm_fprintf (stream, "%r", REGNO (x) + 1);
24237 return;
24239 case 'J':
24240 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24242 output_operand_lossage ("invalid operand for code '%c'", code);
24243 return;
24246 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24247 return;
24249 case 'K':
24250 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24252 output_operand_lossage ("invalid operand for code '%c'", code);
24253 return;
24256 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24257 return;
24259 case 'm':
24260 asm_fprintf (stream, "%r",
24261 REG_P (XEXP (x, 0))
24262 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24263 return;
24265 case 'M':
24266 asm_fprintf (stream, "{%r-%r}",
24267 REGNO (x),
24268 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24269 return;
24271 /* Like 'M', but writing doubleword vector registers, for use by Neon
24272 insns. */
24273 case 'h':
24275 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24276 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24277 if (numregs == 1)
24278 asm_fprintf (stream, "{d%d}", regno);
24279 else
24280 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24282 return;
24284 case 'd':
24285 /* CONST_TRUE_RTX means always -- that's the default. */
24286 if (x == const_true_rtx)
24287 return;
24289 if (!COMPARISON_P (x))
24291 output_operand_lossage ("invalid operand for code '%c'", code);
24292 return;
24295 fputs (arm_condition_codes[get_arm_condition_code (x)],
24296 stream);
24297 return;
24299 case 'D':
24300 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24301 want to do that. */
24302 if (x == const_true_rtx)
24304 output_operand_lossage ("instruction never executed");
24305 return;
24307 if (!COMPARISON_P (x))
24309 output_operand_lossage ("invalid operand for code '%c'", code);
24310 return;
24313 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24314 (get_arm_condition_code (x))],
24315 stream);
24316 return;
24318 case 'V':
24320 /* Output the LSB (shift) and width for a bitmask instruction
24321 based on a literal mask. The LSB is printed first,
24322 followed by the width.
24324 Eg. For 0b1...1110001, the result is #1, #3. */
24325 if (!CONST_INT_P (x))
24327 output_operand_lossage ("invalid operand for code '%c'", code);
24328 return;
24331 unsigned HOST_WIDE_INT val
24332 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24333 int lsb = exact_log2 (val & -val);
24334 asm_fprintf (stream, "#%d, #%d", lsb,
24335 (exact_log2 (val + (val & -val)) - lsb));
24337 return;
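      /* Editorial worked example: for the 0b1...1110001 mask quoted above, the
         cleared field is val = ~x & 0xffffffff = 0b0001110.  val & -val is
         0b10, so lsb = exact_log2 (0b10) = 1, and the width is
         exact_log2 (0b1110 + 0b10) - lsb = 4 - 1 = 3, giving "#1, #3".  */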
24339 case 's':
24340 case 'W':
24341 case 'X':
24342 case 'Y':
24343 case 'Z':
24344 /* Former Maverick support, removed after GCC-4.7. */
24345 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24346 return;
24348 case 'U':
24349 if (!REG_P (x)
24350 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24351 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24352 /* Bad value for wCG register number. */
24354 output_operand_lossage ("invalid operand for code '%c'", code);
24355 return;
24358 else
24359 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24360 return;
24362 /* Print an iWMMXt control register name. */
24363 case 'w':
24364 if (!CONST_INT_P (x)
24365 || INTVAL (x) < 0
24366 || INTVAL (x) >= 16)
24367 /* Bad value for wC register number. */
24369 output_operand_lossage ("invalid operand for code '%c'", code);
24370 return;
24373 else
24375 static const char * wc_reg_names [16] =
24377 "wCID", "wCon", "wCSSF", "wCASF",
24378 "wC4", "wC5", "wC6", "wC7",
24379 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24380 "wC12", "wC13", "wC14", "wC15"
24383 fputs (wc_reg_names [INTVAL (x)], stream);
24385 return;
24387 /* Print the high single-precision register of a VFP double-precision
24388 register. */
24389 case 'p':
24391 machine_mode mode = GET_MODE (x);
24392 int regno;
24394 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24396 output_operand_lossage ("invalid operand for code '%c'", code);
24397 return;
24400 regno = REGNO (x);
24401 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24403 output_operand_lossage ("invalid operand for code '%c'", code);
24404 return;
24407 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24409 return;
24411 /* Print a VFP/Neon double precision or quad precision register name. */
24412 case 'P':
24413 case 'q':
24415 machine_mode mode = GET_MODE (x);
24416 int is_quad = (code == 'q');
24417 int regno;
24419 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24421 output_operand_lossage ("invalid operand for code '%c'", code);
24422 return;
24425 if (!REG_P (x)
24426 || !IS_VFP_REGNUM (REGNO (x)))
24428 output_operand_lossage ("invalid operand for code '%c'", code);
24429 return;
24432 regno = REGNO (x);
24433 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24434 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24436 output_operand_lossage ("invalid operand for code '%c'", code);
24437 return;
24440 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24441 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24443 return;
24445 /* These two codes print the low/high doubleword register of a Neon quad
24446 register, respectively. For pair-structure types, can also print
24447 low/high quadword registers. */
24448 case 'e':
24449 case 'f':
24451 machine_mode mode = GET_MODE (x);
24452 int regno;
24454 if ((GET_MODE_SIZE (mode) != 16
24455 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24457 output_operand_lossage ("invalid operand for code '%c'", code);
24458 return;
24461 regno = REGNO (x);
24462 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24464 output_operand_lossage ("invalid operand for code '%c'", code);
24465 return;
24468 if (GET_MODE_SIZE (mode) == 16)
24469 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24470 + (code == 'f' ? 1 : 0));
24471 else
24472 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24473 + (code == 'f' ? 1 : 0));
24475 return;
24477 /* Print a VFPv3 floating-point constant, represented as an integer
24478 index. */
24479 case 'G':
24481 int index = vfp3_const_double_index (x);
24482 gcc_assert (index != -1);
24483 fprintf (stream, "%d", index);
24485 return;
24487 /* Print bits representing opcode features for Neon.
24489 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24490 and polynomials as unsigned.
24492 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24494 Bit 2 is 1 for rounding functions, 0 otherwise. */
24496 /* Identify the type as 's', 'u', 'p' or 'f'. */
24497 case 'T':
24499 HOST_WIDE_INT bits = INTVAL (x);
24500 fputc ("uspf"[bits & 3], stream);
24502 return;
24504 /* Likewise, but signed and unsigned integers are both 'i'. */
24505 case 'F':
24507 HOST_WIDE_INT bits = INTVAL (x);
24508 fputc ("iipf"[bits & 3], stream);
24510 return;
24512 /* As for 'T', but emit 'u' instead of 'p'. */
24513 case 't':
24515 HOST_WIDE_INT bits = INTVAL (x);
24516 fputc ("usuf"[bits & 3], stream);
24518 return;
24520 /* Bit 2: rounding (vs none). */
24521 case 'O':
24523 HOST_WIDE_INT bits = INTVAL (x);
24524 fputs ((bits & 4) != 0 ? "r" : "", stream);
24526 return;
24528 /* Memory operand for vld1/vst1 instruction. */
24529 case 'A':
24531 rtx addr;
24532 bool postinc = FALSE;
24533 rtx postinc_reg = NULL;
24534 unsigned align, memsize, align_bits;
24536 gcc_assert (MEM_P (x));
24537 addr = XEXP (x, 0);
24538 if (GET_CODE (addr) == POST_INC)
24540 postinc = 1;
24541 addr = XEXP (addr, 0);
24543 if (GET_CODE (addr) == POST_MODIFY)
24545 postinc_reg = XEXP( XEXP (addr, 1), 1);
24546 addr = XEXP (addr, 0);
24548 asm_fprintf (stream, "[%r", REGNO (addr));
24550 /* We know the alignment of this access, so we can emit a hint in the
24551 instruction (for some alignments) as an aid to the memory subsystem
24552 of the target. */
24553 align = MEM_ALIGN (x) >> 3;
24554 memsize = MEM_SIZE (x);
24556 /* Only certain alignment specifiers are supported by the hardware. */
24557 if (memsize == 32 && (align % 32) == 0)
24558 align_bits = 256;
24559 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24560 align_bits = 128;
24561 else if (memsize >= 8 && (align % 8) == 0)
24562 align_bits = 64;
24563 else
24564 align_bits = 0;
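/* Illustrative example (register name assumed): a 16-byte access whose
   MEM_ALIGN guarantees 16-byte alignment is printed as "[r0:128]", while
   an access with no usable alignment guarantee gets no ":<bits>" hint.  */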
24566 if (align_bits != 0)
24567 asm_fprintf (stream, ":%d", align_bits);
24569 asm_fprintf (stream, "]");
24571 if (postinc)
24572 fputs("!", stream);
24573 if (postinc_reg)
24574 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24576 return;
24578 /* Print a memory operand with an "Ux" or "Uj" constraint. Depending on the
24579 rtx_code, the output looks like one of the following:
24580 1. [Rn], #+/-<imm>
24581 2. [Rn, #+/-<imm>]!
24582 3. [Rn, #+/-<imm>]
24583 4. [Rn]. */
24584 case 'E':
24586 rtx addr;
24587 rtx postinc_reg = NULL;
24588 unsigned inc_val = 0;
24589 enum rtx_code code;
24591 gcc_assert (MEM_P (x));
24592 addr = XEXP (x, 0);
24593 code = GET_CODE (addr);
24594 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24595 || code == PRE_DEC)
24597 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24598 inc_val = GET_MODE_SIZE (GET_MODE (x));
24599 if (code == POST_INC || code == POST_DEC)
24600 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24601 ? "": "-", inc_val);
24602 else
24603 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24604 ? "": "-", inc_val);
24606 else if (code == POST_MODIFY || code == PRE_MODIFY)
24608 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24609 postinc_reg = XEXP (XEXP (addr, 1), 1);
24610 if (postinc_reg && CONST_INT_P (postinc_reg))
24612 if (code == POST_MODIFY)
24613 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24614 else
24615 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24618 else if (code == PLUS)
24620 rtx base = XEXP (addr, 0);
24621 rtx index = XEXP (addr, 1);
24623 gcc_assert (REG_P (base) && CONST_INT_P (index));
24625 HOST_WIDE_INT offset = INTVAL (index);
24626 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24628 else
24630 gcc_assert (REG_P (addr));
24631 asm_fprintf (stream, "[%r]",REGNO (addr));
24634 return;
24636 case 'C':
24638 rtx addr;
24640 gcc_assert (MEM_P (x));
24641 addr = XEXP (x, 0);
24642 gcc_assert (REG_P (addr));
24643 asm_fprintf (stream, "[%r]", REGNO (addr));
24645 return;
24647 /* Translate an S register number into a D register number and element index. */
24648 case 'y':
24650 machine_mode mode = GET_MODE (x);
24651 int regno;
24653 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24655 output_operand_lossage ("invalid operand for code '%c'", code);
24656 return;
24659 regno = REGNO (x);
24660 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24662 output_operand_lossage ("invalid operand for code '%c'", code);
24663 return;
24666 regno = regno - FIRST_VFP_REGNUM;
24667 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
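/* Illustrative example: s13 prints as "d6[1]", since 13 / 2 == 6 selects
   the D register and 13 % 2 == 1 selects the lane.  */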
24669 return;
24671 case 'v':
24672 gcc_assert (CONST_DOUBLE_P (x));
24673 int result;
24674 result = vfp3_const_double_for_fract_bits (x);
24675 if (result == 0)
24676 result = vfp3_const_double_for_bits (x);
24677 fprintf (stream, "#%d", result);
24678 return;
24680 /* Register specifier for vld1.16/vst1.16. Translate the S register
24681 number into a D register number and element index. */
24682 case 'z':
24684 machine_mode mode = GET_MODE (x);
24685 int regno;
24687 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24689 output_operand_lossage ("invalid operand for code '%c'", code);
24690 return;
24693 regno = REGNO (x);
24694 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24696 output_operand_lossage ("invalid operand for code '%c'", code);
24697 return;
24700 regno = regno - FIRST_VFP_REGNUM;
24701 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24703 return;
24705 default:
24706 if (x == 0)
24708 output_operand_lossage ("missing operand");
24709 return;
24712 switch (GET_CODE (x))
24714 case REG:
24715 asm_fprintf (stream, "%r", REGNO (x));
24716 break;
24718 case MEM:
24719 output_address (GET_MODE (x), XEXP (x, 0));
24720 break;
24722 case CONST_DOUBLE:
24724 char fpstr[20];
24725 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24726 sizeof (fpstr), 0, 1);
24727 fprintf (stream, "#%s", fpstr);
24729 break;
24731 default:
24732 gcc_assert (GET_CODE (x) != NEG);
24733 fputc ('#', stream);
24734 if (GET_CODE (x) == HIGH)
24736 fputs (":lower16:", stream);
24737 x = XEXP (x, 0);
24740 output_addr_const (stream, x);
24741 break;
24746 /* Target hook for printing a memory address. */
24747 static void
24748 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24750 if (TARGET_32BIT)
24752 int is_minus = GET_CODE (x) == MINUS;
24754 if (REG_P (x))
24755 asm_fprintf (stream, "[%r]", REGNO (x));
24756 else if (GET_CODE (x) == PLUS || is_minus)
24758 rtx base = XEXP (x, 0);
24759 rtx index = XEXP (x, 1);
24760 HOST_WIDE_INT offset = 0;
24761 if (!REG_P (base)
24762 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24764 /* Ensure that BASE is a register. */
24765 /* (one of them must be). */
24766 /* Also ensure the SP is not used as an index register. */
24767 std::swap (base, index);
24769 switch (GET_CODE (index))
24771 case CONST_INT:
24772 offset = INTVAL (index);
24773 if (is_minus)
24774 offset = -offset;
24775 asm_fprintf (stream, "[%r, #%wd]",
24776 REGNO (base), offset);
24777 break;
24779 case REG:
24780 asm_fprintf (stream, "[%r, %s%r]",
24781 REGNO (base), is_minus ? "-" : "",
24782 REGNO (index));
24783 break;
24785 case MULT:
24786 case ASHIFTRT:
24787 case LSHIFTRT:
24788 case ASHIFT:
24789 case ROTATERT:
24791 asm_fprintf (stream, "[%r, %s%r",
24792 REGNO (base), is_minus ? "-" : "",
24793 REGNO (XEXP (index, 0)));
24794 arm_print_operand (stream, index, 'S');
24795 fputs ("]", stream);
24796 break;
24799 default:
24800 gcc_unreachable ();
24803 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24804 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24806 gcc_assert (REG_P (XEXP (x, 0)));
24808 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24809 asm_fprintf (stream, "[%r, #%s%d]!",
24810 REGNO (XEXP (x, 0)),
24811 GET_CODE (x) == PRE_DEC ? "-" : "",
24812 GET_MODE_SIZE (mode));
24813 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24814 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24815 else
24816 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24817 GET_CODE (x) == POST_DEC ? "-" : "",
24818 GET_MODE_SIZE (mode));
24820 else if (GET_CODE (x) == PRE_MODIFY)
24822 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24823 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24824 asm_fprintf (stream, "#%wd]!",
24825 INTVAL (XEXP (XEXP (x, 1), 1)));
24826 else
24827 asm_fprintf (stream, "%r]!",
24828 REGNO (XEXP (XEXP (x, 1), 1)));
24830 else if (GET_CODE (x) == POST_MODIFY)
24832 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24833 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24834 asm_fprintf (stream, "#%wd",
24835 INTVAL (XEXP (XEXP (x, 1), 1)));
24836 else
24837 asm_fprintf (stream, "%r",
24838 REGNO (XEXP (XEXP (x, 1), 1)));
24840 else output_addr_const (stream, x);
24842 else
24844 if (REG_P (x))
24845 asm_fprintf (stream, "[%r]", REGNO (x));
24846 else if (GET_CODE (x) == POST_INC)
24847 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24848 else if (GET_CODE (x) == PLUS)
24850 gcc_assert (REG_P (XEXP (x, 0)));
24851 if (CONST_INT_P (XEXP (x, 1)))
24852 asm_fprintf (stream, "[%r, #%wd]",
24853 REGNO (XEXP (x, 0)),
24854 INTVAL (XEXP (x, 1)));
24855 else
24856 asm_fprintf (stream, "[%r, %r]",
24857 REGNO (XEXP (x, 0)),
24858 REGNO (XEXP (x, 1)));
24860 else
24861 output_addr_const (stream, x);
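/* Illustrative examples (register names and offsets assumed) of the 32-bit
   forms printed above: "[r0]", "[r0, #12]", "[r0, -r1]",
   "[r0, r1, lsl #2]", "[r0, #8]!" for pre-increment and "[r0], #-8" for
   post-decrement.  */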
24865 /* Target hook for indicating whether a punctuation character for
24866 TARGET_PRINT_OPERAND is valid. */
24867 static bool
24868 arm_print_operand_punct_valid_p (unsigned char code)
24870 return (code == '@' || code == '|' || code == '.'
24871 || code == '(' || code == ')' || code == '#'
24872 || (TARGET_32BIT && (code == '?'))
24873 || (TARGET_THUMB2 && (code == '!'))
24874 || (TARGET_THUMB && (code == '_')));
24877 /* Target hook for assembling integer objects. The ARM version needs to
24878 handle word-sized values specially. */
24879 static bool
24880 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24882 machine_mode mode;
24884 if (size == UNITS_PER_WORD && aligned_p)
24886 fputs ("\t.word\t", asm_out_file);
24887 output_addr_const (asm_out_file, x);
24889 /* Mark symbols as position independent. We only do this in the
24890 .text segment, not in the .data segment. */
24891 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24892 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24894 /* See legitimize_pic_address for an explanation of the
24895 TARGET_VXWORKS_RTP check. */
24896 /* References to weak symbols cannot be resolved locally:
24897 they may be overridden by a non-weak definition at link
24898 time. */
24899 if (!arm_pic_data_is_text_relative
24900 || (SYMBOL_REF_P (x)
24901 && (!SYMBOL_REF_LOCAL_P (x)
24902 || (SYMBOL_REF_DECL (x)
24903 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24904 || (SYMBOL_REF_FUNCTION_P (x)
24905 && !arm_fdpic_local_funcdesc_p (x)))))
24907 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24908 fputs ("(GOTFUNCDESC)", asm_out_file);
24909 else
24910 fputs ("(GOT)", asm_out_file);
24912 else
24914 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24915 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24916 else
24918 bool is_readonly;
24920 if (!TARGET_FDPIC
24921 || arm_is_segment_info_known (x, &is_readonly))
24922 fputs ("(GOTOFF)", asm_out_file);
24923 else
24924 fputs ("(GOT)", asm_out_file);
24929 /* For FDPIC we also have to mark symbol for .data section. */
24930 if (TARGET_FDPIC
24931 && !making_const_table
24932 && SYMBOL_REF_P (x)
24933 && SYMBOL_REF_FUNCTION_P (x))
24934 fputs ("(FUNCDESC)", asm_out_file);
24936 fputc ('\n', asm_out_file);
24937 return true;
24940 mode = GET_MODE (x);
24942 if (arm_vector_mode_supported_p (mode))
24944 int i, units;
24946 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24948 units = CONST_VECTOR_NUNITS (x);
24949 size = GET_MODE_UNIT_SIZE (mode);
24951 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24952 for (i = 0; i < units; i++)
24954 rtx elt = CONST_VECTOR_ELT (x, i);
24955 assemble_integer
24956 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24958 else
24959 for (i = 0; i < units; i++)
24961 rtx elt = CONST_VECTOR_ELT (x, i);
24962 assemble_real
24963 (*CONST_DOUBLE_REAL_VALUE (elt),
24964 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24965 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24968 return true;
24971 return default_assemble_integer (x, size, aligned_p);
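/* Illustrative output for the word-sized PIC case above (symbol name
   assumed): a reference that cannot be resolved locally is emitted as
       .word   foo(GOT)
   while a local, text-relative reference becomes
       .word   foo(GOTOFF)
   and FDPIC function symbols use (GOTFUNCDESC), (GOTOFFFUNCDESC) or
   (FUNCDESC) instead.  */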
24974 static void
24975 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24977 section *s;
24979 if (!TARGET_AAPCS_BASED)
24981 (is_ctor ?
24982 default_named_section_asm_out_constructor
24983 : default_named_section_asm_out_destructor) (symbol, priority);
24984 return;
24987 /* Put these in the .init_array section, using a special relocation. */
24988 if (priority != DEFAULT_INIT_PRIORITY)
24990 char buf[18];
24991 sprintf (buf, "%s.%.5u",
24992 is_ctor ? ".init_array" : ".fini_array",
24993 priority);
24994 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24996 else if (is_ctor)
24997 s = ctors_section;
24998 else
24999 s = dtors_section;
25001 switch_to_section (s);
25002 assemble_align (POINTER_SIZE);
25003 fputs ("\t.word\t", asm_out_file);
25004 output_addr_const (asm_out_file, symbol);
25005 fputs ("(target1)\n", asm_out_file);
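/* Illustrative example (function name assumed): a constructor with
   priority 101 is placed in section ".init_array.00101" and emitted as
       .word   ctor_fn(target1)
   where the (target1) annotation selects the R_ARM_TARGET1 relocation,
   letting the linker choose absolute or relative addressing for the
   entry.  */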
25008 /* Add a function to the list of static constructors. */
25010 static void
25011 arm_elf_asm_constructor (rtx symbol, int priority)
25013 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25016 /* Add a function to the list of static destructors. */
25018 static void
25019 arm_elf_asm_destructor (rtx symbol, int priority)
25021 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25024 /* A finite state machine takes care of noticing whether or not instructions
25025 can be conditionally executed, thus decreasing execution time and code
25026 size by deleting branch instructions. The fsm is controlled by
25027 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25029 /* The states of the fsm controlling condition codes are:
25030 0: normal, do nothing special
25031 1: make ASM_OUTPUT_OPCODE not output this instruction
25032 2: make ASM_OUTPUT_OPCODE not output this instruction
25033 3: make instructions conditional
25034 4: make instructions conditional
25036 State transitions (state->state by whom under condition):
25037 0 -> 1 final_prescan_insn if the `target' is a label
25038 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25039 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25040 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25041 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25042 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25043 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25044 (the target insn is arm_target_insn).
25046 If the jump clobbers the conditions then we use states 2 and 4.
25048 A similar thing can be done with conditional return insns.
25050 XXX In case the `target' is an unconditional branch, this conditionalising
25051 of the instructions always reduces code size, but not always execution
25052 time. But then, I want to reduce the code size to somewhere near what
25053 /bin/cc produces. */
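/* Illustrative example (not in the original source): for

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
    .L1:

   final_prescan_insn moves the fsm 0 -> 1 on the beq (its target is a
   label), ASM_OUTPUT_OPCODE suppresses the branch (1 -> 3), the add is
   then printed as "addne r1, r1, #1", and the fsm returns to state 0
   when .L1 is emitted.  */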
25055 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25056 instructions. When a COND_EXEC instruction is seen the subsequent
25057 instructions are scanned so that multiple conditional instructions can be
25058 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25059 specify the length and true/false mask for the IT block. These will be
25060 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
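/* Illustrative example (not in the original source): two consecutive
   COND_EXEC insns predicated on EQ and then NE are merged into a single
   block; arm_condexec_mask becomes 0b01 and arm_condexec_masklen 2, so
   thumb2_asm_output_opcode prefixes the first instruction with "ite eq"
   and both instructions are printed with their own condition suffixes.  */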
25062 /* Returns the index of the ARM condition code string in
25063 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25064 COMPARISON should be an rtx like `(eq (...) (...))'. */
25066 enum arm_cond_code
25067 maybe_get_arm_condition_code (rtx comparison)
25069 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25070 enum arm_cond_code code;
25071 enum rtx_code comp_code = GET_CODE (comparison);
25073 if (GET_MODE_CLASS (mode) != MODE_CC)
25074 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25075 XEXP (comparison, 1));
25077 switch (mode)
25079 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25080 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25081 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25082 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25083 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25084 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25085 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25086 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25087 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25088 case E_CC_DLTUmode: code = ARM_CC;
25090 dominance:
25091 if (comp_code == EQ)
25092 return ARM_INVERSE_CONDITION_CODE (code);
25093 if (comp_code == NE)
25094 return code;
25095 return ARM_NV;
25097 case E_CC_NZmode:
25098 switch (comp_code)
25100 case NE: return ARM_NE;
25101 case EQ: return ARM_EQ;
25102 case GE: return ARM_PL;
25103 case LT: return ARM_MI;
25104 default: return ARM_NV;
25107 case E_CC_Zmode:
25108 switch (comp_code)
25110 case NE: return ARM_NE;
25111 case EQ: return ARM_EQ;
25112 default: return ARM_NV;
25115 case E_CC_Nmode:
25116 switch (comp_code)
25118 case NE: return ARM_MI;
25119 case EQ: return ARM_PL;
25120 default: return ARM_NV;
25123 case E_CCFPEmode:
25124 case E_CCFPmode:
25125 /* We can handle all cases except UNEQ and LTGT. */
25126 switch (comp_code)
25128 case GE: return ARM_GE;
25129 case GT: return ARM_GT;
25130 case LE: return ARM_LS;
25131 case LT: return ARM_MI;
25132 case NE: return ARM_NE;
25133 case EQ: return ARM_EQ;
25134 case ORDERED: return ARM_VC;
25135 case UNORDERED: return ARM_VS;
25136 case UNLT: return ARM_LT;
25137 case UNLE: return ARM_LE;
25138 case UNGT: return ARM_HI;
25139 case UNGE: return ARM_PL;
25140 /* UNEQ and LTGT do not have a representation. */
25141 case UNEQ: /* Fall through. */
25142 case LTGT: /* Fall through. */
25143 default: return ARM_NV;
25146 case E_CC_SWPmode:
25147 switch (comp_code)
25149 case NE: return ARM_NE;
25150 case EQ: return ARM_EQ;
25151 case GE: return ARM_LE;
25152 case GT: return ARM_LT;
25153 case LE: return ARM_GE;
25154 case LT: return ARM_GT;
25155 case GEU: return ARM_LS;
25156 case GTU: return ARM_CC;
25157 case LEU: return ARM_CS;
25158 case LTU: return ARM_HI;
25159 default: return ARM_NV;
25162 case E_CC_Cmode:
25163 switch (comp_code)
25165 case LTU: return ARM_CS;
25166 case GEU: return ARM_CC;
25167 default: return ARM_NV;
25170 case E_CC_NVmode:
25171 switch (comp_code)
25173 case GE: return ARM_GE;
25174 case LT: return ARM_LT;
25175 default: return ARM_NV;
25178 case E_CC_Bmode:
25179 switch (comp_code)
25181 case GEU: return ARM_CS;
25182 case LTU: return ARM_CC;
25183 default: return ARM_NV;
25186 case E_CC_Vmode:
25187 switch (comp_code)
25189 case NE: return ARM_VS;
25190 case EQ: return ARM_VC;
25191 default: return ARM_NV;
25194 case E_CC_ADCmode:
25195 switch (comp_code)
25197 case GEU: return ARM_CS;
25198 case LTU: return ARM_CC;
25199 default: return ARM_NV;
25202 case E_CCmode:
25203 case E_CC_RSBmode:
25204 switch (comp_code)
25206 case NE: return ARM_NE;
25207 case EQ: return ARM_EQ;
25208 case GE: return ARM_GE;
25209 case GT: return ARM_GT;
25210 case LE: return ARM_LE;
25211 case LT: return ARM_LT;
25212 case GEU: return ARM_CS;
25213 case GTU: return ARM_HI;
25214 case LEU: return ARM_LS;
25215 case LTU: return ARM_CC;
25216 default: return ARM_NV;
25219 default: gcc_unreachable ();
25223 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25224 static enum arm_cond_code
25225 get_arm_condition_code (rtx comparison)
25227 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25228 gcc_assert (code != ARM_NV);
25229 return code;
25232 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25233 code registers when not targeting Thumb1. The VFP condition register
25234 only exists when generating hard-float code. */
25235 static bool
25236 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25238 if (!TARGET_32BIT)
25239 return false;
25241 *p1 = CC_REGNUM;
25242 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25243 return true;
25246 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25247 instructions. */
25248 void
25249 thumb2_final_prescan_insn (rtx_insn *insn)
25251 rtx_insn *first_insn = insn;
25252 rtx body = PATTERN (insn);
25253 rtx predicate;
25254 enum arm_cond_code code;
25255 int n;
25256 int mask;
25257 int max;
25259 /* max_insns_skipped in the tune was already taken into account in the
25260 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
25261 just emit the IT blocks as best we can. It does not make sense to split
25262 the IT blocks. */
25263 max = MAX_INSN_PER_IT_BLOCK;
25265 /* Remove the previous insn from the count of insns to be output. */
25266 if (arm_condexec_count)
25267 arm_condexec_count--;
25269 /* Nothing to do if we are already inside a conditional block. */
25270 if (arm_condexec_count)
25271 return;
25273 if (GET_CODE (body) != COND_EXEC)
25274 return;
25276 /* Conditional jumps are implemented directly. */
25277 if (JUMP_P (insn))
25278 return;
25280 predicate = COND_EXEC_TEST (body);
25281 arm_current_cc = get_arm_condition_code (predicate);
25283 n = get_attr_ce_count (insn);
25284 arm_condexec_count = 1;
25285 arm_condexec_mask = (1 << n) - 1;
25286 arm_condexec_masklen = n;
25287 /* See if subsequent instructions can be combined into the same block. */
25288 for (;;)
25290 insn = next_nonnote_insn (insn);
25292 /* Jumping into the middle of an IT block is illegal, so a label or
25293 barrier terminates the block. */
25294 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25295 break;
25297 body = PATTERN (insn);
25298 /* USE and CLOBBER aren't really insns, so just skip them. */
25299 if (GET_CODE (body) == USE
25300 || GET_CODE (body) == CLOBBER)
25301 continue;
25303 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25304 if (GET_CODE (body) != COND_EXEC)
25305 break;
25306 /* Maximum number of conditionally executed instructions in a block. */
25307 n = get_attr_ce_count (insn);
25308 if (arm_condexec_masklen + n > max)
25309 break;
25311 predicate = COND_EXEC_TEST (body);
25312 code = get_arm_condition_code (predicate);
25313 mask = (1 << n) - 1;
25314 if (arm_current_cc == code)
25315 arm_condexec_mask |= (mask << arm_condexec_masklen);
25316 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25317 break;
25319 arm_condexec_count++;
25320 arm_condexec_masklen += n;
25322 /* A jump must be the last instruction in a conditional block. */
25323 if (JUMP_P (insn))
25324 break;
25326 /* Restore recog_data (getting the attributes of other insns can
25327 destroy this array, but final.cc assumes that it remains intact
25328 across this call). */
25329 extract_constrain_insn_cached (first_insn);
25332 void
25333 arm_final_prescan_insn (rtx_insn *insn)
25335 /* BODY will hold the body of INSN. */
25336 rtx body = PATTERN (insn);
25338 /* This will be 1 if trying to repeat the trick, and things need to be
25339 reversed if it appears to fail. */
25340 int reverse = 0;
25342 /* If we start with a return insn, we only succeed if we find another one. */
25343 int seeking_return = 0;
25344 enum rtx_code return_code = UNKNOWN;
25346 /* START_INSN will hold the insn from where we start looking. This is the
25347 first insn after the following code_label if REVERSE is true. */
25348 rtx_insn *start_insn = insn;
25350 /* If in state 4, check if the target branch is reached, in order to
25351 change back to state 0. */
25352 if (arm_ccfsm_state == 4)
25354 if (insn == arm_target_insn)
25356 arm_target_insn = NULL;
25357 arm_ccfsm_state = 0;
25359 return;
25362 /* If in state 3, it is possible to repeat the trick, if this insn is an
25363 unconditional branch to a label, and immediately following this branch
25364 is the previous target label which is only used once, and the label this
25365 branch jumps to is not too far off. */
25366 if (arm_ccfsm_state == 3)
25368 if (simplejump_p (insn))
25370 start_insn = next_nonnote_insn (start_insn);
25371 if (BARRIER_P (start_insn))
25373 /* XXX Isn't this always a barrier? */
25374 start_insn = next_nonnote_insn (start_insn);
25376 if (LABEL_P (start_insn)
25377 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25378 && LABEL_NUSES (start_insn) == 1)
25379 reverse = TRUE;
25380 else
25381 return;
25383 else if (ANY_RETURN_P (body))
25385 start_insn = next_nonnote_insn (start_insn);
25386 if (BARRIER_P (start_insn))
25387 start_insn = next_nonnote_insn (start_insn);
25388 if (LABEL_P (start_insn)
25389 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25390 && LABEL_NUSES (start_insn) == 1)
25392 reverse = TRUE;
25393 seeking_return = 1;
25394 return_code = GET_CODE (body);
25396 else
25397 return;
25399 else
25400 return;
25403 gcc_assert (!arm_ccfsm_state || reverse);
25404 if (!JUMP_P (insn))
25405 return;
25407 /* This jump might be paralleled with a clobber of the condition codes;
25408 the jump should always come first. */
25409 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25410 body = XVECEXP (body, 0, 0);
25412 if (reverse
25413 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25414 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25416 int insns_skipped;
25417 int fail = FALSE, succeed = FALSE;
25418 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25419 int then_not_else = TRUE;
25420 rtx_insn *this_insn = start_insn;
25421 rtx label = 0;
25423 /* Register the insn jumped to. */
25424 if (reverse)
25426 if (!seeking_return)
25427 label = XEXP (SET_SRC (body), 0);
25429 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25430 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25431 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25433 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25434 then_not_else = FALSE;
25436 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25438 seeking_return = 1;
25439 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25441 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25443 seeking_return = 1;
25444 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25445 then_not_else = FALSE;
25447 else
25448 gcc_unreachable ();
25450 /* See how many insns this branch skips, and what kind of insns. If all
25451 insns are okay, and the label or unconditional branch to the same
25452 label is not too far away, succeed. */
25453 for (insns_skipped = 0;
25454 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25456 rtx scanbody;
25458 this_insn = next_nonnote_insn (this_insn);
25459 if (!this_insn)
25460 break;
25462 switch (GET_CODE (this_insn))
25464 case CODE_LABEL:
25465 /* Succeed if it is the target label, otherwise fail since
25466 control falls in from somewhere else. */
25467 if (this_insn == label)
25469 arm_ccfsm_state = 1;
25470 succeed = TRUE;
25472 else
25473 fail = TRUE;
25474 break;
25476 case BARRIER:
25477 /* Succeed if the following insn is the target label.
25478 Otherwise fail.
25479 If return insns are used then the last insn in a function
25480 will be a barrier. */
25481 this_insn = next_nonnote_insn (this_insn);
25482 if (this_insn && this_insn == label)
25484 arm_ccfsm_state = 1;
25485 succeed = TRUE;
25487 else
25488 fail = TRUE;
25489 break;
25491 case CALL_INSN:
25492 /* The AAPCS says that conditional calls should not be
25493 used since they make interworking inefficient (the
25494 linker can't transform BL<cond> into BLX). That's
25495 only a problem if the machine has BLX. */
25496 if (arm_arch5t)
25498 fail = TRUE;
25499 break;
25502 /* Succeed if the following insn is the target label, or
25503 if the following two insns are a barrier and the
25504 target label. */
25505 this_insn = next_nonnote_insn (this_insn);
25506 if (this_insn && BARRIER_P (this_insn))
25507 this_insn = next_nonnote_insn (this_insn);
25509 if (this_insn && this_insn == label
25510 && insns_skipped < max_insns_skipped)
25512 arm_ccfsm_state = 1;
25513 succeed = TRUE;
25515 else
25516 fail = TRUE;
25517 break;
25519 case JUMP_INSN:
25520 /* If this is an unconditional branch to the same label, succeed.
25521 If it is to another label, do nothing. If it is conditional,
25522 fail. */
25523 /* XXX Probably, the tests for SET and the PC are
25524 unnecessary. */
25526 scanbody = PATTERN (this_insn);
25527 if (GET_CODE (scanbody) == SET
25528 && GET_CODE (SET_DEST (scanbody)) == PC)
25530 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25531 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25533 arm_ccfsm_state = 2;
25534 succeed = TRUE;
25536 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25537 fail = TRUE;
25539 /* Fail if a conditional return is undesirable (e.g. on a
25540 StrongARM), but still allow this if optimizing for size. */
25541 else if (GET_CODE (scanbody) == return_code
25542 && !use_return_insn (TRUE, NULL)
25543 && !optimize_size)
25544 fail = TRUE;
25545 else if (GET_CODE (scanbody) == return_code)
25547 arm_ccfsm_state = 2;
25548 succeed = TRUE;
25550 else if (GET_CODE (scanbody) == PARALLEL)
25552 switch (get_attr_conds (this_insn))
25554 case CONDS_NOCOND:
25555 break;
25556 default:
25557 fail = TRUE;
25558 break;
25561 else
25562 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25564 break;
25566 case INSN:
25567 /* Instructions using or affecting the condition codes make it
25568 fail. */
25569 scanbody = PATTERN (this_insn);
25570 if (!(GET_CODE (scanbody) == SET
25571 || GET_CODE (scanbody) == PARALLEL)
25572 || get_attr_conds (this_insn) != CONDS_NOCOND)
25573 fail = TRUE;
25574 break;
25576 default:
25577 break;
25580 if (succeed)
25582 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25583 arm_target_label = CODE_LABEL_NUMBER (label);
25584 else
25586 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25588 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25590 this_insn = next_nonnote_insn (this_insn);
25591 gcc_assert (!this_insn
25592 || (!BARRIER_P (this_insn)
25593 && !LABEL_P (this_insn)));
25595 if (!this_insn)
25597 /* Oh dear! We ran off the end; give up. */
25598 extract_constrain_insn_cached (insn);
25599 arm_ccfsm_state = 0;
25600 arm_target_insn = NULL;
25601 return;
25603 arm_target_insn = this_insn;
25606 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25607 what it was. */
25608 if (!reverse)
25609 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25611 if (reverse || then_not_else)
25612 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25615 /* Restore recog_data (getting the attributes of other insns can
25616 destroy this array, but final.cc assumes that it remains intact
25617 across this call). */
25618 extract_constrain_insn_cached (insn);
25622 /* Output IT instructions. */
25623 void
25624 thumb2_asm_output_opcode (FILE * stream)
25626 char buff[5];
25627 int n;
25629 if (arm_condexec_mask)
25631 for (n = 0; n < arm_condexec_masklen; n++)
25632 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25633 buff[n] = 0;
25634 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25635 arm_condition_codes[arm_current_cc]);
25636 arm_condexec_mask = 0;
25640 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25641 UNITS_PER_WORD bytes wide. */
25642 static unsigned int
25643 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25645 if (IS_VPR_REGNUM (regno))
25646 return CEIL (GET_MODE_SIZE (mode), 2);
25648 if (TARGET_32BIT
25649 && regno > PC_REGNUM
25650 && regno != FRAME_POINTER_REGNUM
25651 && regno != ARG_POINTER_REGNUM
25652 && !IS_VFP_REGNUM (regno))
25653 return 1;
25655 return ARM_NUM_REGS (mode);
25658 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25659 static bool
25660 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25662 if (GET_MODE_CLASS (mode) == MODE_CC)
25663 return (regno == CC_REGNUM
25664 || (TARGET_VFP_BASE
25665 && regno == VFPCC_REGNUM));
25667 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25668 return false;
25670 if (IS_VPR_REGNUM (regno))
25671 return VALID_MVE_PRED_MODE (mode);
25673 if (TARGET_THUMB1)
25674 /* For the Thumb we only allow values bigger than SImode in
25675 registers 0 - 6, so that there is always a second low
25676 register available to hold the upper part of the value.
25677 We probably ought to ensure that the register is the
25678 start of an even numbered register pair. */
25679 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25681 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25683 if (mode == DFmode || mode == DImode)
25684 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25686 if (mode == HFmode || mode == BFmode || mode == HImode
25687 || mode == SFmode || mode == SImode)
25688 return VFP_REGNO_OK_FOR_SINGLE (regno);
25690 if (TARGET_NEON)
25691 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25692 || (VALID_NEON_QREG_MODE (mode)
25693 && NEON_REGNO_OK_FOR_QUAD (regno))
25694 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25695 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25696 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25697 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25698 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25699 if (TARGET_HAVE_MVE)
25700 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25701 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25702 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25704 return false;
25707 if (TARGET_REALLY_IWMMXT)
25709 if (IS_IWMMXT_GR_REGNUM (regno))
25710 return mode == SImode;
25712 if (IS_IWMMXT_REGNUM (regno))
25713 return VALID_IWMMXT_REG_MODE (mode);
25716 /* We allow almost any value to be stored in the general registers.
25717 Restrict doubleword quantities to even register pairs in ARM state
25718 so that we can use ldrd. The same restriction applies for MVE
25719 in order to support Armv8.1-M Mainline instructions.
25720 Do not allow very large Neon structure opaque modes in general
25721 registers; they would use too many. */
25722 if (regno <= LAST_ARM_REGNUM)
25724 if (ARM_NUM_REGS (mode) > 4)
25725 return false;
25727 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25728 return true;
25730 return !((TARGET_LDRD || TARGET_CDE)
25731 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25734 if (regno == FRAME_POINTER_REGNUM
25735 || regno == ARG_POINTER_REGNUM)
25736 /* We only allow integers in the fake hard registers. */
25737 return GET_MODE_CLASS (mode) == MODE_INT;
25739 return false;
25742 /* Implement TARGET_MODES_TIEABLE_P. */
25744 static bool
25745 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25747 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25748 return true;
25750 if (TARGET_HAVE_MVE
25751 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25752 return true;
25754 /* We specifically want to allow elements of "structure" modes to
25755 be tieable to the structure. This more general condition allows
25756 other rarer situations too. */
25757 if ((TARGET_NEON
25758 && (VALID_NEON_DREG_MODE (mode1)
25759 || VALID_NEON_QREG_MODE (mode1)
25760 || VALID_NEON_STRUCT_MODE (mode1))
25761 && (VALID_NEON_DREG_MODE (mode2)
25762 || VALID_NEON_QREG_MODE (mode2)
25763 || VALID_NEON_STRUCT_MODE (mode2)))
25764 || (TARGET_HAVE_MVE
25765 && (VALID_MVE_MODE (mode1)
25766 || VALID_MVE_STRUCT_MODE (mode1))
25767 && (VALID_MVE_MODE (mode2)
25768 || VALID_MVE_STRUCT_MODE (mode2))))
25769 return true;
25771 return false;
25774 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25775 not used in arm mode. */
25777 enum reg_class
25778 arm_regno_class (int regno)
25780 if (regno == PC_REGNUM)
25781 return NO_REGS;
25783 if (IS_VPR_REGNUM (regno))
25784 return VPR_REG;
25786 if (IS_PAC_REGNUM (regno))
25787 return PAC_REG;
25789 if (TARGET_THUMB1)
25791 if (regno == STACK_POINTER_REGNUM)
25792 return STACK_REG;
25793 if (regno == CC_REGNUM)
25794 return CC_REG;
25795 if (regno < 8)
25796 return LO_REGS;
25797 return HI_REGS;
25800 if (TARGET_THUMB2 && regno < 8)
25801 return LO_REGS;
25803 if ( regno <= LAST_ARM_REGNUM
25804 || regno == FRAME_POINTER_REGNUM
25805 || regno == ARG_POINTER_REGNUM)
25806 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25808 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25809 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25811 if (IS_VFP_REGNUM (regno))
25813 if (regno <= D7_VFP_REGNUM)
25814 return VFP_D0_D7_REGS;
25815 else if (regno <= LAST_LO_VFP_REGNUM)
25816 return VFP_LO_REGS;
25817 else
25818 return VFP_HI_REGS;
25821 if (IS_IWMMXT_REGNUM (regno))
25822 return IWMMXT_REGS;
25824 if (IS_IWMMXT_GR_REGNUM (regno))
25825 return IWMMXT_GR_REGS;
25827 return NO_REGS;
25830 /* Handle a special case when computing the offset
25831 of an argument from the frame pointer. */
25832 int
25833 arm_debugger_arg_offset (int value, rtx addr)
25835 rtx_insn *insn;
25837 /* We are only interested if dbxout_parms() failed to compute the offset. */
25838 if (value != 0)
25839 return 0;
25841 /* We can only cope with the case where the address is held in a register. */
25842 if (!REG_P (addr))
25843 return 0;
25845 /* If we are using the frame pointer to point at the argument, then
25846 an offset of 0 is correct. */
25847 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25848 return 0;
25850 /* If we are using the stack pointer to point at the
25851 argument, then an offset of 0 is correct. */
25852 /* ??? Check this is consistent with thumb2 frame layout. */
25853 if ((TARGET_THUMB || !frame_pointer_needed)
25854 && REGNO (addr) == SP_REGNUM)
25855 return 0;
25857 /* Oh dear. The argument is pointed to by a register rather
25858 than being held in a register, or being stored at a known
25859 offset from the frame pointer. Since GDB only understands
25860 those two kinds of argument we must translate the address
25861 held in the register into an offset from the frame pointer.
25862 We do this by searching through the insns for the function
25863 looking to see where this register gets its value. If the
25864 register is initialized from the frame pointer plus an offset
25865 then we are in luck and we can continue, otherwise we give up.
25867 This code is exercised by producing debugging information
25868 for a function with arguments like this:
25870 double func (double a, double b, int c, double d) {return d;}
25872 Without this code the stab for parameter 'd' will be set to
25873 an offset of 0 from the frame pointer, rather than 8. */
25875 /* The if() statement says:
25877 If the insn is a normal instruction
25878 and if the insn is setting the value in a register
25879 and if the register being set is the register holding the address of the argument
25880 and if the address is computed by an addition
25881 that involves adding to a register
25882 which is the frame pointer
25883 a constant integer
25885 then... */
25887 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25889 if ( NONJUMP_INSN_P (insn)
25890 && GET_CODE (PATTERN (insn)) == SET
25891 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25892 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25893 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25894 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25895 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25898 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25900 break;
25904 if (value == 0)
25906 debug_rtx (addr);
25907 warning (0, "unable to compute real location of stacked parameter");
25908 value = 8; /* XXX magic hack */
25911 return value;
25914 /* Implement TARGET_PROMOTED_TYPE. */
25916 static tree
25917 arm_promoted_type (const_tree t)
25919 if (SCALAR_FLOAT_TYPE_P (t)
25920 && TYPE_PRECISION (t) == 16
25921 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25922 return float_type_node;
25923 return NULL_TREE;
25926 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25927 This simply adds HFmode as a supported mode; even though we don't
25928 implement arithmetic on this type directly, it's supported by
25929 optabs conversions, much the way the double-word arithmetic is
25930 special-cased in the default hook. */
25932 static bool
25933 arm_scalar_mode_supported_p (scalar_mode mode)
25935 if (mode == HFmode)
25936 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25937 else if (ALL_FIXED_POINT_MODE_P (mode))
25938 return true;
25939 else
25940 return default_scalar_mode_supported_p (mode);
25943 /* Set the value of FLT_EVAL_METHOD.
25944 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25946 0: evaluate all operations and constants, whose semantic type has at
25947 most the range and precision of type float, to the range and
25948 precision of float; evaluate all other operations and constants to
25949 the range and precision of the semantic type;
25951 N, where _FloatN is a supported interchange floating type
25952 evaluate all operations and constants, whose semantic type has at
25953 most the range and precision of _FloatN type, to the range and
25954 precision of the _FloatN type; evaluate all other operations and
25955 constants to the range and precision of the semantic type;
25957 If we have the ARMv8.2-A extensions then we support _Float16 in native
25958 precision, so we should set this to 16. Otherwise, we support the type,
25959 but want to evaluate expressions in float precision, so set this to
25960 0. */
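/* Illustrative consequence (not in the original source): with
   -mfp16-format=ieee but without the ARMv8.2-A FP16 instructions, the
   product of two _Float16 values is computed in float and only the final
   result is narrowed (FLT_EVAL_METHOD 0); with the +fp16 extension the
   arithmetic is performed directly in _Float16 (FLT_EVAL_METHOD 16).  */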
25962 static enum flt_eval_method
25963 arm_excess_precision (enum excess_precision_type type)
25965 switch (type)
25967 case EXCESS_PRECISION_TYPE_FAST:
25968 case EXCESS_PRECISION_TYPE_STANDARD:
25969 /* We can calculate either in 16-bit range and precision or
25970 32-bit range and precision. Make that decision based on whether
25971 we have native support for the ARMv8.2-A 16-bit floating-point
25972 instructions or not. */
25973 return (TARGET_VFP_FP16INST
25974 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25975 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25976 case EXCESS_PRECISION_TYPE_IMPLICIT:
25977 case EXCESS_PRECISION_TYPE_FLOAT16:
25978 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25979 default:
25980 gcc_unreachable ();
25982 return FLT_EVAL_METHOD_UNPREDICTABLE;
25986 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25987 _Float16 if we are using anything other than ieee format for 16-bit
25988 floating point. Otherwise, punt to the default implementation. */
25989 static opt_scalar_float_mode
25990 arm_floatn_mode (int n, bool extended)
25992 if (!extended && n == 16)
25994 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25995 return HFmode;
25996 return opt_scalar_float_mode ();
25999 return default_floatn_mode (n, extended);
26003 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26004 not to early-clobber SRC registers in the process.
26006 We assume that the operands described by SRC and DEST represent a
26007 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26008 number of components into which the copy has been decomposed. */
26009 void
26010 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26012 unsigned int i;
26014 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26015 || REGNO (operands[0]) < REGNO (operands[1]))
26017 for (i = 0; i < count; i++)
26019 operands[2 * i] = dest[i];
26020 operands[2 * i + 1] = src[i];
26023 else
26025 for (i = 0; i < count; i++)
26027 operands[2 * i] = dest[count - i - 1];
26028 operands[2 * i + 1] = src[count - i - 1];
26033 /* Split operands into moves from op[1] + op[2] into op[0]. */
26035 void
26036 neon_split_vcombine (rtx operands[3])
26038 unsigned int dest = REGNO (operands[0]);
26039 unsigned int src1 = REGNO (operands[1]);
26040 unsigned int src2 = REGNO (operands[2]);
26041 machine_mode halfmode = GET_MODE (operands[1]);
26042 unsigned int halfregs = REG_NREGS (operands[1]);
26043 rtx destlo, desthi;
26045 if (src1 == dest && src2 == dest + halfregs)
26047 /* No-op move. Can't split to nothing; emit something. */
26048 emit_note (NOTE_INSN_DELETED);
26049 return;
26052 /* Preserve register attributes for variable tracking. */
26053 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26054 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26055 GET_MODE_SIZE (halfmode));
26057 /* Special case of reversed high/low parts. Use VSWP. */
26058 if (src2 == dest && src1 == dest + halfregs)
26060 rtx x = gen_rtx_SET (destlo, operands[1]);
26061 rtx y = gen_rtx_SET (desthi, operands[2]);
26062 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26063 return;
26066 if (!reg_overlap_mentioned_p (operands[2], destlo))
26068 /* Try to avoid unnecessary moves if part of the result
26069 is in the right place already. */
26070 if (src1 != dest)
26071 emit_move_insn (destlo, operands[1]);
26072 if (src2 != dest + halfregs)
26073 emit_move_insn (desthi, operands[2]);
26075 else
26077 if (src2 != dest + halfregs)
26078 emit_move_insn (desthi, operands[2]);
26079 if (src1 != dest)
26080 emit_move_insn (destlo, operands[1]);
26084 /* Return the number (counting from 0) of
26085 the least significant set bit in MASK. */
26087 inline static int
26088 number_of_first_bit_set (unsigned mask)
26090 return ctz_hwi (mask);
26093 /* Like emit_multi_reg_push, but allowing for a different set of
26094 registers to be described as saved. MASK is the set of registers
26095 to be saved; REAL_REGS is the set of registers to be described as
26096 saved. If REAL_REGS is 0, only describe the stack adjustment. */
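/* Illustrative scenario (register choice assumed): when a high register
   such as r8 has been copied into a low register so that it can be
   pushed, MASK might be 1 << 4 (push {r4}) while REAL_REGS is 1 << 8,
   so the unwind information describes r8, not r4, as the register saved
   in that stack slot.  */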
26098 static rtx_insn *
26099 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26101 unsigned long regno;
26102 rtx par[10], tmp, reg;
26103 rtx_insn *insn;
26104 int i, j;
26106 /* Build the parallel of the registers actually being stored. */
26107 for (i = 0; mask; ++i, mask &= mask - 1)
26109 regno = ctz_hwi (mask);
26110 reg = gen_rtx_REG (SImode, regno);
26112 if (i == 0)
26113 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26114 else
26115 tmp = gen_rtx_USE (VOIDmode, reg);
26117 par[i] = tmp;
26120 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26121 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26122 tmp = gen_frame_mem (BLKmode, tmp);
26123 tmp = gen_rtx_SET (tmp, par[0]);
26124 par[0] = tmp;
26126 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26127 insn = emit_insn (tmp);
26129 /* Always build the stack adjustment note for unwind info. */
26130 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26131 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26132 par[0] = tmp;
26134 /* Build the parallel of the registers recorded as saved for unwind. */
26135 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26137 regno = ctz_hwi (real_regs);
26138 reg = gen_rtx_REG (SImode, regno);
26140 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26141 tmp = gen_frame_mem (SImode, tmp);
26142 tmp = gen_rtx_SET (tmp, reg);
26143 RTX_FRAME_RELATED_P (tmp) = 1;
26144 par[j + 1] = tmp;
26147 if (j == 0)
26148 tmp = par[0];
26149 else
26151 RTX_FRAME_RELATED_P (par[0]) = 1;
26152 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26155 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26157 return insn;
26160 /* Emit code to push or pop registers to or from the stack. F is the
26161 assembly file. MASK is the registers to pop. */
26162 static void
26163 thumb_pop (FILE *f, unsigned long mask)
26165 int regno;
26166 int lo_mask = mask & 0xFF;
26168 gcc_assert (mask);
26170 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26172 /* Special case. Do not generate a POP PC statement here, do it in
26173 thumb_exit() */
26174 thumb_exit (f, -1);
26175 return;
26178 fprintf (f, "\tpop\t{");
26180 /* Look at the low registers first. */
26181 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26183 if (lo_mask & 1)
26185 asm_fprintf (f, "%r", regno);
26187 if ((lo_mask & ~1) != 0)
26188 fprintf (f, ", ");
26192 if (mask & (1 << PC_REGNUM))
26194 /* Catch popping the PC. */
26195 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26196 || IS_CMSE_ENTRY (arm_current_func_type ()))
26198 /* The PC is never popped directly; instead
26199 it is popped into r3 and then BX is used. */
26200 fprintf (f, "}\n");
26202 thumb_exit (f, -1);
26204 return;
26206 else
26208 if (mask & 0xFF)
26209 fprintf (f, ", ");
26211 asm_fprintf (f, "%r", PC_REGNUM);
26215 fprintf (f, "}\n");
26218 /* Generate code to return from a thumb function.
26219 If 'reg_containing_return_addr' is -1, then the return address is
26220 actually on the stack, at the stack pointer.
26222 Note: do not forget to update length attribute of corresponding insn pattern
26223 when changing assembly output (eg. length attribute of epilogue_insns when
26224 updating Armv8-M Baseline Security Extensions register clearing
26225 sequences). */
26226 static void
26227 thumb_exit (FILE *f, int reg_containing_return_addr)
26229 unsigned regs_available_for_popping;
26230 unsigned regs_to_pop;
26231 int pops_needed;
26232 unsigned available;
26233 unsigned required;
26234 machine_mode mode;
26235 int size;
26236 int restore_a4 = FALSE;
26238 /* Compute the registers we need to pop. */
26239 regs_to_pop = 0;
26240 pops_needed = 0;
26242 if (reg_containing_return_addr == -1)
26244 regs_to_pop |= 1 << LR_REGNUM;
26245 ++pops_needed;
26248 if (TARGET_BACKTRACE)
26250 /* Restore the (ARM) frame pointer and stack pointer. */
26251 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26252 pops_needed += 2;
26255 /* If there is nothing to pop then just emit the BX instruction and
26256 return. */
26257 if (pops_needed == 0)
26259 if (crtl->calls_eh_return)
26260 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26262 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26264 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26265 emitted by cmse_nonsecure_entry_clear_before_return (). */
26266 if (!TARGET_HAVE_FPCXT_CMSE)
26267 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26268 reg_containing_return_addr);
26269 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26271 else
26272 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26273 return;
26275 /* Otherwise if we are not supporting interworking and we have not created
26276 a backtrace structure and the function was not entered in ARM mode then
26277 just pop the return address straight into the PC. */
26278 else if (!TARGET_INTERWORK
26279 && !TARGET_BACKTRACE
26280 && !is_called_in_ARM_mode (current_function_decl)
26281 && !crtl->calls_eh_return
26282 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26284 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26285 return;
26288 /* Find out how many of the (return) argument registers we can corrupt. */
26289 regs_available_for_popping = 0;
26291 /* If returning via __builtin_eh_return, the bottom three registers
26292 all contain information needed for the return. */
26293 if (crtl->calls_eh_return)
26294 size = 12;
26295 else
26297 /* If we can deduce the registers used from the function's
26298 return value. This is more reliable than examining
26299 df_regs_ever_live_p () because that will be set if the register is
26300 ever used in the function, not just if the register is used
26301 to hold a return value. */
26303 if (crtl->return_rtx != 0)
26304 mode = GET_MODE (crtl->return_rtx);
26305 else
26306 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26308 size = GET_MODE_SIZE (mode);
26310 if (size == 0)
26312 /* In a void function we can use any argument register.
26313 In a function that returns a structure on the stack
26314 we can use the second and third argument registers. */
26315 if (mode == VOIDmode)
26316 regs_available_for_popping =
26317 (1 << ARG_REGISTER (1))
26318 | (1 << ARG_REGISTER (2))
26319 | (1 << ARG_REGISTER (3));
26320 else
26321 regs_available_for_popping =
26322 (1 << ARG_REGISTER (2))
26323 | (1 << ARG_REGISTER (3));
26325 else if (size <= 4)
26326 regs_available_for_popping =
26327 (1 << ARG_REGISTER (2))
26328 | (1 << ARG_REGISTER (3));
26329 else if (size <= 8)
26330 regs_available_for_popping =
26331 (1 << ARG_REGISTER (3));
26334 /* Match registers to be popped with registers into which we pop them. */
26335 for (available = regs_available_for_popping,
26336 required = regs_to_pop;
26337 required != 0 && available != 0;
26338 available &= ~(available & - available),
26339 required &= ~(required & - required))
26340 -- pops_needed;
26342 /* If we have any popping registers left over, remove them. */
26343 if (available > 0)
26344 regs_available_for_popping &= ~available;
26346 /* Otherwise if we need another popping register we can use
26347 the fourth argument register. */
26348 else if (pops_needed)
26350 /* If we have not found any free argument registers and
26351 reg a4 contains the return address, we must move it. */
26352 if (regs_available_for_popping == 0
26353 && reg_containing_return_addr == LAST_ARG_REGNUM)
26355 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26356 reg_containing_return_addr = LR_REGNUM;
26358 else if (size > 12)
26360 /* Register a4 is being used to hold part of the return value,
26361 but we have dire need of a free, low register. */
26362 restore_a4 = TRUE;
26364 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26367 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26369 /* The fourth argument register is available. */
26370 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26372 --pops_needed;
26376 /* Pop as many registers as we can. */
26377 thumb_pop (f, regs_available_for_popping);
26379 /* Process the registers we popped. */
26380 if (reg_containing_return_addr == -1)
26382 /* The return address was popped into the lowest numbered register. */
26383 regs_to_pop &= ~(1 << LR_REGNUM);
26385 reg_containing_return_addr =
26386 number_of_first_bit_set (regs_available_for_popping);
26388 /* Remove this register from the mask of available registers, so that
26389 the return address will not be corrupted by further pops. */
26390 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26393 /* If we popped other registers then handle them here. */
26394 if (regs_available_for_popping)
26396 int frame_pointer;
26398 /* Work out which register currently contains the frame pointer. */
26399 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26401 /* Move it into the correct place. */
26402 asm_fprintf (f, "\tmov\t%r, %r\n",
26403 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26405 /* (Temporarily) remove it from the mask of popped registers. */
26406 regs_available_for_popping &= ~(1 << frame_pointer);
26407 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26409 if (regs_available_for_popping)
26411 int stack_pointer;
26413 /* We popped the stack pointer as well,
26414 find the register that contains it. */
26415 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26417 /* Move it into the stack register. */
26418 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26420 /* At this point we have popped all necessary registers, so
26421 do not worry about restoring regs_available_for_popping
26422 to its correct value:
26424 assert (pops_needed == 0)
26425 assert (regs_available_for_popping == (1 << frame_pointer))
26426 assert (regs_to_pop == (1 << STACK_POINTER)) */
26428 else
26430 /* Since we have just moved the popped value into the frame
26431 pointer, the popping register is available for reuse, and
26432 we know that we still have the stack pointer left to pop. */
26433 regs_available_for_popping |= (1 << frame_pointer);
26437 /* If we still have registers left on the stack, but we no longer have
26438 any registers into which we can pop them, then we must move the return
26439 address into the link register and make available the register that
26440 contained it. */
26441 if (regs_available_for_popping == 0 && pops_needed > 0)
26443 regs_available_for_popping |= 1 << reg_containing_return_addr;
26445 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26446 reg_containing_return_addr);
26448 reg_containing_return_addr = LR_REGNUM;
26451 /* If we have registers left on the stack then pop some more.
26452 We know that at most we will want to pop FP and SP. */
26453 if (pops_needed > 0)
26455 int popped_into;
26456 int move_to;
26458 thumb_pop (f, regs_available_for_popping);
26460 /* We have popped either FP or SP.
26461 Move whichever one it is into the correct register. */
26462 popped_into = number_of_first_bit_set (regs_available_for_popping);
26463 move_to = number_of_first_bit_set (regs_to_pop);
26465 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26466 --pops_needed;
26469 /* If we still have not popped everything then we must have only
26470 had one register available to us and we are now popping the SP. */
26471 if (pops_needed > 0)
26473 int popped_into;
26475 thumb_pop (f, regs_available_for_popping);
26477 popped_into = number_of_first_bit_set (regs_available_for_popping);
26479 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26481 assert (regs_to_pop == (1 << STACK_POINTER))
26482 assert (pops_needed == 1)
26486 /* If necessary restore the a4 register. */
26487 if (restore_a4)
26489 if (reg_containing_return_addr != LR_REGNUM)
26491 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26492 reg_containing_return_addr = LR_REGNUM;
26495 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26498 if (crtl->calls_eh_return)
26499 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26501 /* Return to caller. */
26502 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26504 /* This is for the cases where LR is not being used to contain the return
26505 address. It may therefore contain information that we might not want
26506 to leak, hence it must be cleared. The value in R0 will never be a
26507 secret at this point, so it is safe to use it, see the clearing code
26508 in cmse_nonsecure_entry_clear_before_return (). */
26509 if (reg_containing_return_addr != LR_REGNUM)
26510 asm_fprintf (f, "\tmov\tlr, r0\n");
26512 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26513 by cmse_nonsecure_entry_clear_before_return (). */
26514 if (!TARGET_HAVE_FPCXT_CMSE)
26515 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26516 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26518 else
26519 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26522 /* Scan INSN just before assembler is output for it.
26523 For Thumb-1, we track the status of the condition codes; this
26524 information is used in the cbranchsi4_insn pattern. */
26525 void
26526 thumb1_final_prescan_insn (rtx_insn *insn)
26528 if (flag_print_asm_name)
26529 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26530 INSN_ADDRESSES (INSN_UID (insn)));
26531 /* Don't overwrite the previous setter when we get to a cbranch. */
26532 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26534 enum attr_conds conds;
26536 if (cfun->machine->thumb1_cc_insn)
26538 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26539 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26540 CC_STATUS_INIT;
26542 conds = get_attr_conds (insn);
26543 if (conds == CONDS_SET)
26545 rtx set = single_set (insn);
26546 cfun->machine->thumb1_cc_insn = insn;
26547 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26548 cfun->machine->thumb1_cc_op1 = const0_rtx;
26549 cfun->machine->thumb1_cc_mode = CC_NZmode;
26550 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26552 rtx src1 = XEXP (SET_SRC (set), 1);
26553 if (src1 == const0_rtx)
26554 cfun->machine->thumb1_cc_mode = CCmode;
26556 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26558 /* Record the src register operand instead of dest because
26559 cprop_hardreg pass propagates src. */
26560 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26563 else if (conds != CONDS_NOCOND)
26564 cfun->machine->thumb1_cc_insn = NULL_RTX;
26567 /* Check if unexpected far jump is used. */
26568 if (cfun->machine->lr_save_eliminated
26569 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26570 internal_error("Unexpected thumb1 far jump");
26574 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26576 unsigned HOST_WIDE_INT mask = 0xff;
26577 int i;
26579 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26580 if (val == 0) /* XXX */
26581 return 0;
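/* The loop below checks whether VAL is an 8-bit constant shifted left by
   0 to 24 bits.  For example, 0xff000000 matches at i == 24, while 0x101
   would need a 9-bit window and so never matches.  */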
26583 for (i = 0; i < 25; i++)
26584 if ((val & (mask << i)) == val)
26585 return 1;
26587 return 0;
26590 /* Returns nonzero if the current function contains,
26591 or might contain a far jump. */
26592 static int
26593 thumb_far_jump_used_p (void)
26595 rtx_insn *insn;
26596 bool far_jump = false;
26597 unsigned int func_size = 0;
26599 /* If we have already decided that far jumps may be used,
26600 do not bother checking again, and always return true even if
26601 it turns out that they are not being used. Once we have made
26602 the decision that far jumps are present (and that hence the link
26603 register will be pushed onto the stack) we cannot go back on it. */
26604 if (cfun->machine->far_jump_used)
26605 return 1;
26607 /* If this function is not being called from the prologue/epilogue
26608 generation code then it must be being called from the
26609 INITIAL_ELIMINATION_OFFSET macro. */
26610 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26612 /* In this case we know that we are being asked about the elimination
26613 of the arg pointer register. If that register is not being used,
26614 then there are no arguments on the stack, and we do not have to
26615 worry that a far jump might force the prologue to push the link
26616 register, changing the stack offsets. In this case we can just
26617 return false, since the presence of far jumps in the function will
26618 not affect stack offsets.
26620 If the arg pointer is live (or if it was live, but has now been
26621 eliminated and so set to dead) then we do have to test to see if
26622 the function might contain a far jump. This test can lead to some
26623 false negatives, since before reload is completed the length of
26624 branch instructions is not known, so gcc defaults to returning their
26625 longest length, which in turn sets the far jump attribute to true.
26627 A false negative will not result in bad code being generated, but it
26628 will result in a needless push and pop of the link register. We
26629 hope that this does not occur too often.
26631 If we need doubleword stack alignment this could affect the other
26632 elimination offsets so we can't risk getting it wrong. */
26633 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26634 cfun->machine->arg_pointer_live = 1;
26635 else if (!cfun->machine->arg_pointer_live)
26636 return 0;
26639 /* We should not change far_jump_used during or after reload, as there is
26640 no chance to change stack frame layout. */
26641 if (reload_in_progress || reload_completed)
26642 return 0;
26644 /* Check to see if the function contains a branch
26645 insn with the far jump attribute set. */
26646 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26648 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26650 far_jump = true;
26652 func_size += get_attr_length (insn);
26655 /* The far_jump attribute is always true for thumb1 before the
26656 shorten_branch pass, so checking the attribute before that pass
26657 is not very useful.
26659 The following heuristic tries to estimate more accurately whether
26660 a far jump will finally be used.  It is very conservative, as there is
26661 no chance to roll back a decision not to use far jumps.
26663 Thumb1 long branch offsets range from -2048 to 2046.  The worst case is
26664 each 2-byte insn being associated with a 4-byte constant pool entry.
26665 Using function size 2048/3 as the threshold is conservative enough. */
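/* For illustration: a Thumb-1 function whose insns total roughly 700
   bytes gives 700 * 3 = 2100 >= 2048, so we conservatively assume a far
   jump and keep the LR save; at roughly 680 bytes (2040 < 2048) we do
   not.  */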
26666 if (far_jump)
26668 if ((func_size * 3) >= 2048)
26670 /* Record the fact that we have decided that
26671 the function does use far jumps. */
26672 cfun->machine->far_jump_used = 1;
26673 return 1;
26677 return 0;
26680 /* Return nonzero if FUNC must be entered in ARM mode. */
26681 static bool
26682 is_called_in_ARM_mode (tree func)
26684 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26686 /* Ignore the problem about functions whose address is taken. */
26687 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26688 return true;
26690 #ifdef ARM_PE
26691 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26692 #else
26693 return false;
26694 #endif
26697 /* Given the stack offsets and register mask in OFFSETS, decide how
26698 many additional registers to push instead of subtracting a constant
26699 from SP. For epilogues the principle is the same except we use pop.
26700 FOR_PROLOGUE indicates which we're generating. */
26701 static int
26702 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26704 HOST_WIDE_INT amount;
26705 unsigned long live_regs_mask = offsets->saved_regs_mask;
26706 /* Extract a mask of the ones we can give to the Thumb's push/pop
26707 instruction. */
26708 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26709 /* Then count how many other high registers will need to be pushed. */
26710 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26711 int n_free, reg_base, size;
26713 if (!for_prologue && frame_pointer_needed)
26714 amount = offsets->locals_base - offsets->saved_regs;
26715 else
26716 amount = offsets->outgoing_args - offsets->saved_regs;
26718 /* If the stack frame size is 512 exactly, we can save one load
26719 instruction, which should make this a win even when optimizing
26720 for speed. */
26721 if (!optimize_size && amount != 512)
26722 return 0;
26724 /* Can't do this if there are high registers to push. */
26725 if (high_regs_pushed != 0)
26726 return 0;
26728 /* Shouldn't do it in the prologue if no registers would normally
26729 be pushed at all. In the epilogue, also allow it if we'll have
26730 a pop insn for the PC. */
26731 if (l_mask == 0
26732 && (for_prologue
26733 || TARGET_BACKTRACE
26734 || (live_regs_mask & 1 << LR_REGNUM) == 0
26735 || TARGET_INTERWORK
26736 || crtl->args.pretend_args_size != 0))
26737 return 0;
26739 /* Don't do this if thumb_expand_prologue wants to emit instructions
26740 between the push and the stack frame allocation. */
26741 if (for_prologue
26742 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26743 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26744 return 0;
26746 reg_base = 0;
26747 n_free = 0;
26748 if (!for_prologue)
26750 size = arm_size_return_regs ();
26751 reg_base = ARM_NUM_INTS (size);
26752 live_regs_mask >>= reg_base;
26755 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26756 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26758 live_regs_mask >>= 1;
26759 n_free++;
26762 if (n_free == 0)
26763 return 0;
26764 gcc_assert (amount / 4 * 4 == amount);
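/* Rough illustration of the arithmetic below: the largest immediate a
   single Thumb-1 SP subtract can handle is 508.  With amount == 516 and
   n_free == 2 we return (516 - 508) / 4 == 2, so two extra registers are
   pushed and the remaining 508-byte adjustment fits in one instruction.
   With amount == 8 and n_free == 3 we return 8 / 4 == 2, replacing the
   SP adjustment entirely.  */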
26766 if (amount >= 512 && (amount - n_free * 4) < 512)
26767 return (amount - 508) / 4;
26768 if (amount <= n_free * 4)
26769 return amount / 4;
26770 return 0;
26773 /* The bits which aren't usefully expanded as rtl. */
26774 const char *
26775 thumb1_unexpanded_epilogue (void)
26777 arm_stack_offsets *offsets;
26778 int regno;
26779 unsigned long live_regs_mask = 0;
26780 int high_regs_pushed = 0;
26781 int extra_pop;
26782 int had_to_push_lr;
26783 int size;
26785 if (cfun->machine->return_used_this_function != 0)
26786 return "";
26788 if (IS_NAKED (arm_current_func_type ()))
26789 return "";
26791 offsets = arm_get_frame_offsets ();
26792 live_regs_mask = offsets->saved_regs_mask;
26793 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26795 /* We can deduce the registers used from the function's return value.
26796 This is more reliable than examining df_regs_ever_live_p () because that
26797 will be set if the register is ever used in the function, not just if
26798 the register is used to hold a return value. */
26799 size = arm_size_return_regs ();
26801 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26802 if (extra_pop > 0)
26804 unsigned long extra_mask = (1 << extra_pop) - 1;
26805 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
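/* E.g. for a function returning a 64-bit value, size is 8 and
   ARM_NUM_INTS (8) is 2, so any extra registers are popped starting at
   r2, leaving the r0/r1 return value untouched.  */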
26808 /* The prolog may have pushed some high registers to use as
26809 work registers. e.g. the testsuite file:
26810 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26811 compiles to produce:
26812 push {r4, r5, r6, r7, lr}
26813 mov r7, r9
26814 mov r6, r8
26815 push {r6, r7}
26816 as part of the prolog. We have to undo that pushing here. */
26818 if (high_regs_pushed)
26820 unsigned long mask = live_regs_mask & 0xff;
26821 int next_hi_reg;
26823 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26825 if (mask == 0)
26826 /* Oh dear! We have no low registers into which we can pop
26827 high registers! */
26828 internal_error
26829 ("no low registers available for popping high registers");
26831 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26832 if (live_regs_mask & (1 << next_hi_reg))
26833 break;
26835 while (high_regs_pushed)
26837 /* Find lo register(s) into which the high register(s) can
26838 be popped. */
26839 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26841 if (mask & (1 << regno))
26842 high_regs_pushed--;
26843 if (high_regs_pushed == 0)
26844 break;
26847 if (high_regs_pushed == 0 && regno >= 0)
26848 mask &= ~((1 << regno) - 1);
26850 /* Pop the values into the low register(s). */
26851 thumb_pop (asm_out_file, mask);
26853 /* Move the value(s) into the high registers. */
26854 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26856 if (mask & (1 << regno))
26858 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26859 regno);
26861 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26862 next_hi_reg--)
26863 if (live_regs_mask & (1 << next_hi_reg))
26864 break;
26868 live_regs_mask &= ~0x0f00;
26871 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26872 live_regs_mask &= 0xff;
26874 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26876 /* Pop the return address into the PC. */
26877 if (had_to_push_lr)
26878 live_regs_mask |= 1 << PC_REGNUM;
26880 /* Either no argument registers were pushed or a backtrace
26881 structure was created which includes an adjusted stack
26882 pointer, so just pop everything. */
26883 if (live_regs_mask)
26884 thumb_pop (asm_out_file, live_regs_mask);
26886 /* We have either just popped the return address into the
26887 PC or it was kept in LR for the entire function.
26888 Note that thumb_pop has already called thumb_exit if the
26889 PC was in the list. */
26890 if (!had_to_push_lr)
26891 thumb_exit (asm_out_file, LR_REGNUM);
26893 else
26895 /* Pop everything but the return address. */
26896 if (live_regs_mask)
26897 thumb_pop (asm_out_file, live_regs_mask);
26899 if (had_to_push_lr)
26901 if (size > 12)
26903 /* We have no free low regs, so save one. */
26904 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26905 LAST_ARG_REGNUM);
26908 /* Get the return address into a temporary register. */
26909 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26911 if (size > 12)
26913 /* Move the return address to lr. */
26914 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26915 LAST_ARG_REGNUM);
26916 /* Restore the low register. */
26917 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26918 IP_REGNUM);
26919 regno = LR_REGNUM;
26921 else
26922 regno = LAST_ARG_REGNUM;
26924 else
26925 regno = LR_REGNUM;
26927 /* Remove the argument registers that were pushed onto the stack. */
26928 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26929 SP_REGNUM, SP_REGNUM,
26930 crtl->args.pretend_args_size);
26932 thumb_exit (asm_out_file, regno);
26935 return "";
26938 /* Functions to save and restore machine-specific function data. */
26939 static struct machine_function *
26940 arm_init_machine_status (void)
26942 struct machine_function *machine;
26943 machine = ggc_cleared_alloc<machine_function> ();
26945 #if ARM_FT_UNKNOWN != 0
26946 machine->func_type = ARM_FT_UNKNOWN;
26947 #endif
26948 machine->static_chain_stack_bytes = -1;
26949 machine->pacspval_needed = 0;
26950 return machine;
26953 /* Return an RTX indicating where the return address to the
26954 calling function can be found. */
26956 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26958 if (count != 0)
26959 return NULL_RTX;
26961 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26964 /* Do anything needed before RTL is emitted for each function. */
26965 void
26966 arm_init_expanders (void)
26968 /* Arrange to initialize and mark the machine per-function status. */
26969 init_machine_status = arm_init_machine_status;
26971 /* This is to stop the combine pass optimizing away the alignment
26972 adjustment of va_arg. */
26973 /* ??? It is claimed that this should not be necessary. */
26974 if (cfun)
26975 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26978 /* Check that FUNC is called with a different mode. */
26980 bool
26981 arm_change_mode_p (tree func)
26983 if (TREE_CODE (func) != FUNCTION_DECL)
26984 return false;
26986 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26988 if (!callee_tree)
26989 callee_tree = target_option_default_node;
26991 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26992 int flags = callee_opts->x_target_flags;
26994 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26997 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26998 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26999 to point at the base of the local variables after static stack
27000 space for a function has been allocated. */
27002 HOST_WIDE_INT
27003 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27005 arm_stack_offsets *offsets;
27007 offsets = arm_get_frame_offsets ();
27009 switch (from)
27011 case ARG_POINTER_REGNUM:
27012 switch (to)
27014 case STACK_POINTER_REGNUM:
27015 return offsets->outgoing_args - offsets->saved_args;
27017 case FRAME_POINTER_REGNUM:
27018 return offsets->soft_frame - offsets->saved_args;
27020 case ARM_HARD_FRAME_POINTER_REGNUM:
27021 return offsets->saved_regs - offsets->saved_args;
27023 case THUMB_HARD_FRAME_POINTER_REGNUM:
27024 return offsets->locals_base - offsets->saved_args;
27026 default:
27027 gcc_unreachable ();
27029 break;
27031 case FRAME_POINTER_REGNUM:
27032 switch (to)
27034 case STACK_POINTER_REGNUM:
27035 return offsets->outgoing_args - offsets->soft_frame;
27037 case ARM_HARD_FRAME_POINTER_REGNUM:
27038 return offsets->saved_regs - offsets->soft_frame;
27040 case THUMB_HARD_FRAME_POINTER_REGNUM:
27041 return offsets->locals_base - offsets->soft_frame;
27043 default:
27044 gcc_unreachable ();
27046 break;
27048 default:
27049 gcc_unreachable ();
27053 /* Generate the function's prologue. */
27055 void
27056 thumb1_expand_prologue (void)
27058 rtx_insn *insn;
27060 HOST_WIDE_INT amount;
27061 HOST_WIDE_INT size;
27062 arm_stack_offsets *offsets;
27063 unsigned long func_type;
27064 int regno;
27065 unsigned long live_regs_mask;
27066 unsigned long l_mask;
27067 unsigned high_regs_pushed = 0;
27068 bool lr_needs_saving;
27070 func_type = arm_current_func_type ();
27072 /* Naked functions don't have prologues. */
27073 if (IS_NAKED (func_type))
27075 if (flag_stack_usage_info)
27076 current_function_static_stack_size = 0;
27077 return;
27080 if (IS_INTERRUPT (func_type))
27082 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27083 return;
27086 if (is_called_in_ARM_mode (current_function_decl))
27087 emit_insn (gen_prologue_thumb1_interwork ());
27089 offsets = arm_get_frame_offsets ();
27090 live_regs_mask = offsets->saved_regs_mask;
27091 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27093 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27094 l_mask = live_regs_mask & 0x40ff;
27095 /* Then count how many other high registers will need to be pushed. */
27096 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27098 if (crtl->args.pretend_args_size)
27100 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27102 if (cfun->machine->uses_anonymous_args)
27104 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27105 unsigned long mask;
27107 mask = 1ul << (LAST_ARG_REGNUM + 1);
27108 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
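/* E.g. with LAST_ARG_REGNUM == 3 and pretend_args_size == 8, num_pushes
   is 2 and mask becomes (1 << 4) - (1 << 2) == 0xc, i.e. push {r2, r3}.  */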
27110 insn = thumb1_emit_multi_reg_push (mask, 0);
27112 else
27114 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27115 stack_pointer_rtx, x));
27117 RTX_FRAME_RELATED_P (insn) = 1;
27120 if (TARGET_BACKTRACE)
27122 HOST_WIDE_INT offset = 0;
27123 unsigned work_register;
27124 rtx work_reg, x, arm_hfp_rtx;
27126 /* We have been asked to create a stack backtrace structure.
27127 The code looks like this:
27129 0 .align 2
27130 0 func:
27131 0 sub SP, #16 Reserve space for 4 registers.
27132 2 push {R7} Push low registers.
27133 4 add R7, SP, #20 Get the stack pointer before the push.
27134 6 str R7, [SP, #8] Store the stack pointer
27135 (before reserving the space).
27136 8 mov R7, PC Get hold of the start of this code + 12.
27137 10 str R7, [SP, #16] Store it.
27138 12 mov R7, FP Get hold of the current frame pointer.
27139 14 str R7, [SP, #4] Store it.
27140 16 mov R7, LR Get hold of the current return address.
27141 18 str R7, [SP, #12] Store it.
27142 20 add R7, SP, #16 Point at the start of the
27143 backtrace structure.
27144 22 mov FP, R7 Put this value into the frame pointer. */
27146 work_register = thumb_find_work_register (live_regs_mask);
27147 work_reg = gen_rtx_REG (SImode, work_register);
27148 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27150 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27151 stack_pointer_rtx, GEN_INT (-16)));
27152 RTX_FRAME_RELATED_P (insn) = 1;
27154 if (l_mask)
27156 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27157 RTX_FRAME_RELATED_P (insn) = 1;
27158 lr_needs_saving = false;
27160 offset = bit_count (l_mask) * UNITS_PER_WORD;
27163 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27164 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27166 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27167 x = gen_frame_mem (SImode, x);
27168 emit_move_insn (x, work_reg);
27170 /* Make sure that the instruction fetching the PC is in the right place
27171 to calculate "start of backtrace creation code + 12". */
27172 /* ??? The stores using the common WORK_REG ought to be enough to
27173 prevent the scheduler from doing anything weird. Failing that
27174 we could always move all of the following into an UNSPEC_VOLATILE. */
27175 if (l_mask)
27177 x = gen_rtx_REG (SImode, PC_REGNUM);
27178 emit_move_insn (work_reg, x);
27180 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27181 x = gen_frame_mem (SImode, x);
27182 emit_move_insn (x, work_reg);
27184 emit_move_insn (work_reg, arm_hfp_rtx);
27186 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27187 x = gen_frame_mem (SImode, x);
27188 emit_move_insn (x, work_reg);
27190 else
27192 emit_move_insn (work_reg, arm_hfp_rtx);
27194 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27195 x = gen_frame_mem (SImode, x);
27196 emit_move_insn (x, work_reg);
27198 x = gen_rtx_REG (SImode, PC_REGNUM);
27199 emit_move_insn (work_reg, x);
27201 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27202 x = gen_frame_mem (SImode, x);
27203 emit_move_insn (x, work_reg);
27206 x = gen_rtx_REG (SImode, LR_REGNUM);
27207 emit_move_insn (work_reg, x);
27209 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27210 x = gen_frame_mem (SImode, x);
27211 emit_move_insn (x, work_reg);
27213 x = GEN_INT (offset + 12);
27214 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27216 emit_move_insn (arm_hfp_rtx, work_reg);
27218 /* Optimization: If we are not pushing any low registers but we are going
27219 to push some high registers then delay our first push. This will just
27220 be a push of LR and we can combine it with the push of the first high
27221 register. */
27222 else if ((l_mask & 0xff) != 0
27223 || (high_regs_pushed == 0 && lr_needs_saving))
27225 unsigned long mask = l_mask;
27226 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27227 insn = thumb1_emit_multi_reg_push (mask, mask);
27228 RTX_FRAME_RELATED_P (insn) = 1;
27229 lr_needs_saving = false;
27232 if (high_regs_pushed)
27234 unsigned pushable_regs;
27235 unsigned next_hi_reg;
27236 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27237 : crtl->args.info.nregs;
27238 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27240 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27241 if (live_regs_mask & (1 << next_hi_reg))
27242 break;
27244 /* Here we need to mask out registers used for passing arguments,
27245 even if they could otherwise be pushed.  This is to avoid using
27246 them to stash the high registers; such stashing could clobber
27247 the argument values. */
27248 pushable_regs = l_mask & (~arg_regs_mask);
27249 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27251 /* Normally, LR can be used as a scratch register once it has been
27252 saved; but if the function examines its own return address then
27253 the value is still live and we need to avoid using it. */
27254 bool return_addr_live
27255 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27256 LR_REGNUM);
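/* For example, a function that calls __builtin_return_address (0) keeps
   the incoming LR value live here, so LR must not be reused as a scratch
   register even after it has been saved.  */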
27258 if (lr_needs_saving || return_addr_live)
27259 pushable_regs &= ~(1 << LR_REGNUM);
27261 if (pushable_regs == 0)
27262 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27264 while (high_regs_pushed > 0)
27266 unsigned long real_regs_mask = 0;
27267 unsigned long push_mask = 0;
27269 for (regno = LR_REGNUM; regno >= 0; regno --)
27271 if (pushable_regs & (1 << regno))
27273 emit_move_insn (gen_rtx_REG (SImode, regno),
27274 gen_rtx_REG (SImode, next_hi_reg));
27276 high_regs_pushed --;
27277 real_regs_mask |= (1 << next_hi_reg);
27278 push_mask |= (1 << regno);
27280 if (high_regs_pushed)
27282 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27283 next_hi_reg --)
27284 if (live_regs_mask & (1 << next_hi_reg))
27285 break;
27287 else
27288 break;
27292 /* If we had to find a work register and we have not yet
27293 saved the LR then add it to the list of regs to push. */
27294 if (lr_needs_saving)
27296 push_mask |= 1 << LR_REGNUM;
27297 real_regs_mask |= 1 << LR_REGNUM;
27298 lr_needs_saving = false;
27299 /* If the return address is not live at this point, we
27300 can add LR to the list of registers that we can use
27301 for pushes. */
27302 if (!return_addr_live)
27303 pushable_regs |= 1 << LR_REGNUM;
27306 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27307 RTX_FRAME_RELATED_P (insn) = 1;
27311 /* Load the pic register before setting the frame pointer,
27312 so we can use r7 as a temporary work register. */
27313 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27314 arm_load_pic_register (live_regs_mask, NULL_RTX);
27316 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27317 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27318 stack_pointer_rtx);
27320 size = offsets->outgoing_args - offsets->saved_args;
27321 if (flag_stack_usage_info)
27322 current_function_static_stack_size = size;
27324 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27325 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27326 || flag_stack_clash_protection)
27327 && size)
27328 sorry ("%<-fstack-check=specific%> for Thumb-1");
27330 amount = offsets->outgoing_args - offsets->saved_regs;
27331 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27332 if (amount)
27334 if (amount < 512)
27336 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27337 GEN_INT (- amount)));
27338 RTX_FRAME_RELATED_P (insn) = 1;
27340 else
27342 rtx reg, dwarf;
27344 /* The stack decrement is too big for an immediate value in a single
27345 insn. In theory we could issue multiple subtracts, but after
27346 three of them it becomes more space efficient to place the full
27347 value in the constant pool and load into a register. (Also the
27348 ARM debugger really likes to see only one stack decrement per
27349 function). So instead we look for a scratch register into which
27350 we can load the decrement, and then we subtract this from the
27351 stack pointer. Unfortunately on the thumb the only available
27352 scratch registers are the argument registers, and we cannot use
27353 these as they may hold arguments to the function. Instead we
27354 attempt to locate a call preserved register which is used by this
27355 function. If we can find one, then we know that it will have
27356 been pushed at the start of the prologue and so we can corrupt
27357 it now. */
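/* For example, for a 1024-byte frame we might find r4 in live_regs_mask,
   load -1024 into it (from the constant pool if need be) and emit
   "add sp, sp, r4"; the REG_FRAME_RELATED_EXPR note below records the
   equivalent constant SP adjustment for the unwinder.  */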
27358 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27359 if (live_regs_mask & (1 << regno))
27360 break;
27362 gcc_assert(regno <= LAST_LO_REGNUM);
27364 reg = gen_rtx_REG (SImode, regno);
27366 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27368 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27369 stack_pointer_rtx, reg));
27371 dwarf = gen_rtx_SET (stack_pointer_rtx,
27372 plus_constant (Pmode, stack_pointer_rtx,
27373 -amount));
27374 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27375 RTX_FRAME_RELATED_P (insn) = 1;
27379 if (frame_pointer_needed)
27380 thumb_set_frame_pointer (offsets);
27382 /* If we are profiling, make sure no instructions are scheduled before
27383 the call to mcount. Similarly if the user has requested no
27384 scheduling in the prolog. Similarly if we want non-call exceptions
27385 using the EABI unwinder, to prevent faulting instructions from being
27386 swapped with a stack adjustment. */
27387 if (crtl->profile || !TARGET_SCHED_PROLOG
27388 || (arm_except_unwind_info (&global_options) == UI_TARGET
27389 && cfun->can_throw_non_call_exceptions))
27390 emit_insn (gen_blockage ());
27392 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27393 if (live_regs_mask & 0xff)
27394 cfun->machine->lr_save_eliminated = 0;
27397 /* Clear caller saved registers not used to pass return values and leaked
27398 condition flags before exiting a cmse_nonsecure_entry function. */
27400 void
27401 cmse_nonsecure_entry_clear_before_return (void)
27403 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27404 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27405 uint32_t padding_bits_to_clear = 0;
27406 auto_sbitmap to_clear_bitmap (maxregno + 1);
27407 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27408 tree result_type;
27410 bitmap_clear (to_clear_bitmap);
27411 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27412 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27414 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27415 registers. */
27416 if (clear_vfpregs)
27418 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27420 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27422 if (!TARGET_HAVE_FPCXT_CMSE)
27424 /* Make sure we don't clear the two scratch registers used to clear
27425 the relevant FPSCR bits in output_return_instruction. */
27426 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27427 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27428 emit_use (gen_rtx_REG (SImode, 4));
27429 bitmap_clear_bit (to_clear_bitmap, 4);
27433 /* If the user has defined registers to be caller saved, these are no longer
27434 restored by the function before returning and must thus be cleared for
27435 security purposes. */
27436 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27438 /* We do not touch registers that can be used to pass arguments as per
27439 the AAPCS, since these should never be made callee-saved by user
27440 options. */
27441 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27442 continue;
27443 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27444 continue;
27445 if (!callee_saved_reg_p (regno)
27446 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27447 || TARGET_HARD_FLOAT))
27448 bitmap_set_bit (to_clear_bitmap, regno);
27451 /* Make sure we do not clear the registers used to return the result in. */
27452 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27453 if (!VOID_TYPE_P (result_type))
27455 uint64_t to_clear_return_mask;
27456 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27458 /* No need to check that we return in registers, because we don't
27459 support returning on stack yet. */
27460 gcc_assert (REG_P (result_rtl));
27461 to_clear_return_mask
27462 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27463 &padding_bits_to_clear);
27464 if (to_clear_return_mask)
27466 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27467 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27469 if (to_clear_return_mask & (1ULL << regno))
27470 bitmap_clear_bit (to_clear_bitmap, regno);
27475 if (padding_bits_to_clear != 0)
27477 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27478 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27480 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27481 returning a composite type, which only uses r0. Let's make sure that
27482 r1-r3 is cleared too. */
27483 bitmap_clear (to_clear_arg_regs_bitmap);
27484 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27485 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27488 /* Clear full registers that leak before returning. */
27489 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27490 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27491 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27492 clearing_reg);
27495 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27496 POP instruction can be generated. LR should be replaced by PC. All
27497 the checks required are already done by USE_RETURN_INSN (). Hence,
27498 all we really need to check here is if single register is to be
27499 returned, or multiple register return. */
27500 void
27501 thumb2_expand_return (bool simple_return)
27503 int i, num_regs;
27504 unsigned long saved_regs_mask;
27505 arm_stack_offsets *offsets;
27507 offsets = arm_get_frame_offsets ();
27508 saved_regs_mask = offsets->saved_regs_mask;
27510 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27511 if (saved_regs_mask & (1 << i))
27512 num_regs++;
27514 if (!simple_return && saved_regs_mask)
27516 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27517 functions or adapt code to handle according to ACLE. This path should
27518 not be reachable for cmse_nonsecure_entry functions though we prefer
27519 to assert it for now to ensure that future code changes do not silently
27520 change this behavior. */
27521 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27522 if (arm_current_function_pac_enabled_p ())
27524 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27525 arm_emit_multi_reg_pop (saved_regs_mask);
27526 emit_insn (gen_aut_nop ());
27527 emit_jump_insn (simple_return_rtx);
27529 else if (num_regs == 1)
27531 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27532 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27533 rtx addr = gen_rtx_MEM (SImode,
27534 gen_rtx_POST_INC (SImode,
27535 stack_pointer_rtx));
27536 set_mem_alias_set (addr, get_frame_alias_set ());
27537 XVECEXP (par, 0, 0) = ret_rtx;
27538 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27539 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27540 emit_jump_insn (par);
27542 else
27544 saved_regs_mask &= ~ (1 << LR_REGNUM);
27545 saved_regs_mask |= (1 << PC_REGNUM);
27546 arm_emit_multi_reg_pop (saved_regs_mask);
27549 else
27551 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27552 cmse_nonsecure_entry_clear_before_return ();
27553 emit_jump_insn (simple_return_rtx);
27557 void
27558 thumb1_expand_epilogue (void)
27560 HOST_WIDE_INT amount;
27561 arm_stack_offsets *offsets;
27562 int regno;
27564 /* Naked functions don't have epilogues. */
27565 if (IS_NAKED (arm_current_func_type ()))
27566 return;
27568 offsets = arm_get_frame_offsets ();
27569 amount = offsets->outgoing_args - offsets->saved_regs;
27571 if (frame_pointer_needed)
27573 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27574 amount = offsets->locals_base - offsets->saved_regs;
27576 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27578 gcc_assert (amount >= 0);
27579 if (amount)
27581 emit_insn (gen_blockage ());
27583 if (amount < 512)
27584 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27585 GEN_INT (amount)));
27586 else
27588 /* r3 is always free in the epilogue. */
27589 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27591 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27592 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27596 /* Emit a USE (stack_pointer_rtx), so that
27597 the stack adjustment will not be deleted. */
27598 emit_insn (gen_force_register_use (stack_pointer_rtx));
27600 if (crtl->profile || !TARGET_SCHED_PROLOG)
27601 emit_insn (gen_blockage ());
27603 /* Emit a clobber for each insn that will be restored in the epilogue,
27604 so that flow2 will get register lifetimes correct. */
27605 for (regno = 0; regno < 13; regno++)
27606 if (reg_needs_saving_p (regno))
27607 emit_clobber (gen_rtx_REG (SImode, regno));
27609 if (! df_regs_ever_live_p (LR_REGNUM))
27610 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27612 /* Clear all caller-saved regs that are not used to return. */
27613 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27614 cmse_nonsecure_entry_clear_before_return ();
27617 /* Epilogue code for APCS frame. */
27618 static void
27619 arm_expand_epilogue_apcs_frame (bool really_return)
27621 unsigned long func_type;
27622 unsigned long saved_regs_mask;
27623 int num_regs = 0;
27624 int i;
27625 int floats_from_frame = 0;
27626 arm_stack_offsets *offsets;
27628 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27629 func_type = arm_current_func_type ();
27631 /* Get frame offsets for ARM. */
27632 offsets = arm_get_frame_offsets ();
27633 saved_regs_mask = offsets->saved_regs_mask;
27635 /* Find the offset of the floating-point save area in the frame. */
27636 floats_from_frame
27637 = (offsets->saved_args
27638 + arm_compute_static_chain_stack_bytes ()
27639 - offsets->frame);
27641 /* Compute how many core registers saved and how far away the floats are. */
27642 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27643 if (saved_regs_mask & (1 << i))
27645 num_regs++;
27646 floats_from_frame += 4;
27649 if (TARGET_VFP_BASE)
27651 int start_reg;
27652 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27654 /* The offset is from IP_REGNUM. */
27655 int saved_size = arm_get_vfp_saved_size ();
27656 if (saved_size > 0)
27658 rtx_insn *insn;
27659 floats_from_frame += saved_size;
27660 insn = emit_insn (gen_addsi3 (ip_rtx,
27661 hard_frame_pointer_rtx,
27662 GEN_INT (-floats_from_frame)));
27663 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27664 ip_rtx, hard_frame_pointer_rtx);
27667 /* Generate VFP register multi-pop. */
27668 start_reg = FIRST_VFP_REGNUM;
27670 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27671 /* Look for a case where a reg does not need restoring. */
27672 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27674 if (start_reg != i)
27675 arm_emit_vfp_multi_reg_pop (start_reg,
27676 (i - start_reg) / 2,
27677 gen_rtx_REG (SImode,
27678 IP_REGNUM));
27679 start_reg = i + 2;
27682 /* Restore the remaining regs that we have discovered (or possibly
27683 even all of them, if the conditional in the for loop never
27684 fired). */
27685 if (start_reg != i)
27686 arm_emit_vfp_multi_reg_pop (start_reg,
27687 (i - start_reg) / 2,
27688 gen_rtx_REG (SImode, IP_REGNUM));
27691 if (TARGET_IWMMXT)
27693 /* The frame pointer is guaranteed to be non-double-word aligned, as
27694 it is set to double-word-aligned old_stack_pointer - 4. */
27695 rtx_insn *insn;
27696 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27698 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27699 if (reg_needs_saving_p (i))
27701 rtx addr = gen_frame_mem (V2SImode,
27702 plus_constant (Pmode, hard_frame_pointer_rtx,
27703 - lrm_count * 4));
27704 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27705 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27706 gen_rtx_REG (V2SImode, i),
27707 NULL_RTX);
27708 lrm_count += 2;
27712 /* saved_regs_mask should contain IP, which holds the old stack pointer
27713 from the time the activation record was created.  Since SP and IP are
27714 adjacent registers, we can restore the value directly into SP. */
27715 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27716 saved_regs_mask &= ~(1 << IP_REGNUM);
27717 saved_regs_mask |= (1 << SP_REGNUM);
27719 /* There are two registers left in saved_regs_mask - LR and PC. We
27720 only need to restore LR (the return address), but to
27721 save time we can load it directly into PC, unless we need a
27722 special function exit sequence, or we are not really returning. */
27723 if (really_return
27724 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27725 && !crtl->calls_eh_return)
27726 /* Delete LR from the register mask, so that LR on
27727 the stack is loaded into the PC in the register mask. */
27728 saved_regs_mask &= ~(1 << LR_REGNUM);
27729 else
27730 saved_regs_mask &= ~(1 << PC_REGNUM);
27732 num_regs = bit_count (saved_regs_mask);
27733 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27735 rtx_insn *insn;
27736 emit_insn (gen_blockage ());
27737 /* Unwind the stack to just below the saved registers. */
27738 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27739 hard_frame_pointer_rtx,
27740 GEN_INT (- 4 * num_regs)));
27742 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27743 stack_pointer_rtx, hard_frame_pointer_rtx);
27746 arm_emit_multi_reg_pop (saved_regs_mask);
27748 if (IS_INTERRUPT (func_type))
27750 /* Interrupt handlers will have pushed the
27751 IP onto the stack, so restore it now. */
27752 rtx_insn *insn;
27753 rtx addr = gen_rtx_MEM (SImode,
27754 gen_rtx_POST_INC (SImode,
27755 stack_pointer_rtx));
27756 set_mem_alias_set (addr, get_frame_alias_set ());
27757 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27758 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27759 gen_rtx_REG (SImode, IP_REGNUM),
27760 NULL_RTX);
27763 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27764 return;
27766 if (crtl->calls_eh_return)
27767 emit_insn (gen_addsi3 (stack_pointer_rtx,
27768 stack_pointer_rtx,
27769 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27771 if (IS_STACKALIGN (func_type))
27772 /* Restore the original stack pointer. Before prologue, the stack was
27773 realigned and the original stack pointer saved in r0. For details,
27774 see comment in arm_expand_prologue. */
27775 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27777 emit_jump_insn (simple_return_rtx);
27780 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27781 function is not a sibcall. */
27782 void
27783 arm_expand_epilogue (bool really_return)
27785 unsigned long func_type;
27786 unsigned long saved_regs_mask;
27787 int num_regs = 0;
27788 int i;
27789 int amount;
27790 arm_stack_offsets *offsets;
27792 func_type = arm_current_func_type ();
27794 /* Naked functions don't have an epilogue.  Hence, generate the return
27795 pattern and let output_return_instruction take care of any instruction emission. */
27796 if (IS_NAKED (func_type)
27797 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27799 if (really_return)
27800 emit_jump_insn (simple_return_rtx);
27801 return;
27804 /* If we are throwing an exception, then we really must be doing a
27805 return, so we can't tail-call. */
27806 gcc_assert (!crtl->calls_eh_return || really_return);
27808 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27810 arm_expand_epilogue_apcs_frame (really_return);
27811 return;
27814 /* Get frame offsets for ARM. */
27815 offsets = arm_get_frame_offsets ();
27816 saved_regs_mask = offsets->saved_regs_mask;
27817 num_regs = bit_count (saved_regs_mask);
27819 if (frame_pointer_needed)
27821 rtx_insn *insn;
27822 /* Restore stack pointer if necessary. */
27823 if (TARGET_ARM)
27825 /* In ARM mode, frame pointer points to first saved register.
27826 Restore stack pointer to last saved register. */
27827 amount = offsets->frame - offsets->saved_regs;
27829 /* Force out any pending memory operations that reference stacked data
27830 before stack de-allocation occurs. */
27831 emit_insn (gen_blockage ());
27832 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27833 hard_frame_pointer_rtx,
27834 GEN_INT (amount)));
27835 arm_add_cfa_adjust_cfa_note (insn, amount,
27836 stack_pointer_rtx,
27837 hard_frame_pointer_rtx);
27839 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27840 deleted. */
27841 emit_insn (gen_force_register_use (stack_pointer_rtx));
27843 else
27845 /* In Thumb-2 mode, the frame pointer points to the last saved
27846 register. */
27847 amount = offsets->locals_base - offsets->saved_regs;
27848 if (amount)
27850 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27851 hard_frame_pointer_rtx,
27852 GEN_INT (amount)));
27853 arm_add_cfa_adjust_cfa_note (insn, amount,
27854 hard_frame_pointer_rtx,
27855 hard_frame_pointer_rtx);
27858 /* Force out any pending memory operations that reference stacked data
27859 before stack de-allocation occurs. */
27860 emit_insn (gen_blockage ());
27861 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27862 hard_frame_pointer_rtx));
27863 arm_add_cfa_adjust_cfa_note (insn, 0,
27864 stack_pointer_rtx,
27865 hard_frame_pointer_rtx);
27866 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27867 deleted. */
27868 emit_insn (gen_force_register_use (stack_pointer_rtx));
27871 else
27873 /* Pop off outgoing args and local frame to adjust stack pointer to
27874 last saved register. */
27875 amount = offsets->outgoing_args - offsets->saved_regs;
27876 if (amount)
27878 rtx_insn *tmp;
27879 /* Force out any pending memory operations that reference stacked data
27880 before stack de-allocation occurs. */
27881 emit_insn (gen_blockage ());
27882 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27883 stack_pointer_rtx,
27884 GEN_INT (amount)));
27885 arm_add_cfa_adjust_cfa_note (tmp, amount,
27886 stack_pointer_rtx, stack_pointer_rtx);
27887 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27888 not deleted. */
27889 emit_insn (gen_force_register_use (stack_pointer_rtx));
27893 if (TARGET_VFP_BASE)
27895 /* Generate VFP register multi-pop. */
27896 int end_reg = LAST_VFP_REGNUM + 1;
27898 /* Scan the registers in reverse order. We need to match
27899 any groupings made in the prologue and generate matching
27900 vldm operations. The need to match groups is because,
27901 unlike pop, vldm can only do consecutive regs. */
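/* E.g. if d8-d9 and d11-d12 were saved but d10 was not, the prologue
   used two separate store groups, so we emit two matching multi-register
   pops here (one for d11-d12, then one for d8-d9) rather than a single
   range.  */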
27902 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27903 /* Look for a case where a reg does not need restoring. */
27904 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27906 /* Restore the regs discovered so far (from reg+2 to
27907 end_reg). */
27908 if (end_reg > i + 2)
27909 arm_emit_vfp_multi_reg_pop (i + 2,
27910 (end_reg - (i + 2)) / 2,
27911 stack_pointer_rtx);
27912 end_reg = i;
27915 /* Restore the remaining regs that we have discovered (or possibly
27916 even all of them, if the conditional in the for loop never
27917 fired). */
27918 if (end_reg > i + 2)
27919 arm_emit_vfp_multi_reg_pop (i + 2,
27920 (end_reg - (i + 2)) / 2,
27921 stack_pointer_rtx);
27924 if (TARGET_IWMMXT)
27925 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27926 if (reg_needs_saving_p (i))
27928 rtx_insn *insn;
27929 rtx addr = gen_rtx_MEM (V2SImode,
27930 gen_rtx_POST_INC (SImode,
27931 stack_pointer_rtx));
27932 set_mem_alias_set (addr, get_frame_alias_set ());
27933 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27934 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27935 gen_rtx_REG (V2SImode, i),
27936 NULL_RTX);
27937 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27938 stack_pointer_rtx, stack_pointer_rtx);
27941 if (saved_regs_mask)
27943 rtx insn;
27944 bool return_in_pc = false;
27946 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27947 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27948 && !IS_CMSE_ENTRY (func_type)
27949 && !IS_STACKALIGN (func_type)
27950 && really_return
27951 && crtl->args.pretend_args_size == 0
27952 && saved_regs_mask & (1 << LR_REGNUM)
27953 && !crtl->calls_eh_return
27954 && !arm_current_function_pac_enabled_p ())
27956 saved_regs_mask &= ~(1 << LR_REGNUM);
27957 saved_regs_mask |= (1 << PC_REGNUM);
27958 return_in_pc = true;
27961 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27963 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27964 if (saved_regs_mask & (1 << i))
27966 rtx addr = gen_rtx_MEM (SImode,
27967 gen_rtx_POST_INC (SImode,
27968 stack_pointer_rtx));
27969 set_mem_alias_set (addr, get_frame_alias_set ());
27971 if (i == PC_REGNUM)
27973 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27974 XVECEXP (insn, 0, 0) = ret_rtx;
27975 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27976 addr);
27977 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27978 insn = emit_jump_insn (insn);
27980 else
27982 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27983 addr));
27984 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27985 gen_rtx_REG (SImode, i),
27986 NULL_RTX);
27987 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27988 stack_pointer_rtx,
27989 stack_pointer_rtx);
27993 else
27995 if (TARGET_LDRD
27996 && current_tune->prefer_ldrd_strd
27997 && !optimize_function_for_size_p (cfun))
27999 if (TARGET_THUMB2)
28000 thumb2_emit_ldrd_pop (saved_regs_mask);
28001 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28002 arm_emit_ldrd_pop (saved_regs_mask);
28003 else
28004 arm_emit_multi_reg_pop (saved_regs_mask);
28006 else
28007 arm_emit_multi_reg_pop (saved_regs_mask);
28010 if (return_in_pc)
28011 return;
28014 amount
28015 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28016 if (amount)
28018 int i, j;
28019 rtx dwarf = NULL_RTX;
28020 rtx_insn *tmp =
28021 emit_insn (gen_addsi3 (stack_pointer_rtx,
28022 stack_pointer_rtx,
28023 GEN_INT (amount)));
28025 RTX_FRAME_RELATED_P (tmp) = 1;
28027 if (cfun->machine->uses_anonymous_args)
28029 /* Restore pretend args.  See arm_expand_prologue for how the pretend
28030 args are saved on the stack. */
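/* E.g. with pretend_args_size == 8, num_regs is 2 and the mask becomes
   (0xf0 >> 2) & 0xf == 0xc, so REG_CFA_RESTORE notes are added for r2
   and r3.  */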
28031 int num_regs = crtl->args.pretend_args_size / 4;
28032 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28033 for (j = 0, i = 0; j < num_regs; i++)
28034 if (saved_regs_mask & (1 << i))
28036 rtx reg = gen_rtx_REG (SImode, i);
28037 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28038 j++;
28040 REG_NOTES (tmp) = dwarf;
28042 arm_add_cfa_adjust_cfa_note (tmp, amount,
28043 stack_pointer_rtx, stack_pointer_rtx);
28046 if (IS_CMSE_ENTRY (func_type))
28048 /* CMSE_ENTRY always returns. */
28049 gcc_assert (really_return);
28050 /* Clear all caller-saved regs that are not used to return. */
28051 cmse_nonsecure_entry_clear_before_return ();
28053 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28054 VLDR. */
28055 if (TARGET_HAVE_FPCXT_CMSE)
28057 rtx_insn *insn;
28059 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28060 GEN_INT (FPCXTNS_ENUM)));
28061 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28062 plus_constant (Pmode, stack_pointer_rtx, 4));
28063 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28064 RTX_FRAME_RELATED_P (insn) = 1;
28068 if (arm_current_function_pac_enabled_p ())
28069 emit_insn (gen_aut_nop ());
28071 if (!really_return)
28072 return;
28074 if (crtl->calls_eh_return)
28075 emit_insn (gen_addsi3 (stack_pointer_rtx,
28076 stack_pointer_rtx,
28077 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28079 if (IS_STACKALIGN (func_type))
28080 /* Restore the original stack pointer. Before prologue, the stack was
28081 realigned and the original stack pointer saved in r0. For details,
28082 see comment in arm_expand_prologue. */
28083 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28085 emit_jump_insn (simple_return_rtx);
28088 /* Implementation of insn prologue_thumb1_interwork. This is the first
28089 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28091 const char *
28092 thumb1_output_interwork (void)
28094 const char * name;
28095 FILE *f = asm_out_file;
28097 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28098 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28099 == SYMBOL_REF);
28100 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28102 /* Generate code sequence to switch us into Thumb mode. */
28103 /* The .code 32 directive has already been emitted by
28104 ASM_DECLARE_FUNCTION_NAME. */
28105 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28106 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28108 /* Generate a label, so that the debugger will notice the
28109 change in instruction sets. This label is also used by
28110 the assembler to bypass the ARM code when this function
28111 is called from a Thumb encoded function elsewhere in the
28112 same file. Hence the definition of STUB_NAME here must
28113 agree with the definition in gas/config/tc-arm.c. */
28115 #define STUB_NAME ".real_start_of"
28117 fprintf (f, "\t.code\t16\n");
28118 #ifdef ARM_PE
28119 if (arm_dllexport_name_p (name))
28120 name = arm_strip_name_encoding (name);
28121 #endif
28122 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28123 fprintf (f, "\t.thumb_func\n");
28124 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28126 return "";
28129 /* Handle the case of a double word load into a low register from
28130 a computed memory address. The computed address may involve a
28131 register which is overwritten by the load. */
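/* For instance, when loading a DImode value at [r0] into r0/r1, the high
   word is loaded first (roughly "ldr r1, [r0, #4]") and only then the low
   word ("ldr r0, [r0]"), so the base register is not clobbered before its
   last use.  */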
28132 const char *
28133 thumb_load_double_from_address (rtx *operands)
28135 rtx addr;
28136 rtx base;
28137 rtx offset;
28138 rtx arg1;
28139 rtx arg2;
28141 gcc_assert (REG_P (operands[0]));
28142 gcc_assert (MEM_P (operands[1]));
28144 /* Get the memory address. */
28145 addr = XEXP (operands[1], 0);
28147 /* Work out how the memory address is computed. */
28148 switch (GET_CODE (addr))
28150 case REG:
28151 operands[2] = adjust_address (operands[1], SImode, 4);
28153 if (REGNO (operands[0]) == REGNO (addr))
28155 output_asm_insn ("ldr\t%H0, %2", operands);
28156 output_asm_insn ("ldr\t%0, %1", operands);
28158 else
28160 output_asm_insn ("ldr\t%0, %1", operands);
28161 output_asm_insn ("ldr\t%H0, %2", operands);
28163 break;
28165 case CONST:
28166 /* Compute <address> + 4 for the high order load. */
28167 operands[2] = adjust_address (operands[1], SImode, 4);
28169 output_asm_insn ("ldr\t%0, %1", operands);
28170 output_asm_insn ("ldr\t%H0, %2", operands);
28171 break;
28173 case PLUS:
28174 arg1 = XEXP (addr, 0);
28175 arg2 = XEXP (addr, 1);
28177 if (CONSTANT_P (arg1))
28178 base = arg2, offset = arg1;
28179 else
28180 base = arg1, offset = arg2;
28182 gcc_assert (REG_P (base));
28184 /* Catch the case of <address> = <reg> + <reg> */
28185 if (REG_P (offset))
28187 int reg_offset = REGNO (offset);
28188 int reg_base = REGNO (base);
28189 int reg_dest = REGNO (operands[0]);
28191 /* Add the base and offset registers together into the
28192 higher destination register. */
28193 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28194 reg_dest + 1, reg_base, reg_offset);
28196 /* Load the lower destination register from the address in
28197 the higher destination register. */
28198 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28199 reg_dest, reg_dest + 1);
28201 /* Load the higher destination register from its own address
28202 plus 4. */
28203 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28204 reg_dest + 1, reg_dest + 1);
28206 else
28208 /* Compute <address> + 4 for the high order load. */
28209 operands[2] = adjust_address (operands[1], SImode, 4);
28211 /* If the computed address is held in the low order register
28212 then load the high order register first, otherwise always
28213 load the low order register first. */
28214 if (REGNO (operands[0]) == REGNO (base))
28216 output_asm_insn ("ldr\t%H0, %2", operands);
28217 output_asm_insn ("ldr\t%0, %1", operands);
28219 else
28221 output_asm_insn ("ldr\t%0, %1", operands);
28222 output_asm_insn ("ldr\t%H0, %2", operands);
28225 break;
28227 case LABEL_REF:
28228 /* With no registers to worry about we can just load the value
28229 directly. */
28230 operands[2] = adjust_address (operands[1], SImode, 4);
28232 output_asm_insn ("ldr\t%H0, %2", operands);
28233 output_asm_insn ("ldr\t%0, %1", operands);
28234 break;
28236 default:
28237 gcc_unreachable ();
28240 return "";
28243 const char *
28244 thumb_output_move_mem_multiple (int n, rtx *operands)
28246 switch (n)
28248 case 2:
28249 if (REGNO (operands[4]) > REGNO (operands[5]))
28250 std::swap (operands[4], operands[5]);
28252 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28253 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28254 break;
28256 case 3:
28257 if (REGNO (operands[4]) > REGNO (operands[5]))
28258 std::swap (operands[4], operands[5]);
28259 if (REGNO (operands[5]) > REGNO (operands[6]))
28260 std::swap (operands[5], operands[6]);
28261 if (REGNO (operands[4]) > REGNO (operands[5]))
28262 std::swap (operands[4], operands[5]);
28264 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28265 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28266 break;
28268 default:
28269 gcc_unreachable ();
28272 return "";
28275 /* Output a call-via instruction for thumb state. */
28276 const char *
28277 thumb_call_via_reg (rtx reg)
28279 int regno = REGNO (reg);
28280 rtx *labelp;
28282 gcc_assert (regno < LR_REGNUM);
28284 /* If we are in the normal text section we can use a single instance
28285 per compilation unit. If we are doing function sections, then we need
28286 an entry per section, since we can't rely on reachability. */
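/* Illustrative effect: the call itself is emitted as "bl <label>", and the
   matching "<label>: bx rN" stub is emitted elsewhere (for the single
   text-section case, in arm_file_end below; otherwise once per section).  */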
28287 if (in_section == text_section)
28289 thumb_call_reg_needed = 1;
28291 if (thumb_call_via_label[regno] == NULL)
28292 thumb_call_via_label[regno] = gen_label_rtx ();
28293 labelp = thumb_call_via_label + regno;
28295 else
28297 if (cfun->machine->call_via[regno] == NULL)
28298 cfun->machine->call_via[regno] = gen_label_rtx ();
28299 labelp = cfun->machine->call_via + regno;
28302 output_asm_insn ("bl\t%a0", labelp);
28303 return "";
28306 /* Routines for generating rtl. */
28307 void
28308 thumb_expand_cpymemqi (rtx *operands)
28310 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28311 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28312 HOST_WIDE_INT len = INTVAL (operands[2]);
28313 HOST_WIDE_INT offset = 0;
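/* For example (illustrative), a 23-byte copy expands to one 12-byte block
   move, one 8-byte block move, a halfword copy and a final byte copy
   (12 + 8 + 2 + 1).  */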
28315 while (len >= 12)
28317 emit_insn (gen_cpymem12b (out, in, out, in));
28318 len -= 12;
28321 if (len >= 8)
28323 emit_insn (gen_cpymem8b (out, in, out, in));
28324 len -= 8;
28327 if (len >= 4)
28329 rtx reg = gen_reg_rtx (SImode);
28330 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28331 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28332 len -= 4;
28333 offset += 4;
28336 if (len >= 2)
28338 rtx reg = gen_reg_rtx (HImode);
28339 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28340 plus_constant (Pmode, in,
28341 offset))));
28342 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28343 offset)),
28344 reg));
28345 len -= 2;
28346 offset += 2;
28349 if (len)
28351 rtx reg = gen_reg_rtx (QImode);
28352 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28353 plus_constant (Pmode, in,
28354 offset))));
28355 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28356 offset)),
28357 reg));
28361 void
28362 thumb_reload_out_hi (rtx *operands)
28364 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28367 /* Return the length of a function name prefix
28368 that starts with the character 'c'. */
28369 static int
28370 arm_get_strip_length (int c)
28372 switch (c)
28374 ARM_NAME_ENCODING_LENGTHS
28375 default: return 0;
28379 /* Return a pointer to a function's name with any
28380 and all prefix encodings stripped from it. */
28381 const char *
28382 arm_strip_name_encoding (const char *name)
28384 int skip;
28386 while ((skip = arm_get_strip_length (* name)))
28387 name += skip;
28389 return name;
28392 /* If there is a '*' anywhere in the name's prefix, then
28393 emit the stripped name verbatim, otherwise prepend an
28394 underscore if leading underscores are being used. */
28395 void
28396 arm_asm_output_labelref (FILE *stream, const char *name)
28398 int skip;
28399 int verbatim = 0;
28401 while ((skip = arm_get_strip_length (* name)))
28403 verbatim |= (*name == '*');
28404 name += skip;
28407 if (verbatim)
28408 fputs (name, stream);
28409 else
28410 asm_fprintf (stream, "%U%s", name);
28413 /* This function is used to emit an EABI tag and its associated value.
28414 We emit the numerical value of the tag in case the assembler does not
28415 support textual tags (e.g. gas prior to 2.20). If requested we include
28416 the tag name in a comment so that anyone reading the assembler output
28417 will know which tag is being set.
28419 This function is not static because arm-c.cc needs it too. */
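/* Illustrative output with -fverbose-asm, assuming ASM_COMMENT_START is "@":
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args  */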
28421 void
28422 arm_emit_eabi_attribute (const char *name, int num, int val)
28424 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28425 if (flag_verbose_asm || flag_debug_asm)
28426 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28427 asm_fprintf (asm_out_file, "\n");
28430 /* This function is used to print CPU tuning information as a comment
28431 in the assembler file. Pointers are not printed for now. */
28433 void
28434 arm_print_tune_info (void)
28436 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28437 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28438 current_tune->constant_limit);
28439 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28440 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28441 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28442 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28443 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28444 "prefetch.l1_cache_size:\t%d\n",
28445 current_tune->prefetch.l1_cache_size);
28446 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28447 "prefetch.l1_cache_line_size:\t%d\n",
28448 current_tune->prefetch.l1_cache_line_size);
28449 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28450 "prefer_constant_pool:\t%d\n",
28451 (int) current_tune->prefer_constant_pool);
28452 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28453 "branch_cost:\t(s:speed, p:predictable)\n");
28454 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28455 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28456 current_tune->branch_cost (false, false));
28457 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28458 current_tune->branch_cost (false, true));
28459 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28460 current_tune->branch_cost (true, false));
28461 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28462 current_tune->branch_cost (true, true));
28463 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28464 "prefer_ldrd_strd:\t%d\n",
28465 (int) current_tune->prefer_ldrd_strd);
28466 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28467 "logical_op_non_short_circuit:\t[%d,%d]\n",
28468 (int) current_tune->logical_op_non_short_circuit_thumb,
28469 (int) current_tune->logical_op_non_short_circuit_arm);
28470 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28471 "disparage_flag_setting_t16_encodings:\t%d\n",
28472 (int) current_tune->disparage_flag_setting_t16_encodings);
28473 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28474 "string_ops_prefer_neon:\t%d\n",
28475 (int) current_tune->string_ops_prefer_neon);
28476 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28477 "max_insns_inline_memset:\t%d\n",
28478 current_tune->max_insns_inline_memset);
28479 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28480 current_tune->fusible_ops);
28481 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28482 (int) current_tune->sched_autopref);
28485 /* The last set of target options used to emit .arch directives, etc. This
28486 could be a function-local static if it were not required to expose it as a
28487 root to the garbage collector. */
28488 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28490 /* Print .arch and .arch_extension directives corresponding to the
28491 current architecture configuration. */
28492 static void
28493 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28495 arm_build_target build_target;
28496 /* If the target options haven't changed since the last time we were called
28497 there is nothing to do. This should be sufficient to suppress the
28498 majority of redundant work. */
28499 if (last_asm_targ_options == targ_options)
28500 return;
28502 last_asm_targ_options = targ_options;
28504 build_target.isa = sbitmap_alloc (isa_num_bits);
28505 arm_configure_build_target (&build_target, targ_options, false);
28507 if (build_target.core_name
28508 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28510 const char* truncated_name
28511 = arm_rewrite_selected_cpu (build_target.core_name);
28512 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28515 const arch_option *arch
28516 = arm_parse_arch_option_name (all_architectures, "-march",
28517 build_target.arch_name);
28518 auto_sbitmap opt_bits (isa_num_bits);
28520 gcc_assert (arch);
28522 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28524 /* Keep backward compatibility for assemblers which don't support
28525 armv7ve. Fortunately, none of the following extensions are reset
28526 by a .fpu directive. */
28527 asm_fprintf (stream, "\t.arch armv7-a\n");
28528 asm_fprintf (stream, "\t.arch_extension virt\n");
28529 asm_fprintf (stream, "\t.arch_extension idiv\n");
28530 asm_fprintf (stream, "\t.arch_extension sec\n");
28531 asm_fprintf (stream, "\t.arch_extension mp\n");
28533 else
28534 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28536 /* The .fpu directive will reset any architecture extensions from the
28537 assembler that relate to the fp/vector extensions. So put this out before
28538 any .arch_extension directives. */
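/* Illustrative directive ordering produced below (names are examples only):
	.arch armv7-a
	.fpu neon-vfpv4
	.arch_extension mp
	.arch_extension sec  */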
28539 const char *fpu_name = (TARGET_SOFT_FLOAT
28540 ? "softvfp"
28541 : arm_identify_fpu_from_isa (build_target.isa));
28542 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28544 if (!arch->common.extensions)
28545 return;
28547 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28548 opt->name != NULL;
28549 opt++)
28551 if (!opt->remove)
28553 arm_initialize_isa (opt_bits, opt->isa_bits);
28555 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28556 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28557 floating-point instructions are disabled. So the following check
28558 restricts the printing of ".arch_extension mve" and
28559 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28560 this special behaviour because the feature bits "mve" and
28561 "mve_float" are not part of the "fpu bits", so they are not cleared
28562 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28563 TARGET_HAVE_MVE_FLOAT are disabled. */
28564 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28565 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28566 && !TARGET_HAVE_MVE_FLOAT))
28567 continue;
28569 /* If every feature bit of this option is set in the target ISA
28570 specification, print out the option name. However, don't print
28571 anything if all the bits are part of the FPU specification. */
28572 if (bitmap_subset_p (opt_bits, build_target.isa)
28573 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28574 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28579 static void
28580 arm_file_start (void)
28582 int val;
28583 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28584 bool bti = (aarch_enable_bti == 1);
28586 arm_print_asm_arch_directives
28587 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28589 if (TARGET_BPABI)
28591 /* If we have a named cpu, but the assembler does not support that
28592 name via .cpu, put out a cpu name attribute; but don't do this if the
28593 name starts with the fictitious prefix, 'generic'. */
28594 if (arm_active_target.core_name
28595 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28596 && !startswith (arm_active_target.core_name, "generic"))
28598 const char* truncated_name
28599 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28600 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28601 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28602 truncated_name);
28605 if (print_tune_info)
28606 arm_print_tune_info ();
28608 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28609 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28611 if (TARGET_HARD_FLOAT_ABI)
28612 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28614 /* Some of these attributes only apply when the corresponding features
28615 are used. However we don't have any easy way of figuring this out.
28616 Conservatively record the setting that would have been used. */
28618 if (flag_rounding_math)
28619 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28621 if (!flag_unsafe_math_optimizations)
28623 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28624 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28626 if (flag_signaling_nans)
28627 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28629 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28630 flag_finite_math_only ? 1 : 3);
28632 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28633 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28634 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28635 flag_short_enums ? 1 : 2);
28637 /* Tag_ABI_optimization_goals. */
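/* The values below follow the EABI attribute definitions (roughly: 1 =
   prefer speed, 2 = aggressively for speed, 4 = aggressively for size,
   6 = aggressively for debuggability); treat this mapping as an
   informal reading of the ABI addenda.  */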
28638 if (optimize_size)
28639 val = 4;
28640 else if (optimize >= 2)
28641 val = 2;
28642 else if (optimize)
28643 val = 1;
28644 else
28645 val = 6;
28646 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28648 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28649 unaligned_access);
28651 if (arm_fp16_format)
28652 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28653 (int) arm_fp16_format);
28655 if (TARGET_HAVE_PACBTI)
28657 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28658 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28660 else if (pac || bti)
28662 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28663 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28666 if (bti)
28667 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28668 if (pac)
28669 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28671 if (arm_lang_output_object_attributes_hook)
28672 arm_lang_output_object_attributes_hook();
28675 default_file_start ();
28678 static void
28679 arm_file_end (void)
28681 int regno;
28683 /* Just in case the last function output in the assembler had non-default
28684 architecture directives, we force the assembler state back to the default
28685 set, so that any 'calculated' build attributes are based on the default
28686 options rather than the special options for that function. */
28687 arm_print_asm_arch_directives
28688 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28690 if (NEED_INDICATE_EXEC_STACK)
28691 /* Add .note.GNU-stack. */
28692 file_end_indicate_exec_stack ();
28694 if (! thumb_call_reg_needed)
28695 return;
28697 switch_to_section (text_section);
28698 asm_fprintf (asm_out_file, "\t.code 16\n");
28699 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28701 for (regno = 0; regno < LR_REGNUM; regno++)
28703 rtx label = thumb_call_via_label[regno];
28705 if (label != 0)
28707 targetm.asm_out.internal_label (asm_out_file, "L",
28708 CODE_LABEL_NUMBER (label));
28709 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28714 #ifndef ARM_PE
28715 /* Symbols in the text segment can be accessed without indirecting via the
28716 constant pool; it may take an extra binary operation, but this is still
28717 faster than indirecting via memory. Don't do this when not optimizing,
28718 since we won't be calculating all of the offsets necessary to do this
28719 simplification. */
28721 static void
28722 arm_encode_section_info (tree decl, rtx rtl, int first)
28724 if (optimize > 0 && TREE_CONSTANT (decl))
28725 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28727 default_encode_section_info (decl, rtl, first);
28729 #endif /* !ARM_PE */
28731 static void
28732 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28734 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28735 && !strcmp (prefix, "L"))
28737 arm_ccfsm_state = 0;
28738 arm_target_insn = NULL;
28740 default_internal_label (stream, prefix, labelno);
28743 /* Define classes to generate code as RTL or output asm to a file.
28744 Using templates then allows us to use the same code to output code
28745 sequences in the two formats. */
28746 class thumb1_const_rtl
28748 public:
28749 thumb1_const_rtl (rtx dst) : dst (dst) {}
28751 void mov (HOST_WIDE_INT val)
28753 emit_set_insn (dst, GEN_INT (val));
28756 void add (HOST_WIDE_INT val)
28758 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28761 void ashift (HOST_WIDE_INT shift)
28763 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28766 void neg ()
28768 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28771 private:
28772 rtx dst;
28775 class thumb1_const_print
28777 public:
28778 thumb1_const_print (FILE *f, int regno)
28780 t_file = f;
28781 dst_regname = reg_names[regno];
28784 void mov (HOST_WIDE_INT val)
28786 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28787 dst_regname, val);
28790 void add (HOST_WIDE_INT val)
28792 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28793 dst_regname, val);
28796 void ashift (HOST_WIDE_INT shift)
28798 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28799 dst_regname, shift);
28802 void neg ()
28804 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28807 private:
28808 FILE *t_file;
28809 const char *dst_regname;
28812 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28813 Avoid generating useless code when one of the bytes is zero. */
28814 template <class T>
28815 void
28816 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28818 bool mov_done_p = false;
28819 unsigned HOST_WIDE_INT val = op1;
28820 int shift = 0;
28821 int i;
28823 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28825 if (val <= 255)
28827 dst.mov (val);
28828 return;
28831 /* For negative numbers with the first nine bits set, build the
28832 opposite of OP1 and then negate it; this is generally no longer,
28833 and usually shorter. */
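/* For example (illustrative), -200 (0xffffff38) is built as
   "movs rN, #200" followed by "rsbs rN, #0".  */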
28834 if ((val & 0xFF800000) == 0xFF800000)
28836 thumb1_gen_const_int_1 (dst, -op1);
28837 dst.neg ();
28838 return;
28841 /* In the general case, we need 7 instructions to build
28842 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28843 do better if VAL is small enough, or
28844 right-shiftable by a suitable amount. If the
28845 right-shift lets us encode at least one less byte,
28846 it's worth it: we save an adds and an lsls at the
28847 expense of a final lsls. */
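/* Worked example (illustrative): 0x12345678 needs the full 7-instruction
   sequence (movs #0x12, lsls #8, adds #0x34, lsls #8, adds #0x56, lsls #8,
   adds #0x78), whereas 0x1fe000 is right-shiftable and needs only
   "movs #255; lsls #13".  */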
28848 int final_shift = number_of_first_bit_set (val);
28850 int leading_zeroes = clz_hwi (val);
28851 int number_of_bytes_needed
28852 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28853 / BITS_PER_UNIT) + 1;
28854 int number_of_bytes_needed2
28855 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28856 / BITS_PER_UNIT) + 1;
28858 if (number_of_bytes_needed2 < number_of_bytes_needed)
28859 val >>= final_shift;
28860 else
28861 final_shift = 0;
28863 /* If we are in a very small range, we can use either a single movs
28864 or movs+adds. */
28865 if (val <= 510)
28867 if (val > 255)
28869 unsigned HOST_WIDE_INT high = val - 255;
28871 dst.mov (high);
28872 dst.add (255);
28874 else
28875 dst.mov (val);
28877 if (final_shift > 0)
28878 dst.ashift (final_shift);
28880 else
28882 /* General case, emit upper 3 bytes as needed. */
28883 for (i = 0; i < 3; i++)
28885 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28887 if (byte)
28889 /* We are about to emit new bits, stop accumulating a
28890 shift amount, and left-shift only if we have already
28891 emitted some upper bits. */
28892 if (mov_done_p)
28894 dst.ashift (shift);
28895 dst.add (byte);
28897 else
28898 dst.mov (byte);
28900 /* Stop accumulating shift amount since we've just
28901 emitted some bits. */
28902 shift = 0;
28904 mov_done_p = true;
28907 if (mov_done_p)
28908 shift += 8;
28911 /* Emit lower byte. */
28912 if (!mov_done_p)
28913 dst.mov (val & 0xff);
28914 else
28916 dst.ashift (shift);
28917 if (val & 0xff)
28918 dst.add (val & 0xff);
28921 if (final_shift > 0)
28922 dst.ashift (final_shift);
28926 /* Proxies for thumb1.md, since the thumb1_const_print and
28927 thumb1_const_rtl classes are not exported. */
28928 void
28929 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28931 thumb1_const_rtl t (dst);
28932 thumb1_gen_const_int_1 (t, op1);
28935 void
28936 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28938 thumb1_const_print t (asm_out_file, REGNO (dst));
28939 thumb1_gen_const_int_1 (t, op1);
28942 /* Output code to add DELTA to the first argument, and then jump
28943 to FUNCTION. Used for C++ multiple inheritance. */
28945 static void
28946 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28947 HOST_WIDE_INT, tree function)
28949 static int thunk_label = 0;
28950 char label[256];
28951 char labelpc[256];
28952 int mi_delta = delta;
28953 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28954 int shift = 0;
28955 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28956 ? 1 : 0);
28957 if (mi_delta < 0)
28958 mi_delta = - mi_delta;
28960 final_start_function (emit_barrier (), file, 1);
28962 if (TARGET_THUMB1)
28964 int labelno = thunk_label++;
28965 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28966 /* Thunks are entered in arm mode when available. */
28967 if (TARGET_THUMB1_ONLY)
28969 /* push r3 so we can use it as a temporary. */
28970 /* TODO: Omit this save if r3 is not used. */
28971 fputs ("\tpush {r3}\n", file);
28973 /* With -mpure-code, we cannot load the address from the
28974 constant pool: we build it explicitly. */
28975 if (target_pure_code)
28977 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28978 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28979 fputc ('\n', file);
28980 fputs ("\tlsls r3, #8\n", file);
28981 fputs ("\tadds\tr3, #:upper0_7:#", file);
28982 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28983 fputc ('\n', file);
28984 fputs ("\tlsls r3, #8\n", file);
28985 fputs ("\tadds\tr3, #:lower8_15:#", file);
28986 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28987 fputc ('\n', file);
28988 fputs ("\tlsls r3, #8\n", file);
28989 fputs ("\tadds\tr3, #:lower0_7:#", file);
28990 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28991 fputc ('\n', file);
28993 else
28994 fputs ("\tldr\tr3, ", file);
28996 else
28998 fputs ("\tldr\tr12, ", file);
29001 if (!target_pure_code)
29003 assemble_name (file, label);
29004 fputc ('\n', file);
29007 if (flag_pic)
29009 /* If we are generating PIC, the ldr instruction below loads
29010 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29011 the address of the add + 8, so we have:
29013 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29014 = target + 1.
29016 Note that we have "+ 1" because some versions of GNU ld
29017 don't set the low bit of the result for R_ARM_REL32
29018 relocations against thumb function symbols.
29019 On ARMv6M this is +4, not +8. */
29020 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29021 assemble_name (file, labelpc);
29022 fputs (":\n", file);
29023 if (TARGET_THUMB1_ONLY)
29025 /* This is 2 insns after the start of the thunk, so we know it
29026 is 4-byte aligned. */
29027 fputs ("\tadd\tr3, pc, r3\n", file);
29028 fputs ("\tmov r12, r3\n", file);
29030 else
29031 fputs ("\tadd\tr12, pc, r12\n", file);
29033 else if (TARGET_THUMB1_ONLY)
29034 fputs ("\tmov r12, r3\n", file);
29036 if (TARGET_THUMB1_ONLY)
29038 if (mi_delta > 255)
29040 /* With -mpure-code, we cannot load MI_DELTA from the
29041 constant pool: we build it explicitly. */
29042 if (target_pure_code)
29044 thumb1_const_print r3 (file, 3);
29045 thumb1_gen_const_int_1 (r3, mi_delta);
29047 else
29049 fputs ("\tldr\tr3, ", file);
29050 assemble_name (file, label);
29051 fputs ("+4\n", file);
29053 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29054 mi_op, this_regno, this_regno);
29056 else if (mi_delta != 0)
29058 /* Thumb1 unified syntax requires an 's' suffix in the instruction
29059 name when one of the operands is an immediate. */
29060 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29061 mi_op, this_regno, this_regno,
29062 mi_delta);
29065 else
29067 /* TODO: Use movw/movt for large constants when available. */
29068 while (mi_delta != 0)
29070 if ((mi_delta & (3 << shift)) == 0)
29071 shift += 2;
29072 else
29074 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29075 mi_op, this_regno, this_regno,
29076 mi_delta & (0xff << shift));
29077 mi_delta &= ~(0xff << shift);
29078 shift += 8;
29082 if (TARGET_THUMB1)
29084 if (TARGET_THUMB1_ONLY)
29085 fputs ("\tpop\t{r3}\n", file);
29087 fprintf (file, "\tbx\tr12\n");
29089 /* With -mpure-code, we don't need to emit literals for the
29090 function address and delta since we emitted code to build
29091 them. */
29092 if (!target_pure_code)
29094 ASM_OUTPUT_ALIGN (file, 2);
29095 assemble_name (file, label);
29096 fputs (":\n", file);
29097 if (flag_pic)
29099 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29100 rtx tem = XEXP (DECL_RTL (function), 0);
29101 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29102 pipeline offset is four rather than eight. Adjust the offset
29103 accordingly. */
29104 tem = plus_constant (GET_MODE (tem), tem,
29105 TARGET_THUMB1_ONLY ? -3 : -7);
29106 tem = gen_rtx_MINUS (GET_MODE (tem),
29107 tem,
29108 gen_rtx_SYMBOL_REF (Pmode,
29109 ggc_strdup (labelpc)));
29110 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29112 else
29113 /* Output ".word .LTHUNKn". */
29114 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29116 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29117 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29120 else
29122 fputs ("\tb\t", file);
29123 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29124 if (NEED_PLT_RELOC)
29125 fputs ("(PLT)", file);
29126 fputc ('\n', file);
29129 final_end_function ();
29132 /* MI thunk handling for TARGET_32BIT. */
29134 static void
29135 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29136 HOST_WIDE_INT vcall_offset, tree function)
29138 const bool long_call_p = arm_is_long_call_p (function);
29140 /* On ARM, this_regno is R0 or R1 depending on
29141 whether the function returns an aggregate or not. */
29143 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29144 function)
29145 ? R1_REGNUM : R0_REGNUM);
29147 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29148 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29149 reload_completed = 1;
29150 emit_note (NOTE_INSN_PROLOGUE_END);
29152 /* Add DELTA to THIS_RTX. */
29153 if (delta != 0)
29154 arm_split_constant (PLUS, Pmode, NULL_RTX,
29155 delta, this_rtx, this_rtx, false);
29157 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29158 if (vcall_offset != 0)
29160 /* Load *THIS_RTX. */
29161 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29162 /* Compute *THIS_RTX + VCALL_OFFSET. */
29163 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29164 false);
29165 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29166 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29167 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29170 /* Generate a tail call to the target function. */
29171 if (!TREE_USED (function))
29173 assemble_external (function);
29174 TREE_USED (function) = 1;
29176 rtx funexp = XEXP (DECL_RTL (function), 0);
29177 if (long_call_p)
29179 emit_move_insn (temp, funexp);
29180 funexp = temp;
29182 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29183 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29184 SIBLING_CALL_P (insn) = 1;
29185 emit_barrier ();
29187 /* Indirect calls require a bit of fixup in PIC mode. */
29188 if (long_call_p)
29190 split_all_insns_noflow ();
29191 arm_reorg ();
29194 insn = get_insns ();
29195 shorten_branches (insn);
29196 final_start_function (insn, file, 1);
29197 final (insn, file, 1);
29198 final_end_function ();
29200 /* Stop pretending this is a post-reload pass. */
29201 reload_completed = 0;
29204 /* Output code to add DELTA to the first argument, and then jump
29205 to FUNCTION. Used for C++ multiple inheritance. */
29207 static void
29208 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29209 HOST_WIDE_INT vcall_offset, tree function)
29211 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29213 assemble_start_function (thunk, fnname);
29214 if (TARGET_32BIT)
29215 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29216 else
29217 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29218 assemble_end_function (thunk, fnname);
29222 arm_emit_vector_const (FILE *file, rtx x)
29224 int i;
29225 const char * pattern;
29227 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29229 switch (GET_MODE (x))
29231 case E_V2SImode: pattern = "%08x"; break;
29232 case E_V4HImode: pattern = "%04x"; break;
29233 case E_V8QImode: pattern = "%02x"; break;
29234 default: gcc_unreachable ();
29237 fprintf (file, "0x");
29238 for (i = CONST_VECTOR_NUNITS (x); i--;)
29240 rtx element;
29242 element = CONST_VECTOR_ELT (x, i);
29243 fprintf (file, pattern, INTVAL (element));
29246 return 1;
29249 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29250 HFmode constant pool entries are actually loaded with ldr. */
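/* For example (illustrative), HFmode 1.0 (bit pattern 0x3c00) on a
   little-endian target is emitted as the halfword 0x3c00 followed by two
   bytes of zero padding.  */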
29251 void
29252 arm_emit_fp16_const (rtx c)
29254 long bits;
29256 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29257 if (WORDS_BIG_ENDIAN)
29258 assemble_zeros (2);
29259 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29260 if (!WORDS_BIG_ENDIAN)
29261 assemble_zeros (2);
29264 const char *
29265 arm_output_load_gr (rtx *operands)
29267 rtx reg;
29268 rtx offset;
29269 rtx wcgr;
29270 rtx sum;
29272 if (!MEM_P (operands [1])
29273 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29274 || !REG_P (reg = XEXP (sum, 0))
29275 || !CONST_INT_P (offset = XEXP (sum, 1))
29276 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29277 return "wldrw%?\t%0, %1";
29279 /* Fix up an out-of-range load of a GR register. */
29280 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29281 wcgr = operands[0];
29282 operands[0] = reg;
29283 output_asm_insn ("ldr%?\t%0, %1", operands);
29285 operands[0] = wcgr;
29286 operands[1] = reg;
29287 output_asm_insn ("tmcr%?\t%0, %1", operands);
29288 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29290 return "";
29293 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29295 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29296 named arg and all anonymous args onto the stack.
29297 XXX I know the prologue shouldn't be pushing registers, but it is faster
29298 that way. */
29300 static void
29301 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29302 const function_arg_info &arg,
29303 int *pretend_size,
29304 int second_time ATTRIBUTE_UNUSED)
29306 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29307 int nregs;
29309 cfun->machine->uses_anonymous_args = 1;
29310 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29312 nregs = pcum->aapcs_ncrn;
29313 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29314 && (nregs & 1))
29316 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29317 if (res < 0 && warn_psabi)
29318 inform (input_location, "parameter passing for argument of "
29319 "type %qT changed in GCC 7.1", arg.type);
29320 else if (res > 0)
29322 nregs++;
29323 if (res > 1 && warn_psabi)
29324 inform (input_location,
29325 "parameter passing for argument of type "
29326 "%qT changed in GCC 9.1", arg.type);
29330 else
29331 nregs = pcum->nregs;
29333 if (nregs < NUM_ARG_REGS)
29334 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29337 /* We can't rely on the caller doing the proper promotion when
29338 using APCS or ATPCS. */
29340 static bool
29341 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29343 return !TARGET_AAPCS_BASED;
29346 static machine_mode
29347 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29348 machine_mode mode,
29349 int *punsignedp ATTRIBUTE_UNUSED,
29350 const_tree fntype ATTRIBUTE_UNUSED,
29351 int for_return ATTRIBUTE_UNUSED)
29353 if (GET_MODE_CLASS (mode) == MODE_INT
29354 && GET_MODE_SIZE (mode) < 4)
29355 return SImode;
29357 return mode;
29361 static bool
29362 arm_default_short_enums (void)
29364 return ARM_DEFAULT_SHORT_ENUMS;
29368 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29370 static bool
29371 arm_align_anon_bitfield (void)
29373 return TARGET_AAPCS_BASED;
29377 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29379 static tree
29380 arm_cxx_guard_type (void)
29382 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29386 /* The EABI says test the least significant bit of a guard variable. */
29388 static bool
29389 arm_cxx_guard_mask_bit (void)
29391 return TARGET_AAPCS_BASED;
29395 /* The EABI specifies that all array cookies are 8 bytes long. */
29397 static tree
29398 arm_get_cookie_size (tree type)
29400 tree size;
29402 if (!TARGET_AAPCS_BASED)
29403 return default_cxx_get_cookie_size (type);
29405 size = build_int_cst (sizetype, 8);
29406 return size;
29410 /* The EABI says that array cookies should also contain the element size. */
29412 static bool
29413 arm_cookie_has_size (void)
29415 return TARGET_AAPCS_BASED;
29419 /* The EABI says constructors and destructors should return a pointer to
29420 the object constructed/destroyed. */
29422 static bool
29423 arm_cxx_cdtor_returns_this (void)
29425 return TARGET_AAPCS_BASED;
29428 /* The EABI says that an inline function may never be the key
29429 method. */
29431 static bool
29432 arm_cxx_key_method_may_be_inline (void)
29434 return !TARGET_AAPCS_BASED;
29437 static void
29438 arm_cxx_determine_class_data_visibility (tree decl)
29440 if (!TARGET_AAPCS_BASED
29441 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29442 return;
29444 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29445 is exported. However, on systems without dynamic vague linkage,
29446 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29447 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29448 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29449 else
29450 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29451 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29454 static bool
29455 arm_cxx_class_data_always_comdat (void)
29457 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29458 vague linkage if the class has no key function. */
29459 return !TARGET_AAPCS_BASED;
29463 /* The EABI says __aeabi_atexit should be used to register static
29464 destructors. */
29466 static bool
29467 arm_cxx_use_aeabi_atexit (void)
29469 return TARGET_AAPCS_BASED;
29473 void
29474 arm_set_return_address (rtx source, rtx scratch)
29476 arm_stack_offsets *offsets;
29477 HOST_WIDE_INT delta;
29478 rtx addr, mem;
29479 unsigned long saved_regs;
29481 offsets = arm_get_frame_offsets ();
29482 saved_regs = offsets->saved_regs_mask;
29484 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29485 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29486 else
29488 if (frame_pointer_needed)
29489 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29490 else
29492 /* LR will be the first saved register. */
29493 delta = offsets->outgoing_args - (offsets->frame + 4);
29496 if (delta >= 4096)
29498 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29499 GEN_INT (delta & ~4095)));
29500 addr = scratch;
29501 delta &= 4095;
29503 else
29504 addr = stack_pointer_rtx;
29506 addr = plus_constant (Pmode, addr, delta);
29509 /* The store needs to be marked to prevent DSE from deleting
29510 it as dead if it is based on fp. */
29511 mem = gen_frame_mem (Pmode, addr);
29512 MEM_VOLATILE_P (mem) = true;
29513 emit_move_insn (mem, source);
29518 void
29519 thumb_set_return_address (rtx source, rtx scratch)
29521 arm_stack_offsets *offsets;
29522 HOST_WIDE_INT delta;
29523 HOST_WIDE_INT limit;
29524 int reg;
29525 rtx addr, mem;
29526 unsigned long mask;
29528 emit_use (source);
29530 offsets = arm_get_frame_offsets ();
29531 mask = offsets->saved_regs_mask;
29532 if (mask & (1 << LR_REGNUM))
29534 limit = 1024;
29535 /* Find the saved regs. */
29536 if (frame_pointer_needed)
29538 delta = offsets->soft_frame - offsets->saved_args;
29539 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29540 if (TARGET_THUMB1)
29541 limit = 128;
29543 else
29545 delta = offsets->outgoing_args - offsets->saved_args;
29546 reg = SP_REGNUM;
29548 /* Allow for the stack frame. */
29549 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29550 delta -= 16;
29551 /* The link register is always the first saved register. */
29552 delta -= 4;
29554 /* Construct the address. */
29555 addr = gen_rtx_REG (SImode, reg);
29556 if (delta > limit)
29558 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29559 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29560 addr = scratch;
29562 else
29563 addr = plus_constant (Pmode, addr, delta);
29565 /* The store needs to be marked to prevent DSE from deleting
29566 it as dead if it is based on fp. */
29567 mem = gen_frame_mem (Pmode, addr);
29568 MEM_VOLATILE_P (mem) = true;
29569 emit_move_insn (mem, source);
29571 else
29572 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29575 /* Implements target hook vector_mode_supported_p. */
29576 bool
29577 arm_vector_mode_supported_p (machine_mode mode)
29579 /* Neon also supports V2SImode, etc. listed in the clause below. */
29580 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29581 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29582 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29583 || mode == V8BFmode))
29584 return true;
29586 if ((TARGET_NEON || TARGET_IWMMXT)
29587 && ((mode == V2SImode)
29588 || (mode == V4HImode)
29589 || (mode == V8QImode)))
29590 return true;
29592 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29593 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29594 || mode == V2HAmode))
29595 return true;
29597 if (TARGET_HAVE_MVE
29598 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29599 return true;
29601 if (TARGET_HAVE_MVE_FLOAT
29602 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29603 return true;
29605 return false;
29608 /* Implements target hook array_mode_supported_p. */
29610 static bool
29611 arm_array_mode_supported_p (machine_mode mode,
29612 unsigned HOST_WIDE_INT nelems)
29614 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29615 for now, as the lane-swapping logic needs to be extended in the expanders.
29616 See PR target/82518. */
29617 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29618 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29619 && (nelems >= 2 && nelems <= 4))
29620 return true;
29622 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29623 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29624 return true;
29626 return false;
29629 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29630 registers when autovectorizing for Neon, at least until multiple vector
29631 widths are supported properly by the middle-end. */
29633 static machine_mode
29634 arm_preferred_simd_mode (scalar_mode mode)
29636 if (TARGET_NEON)
29637 switch (mode)
29639 case E_HFmode:
29640 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29641 case E_SFmode:
29642 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29643 case E_SImode:
29644 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29645 case E_HImode:
29646 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29647 case E_QImode:
29648 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29649 case E_DImode:
29650 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29651 return V2DImode;
29652 break;
29654 default:;
29657 if (TARGET_REALLY_IWMMXT)
29658 switch (mode)
29660 case E_SImode:
29661 return V2SImode;
29662 case E_HImode:
29663 return V4HImode;
29664 case E_QImode:
29665 return V8QImode;
29667 default:;
29670 if (TARGET_HAVE_MVE)
29671 switch (mode)
29673 case E_QImode:
29674 return V16QImode;
29675 case E_HImode:
29676 return V8HImode;
29677 case E_SImode:
29678 return V4SImode;
29680 default:;
29683 if (TARGET_HAVE_MVE_FLOAT)
29684 switch (mode)
29686 case E_HFmode:
29687 return V8HFmode;
29688 case E_SFmode:
29689 return V4SFmode;
29691 default:;
29694 return word_mode;
29697 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29699 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29700 using r0-r4 for function arguments, r7 for the stack frame, and not have
29701 enough left over to do doubleword arithmetic. For Thumb-2 all the
29702 potentially problematic instructions accept high registers so this is not
29703 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29704 that require many low registers. */
29705 static bool
29706 arm_class_likely_spilled_p (reg_class_t rclass)
29708 if ((TARGET_THUMB1 && rclass == LO_REGS)
29709 || rclass == CC_REG)
29710 return true;
29712 return default_class_likely_spilled_p (rclass);
29715 /* Implements target hook small_register_classes_for_mode_p. */
29716 bool
29717 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29719 return TARGET_THUMB1;
29722 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29723 ARM insns and therefore guarantee that the shift count is modulo 256.
29724 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29725 guarantee no particular behavior for out-of-range counts. */
29727 static unsigned HOST_WIDE_INT
29728 arm_shift_truncation_mask (machine_mode mode)
29730 return mode == SImode ? 255 : 0;
29734 /* Map internal gcc register numbers to DWARF2 register numbers. */
29736 unsigned int
29737 arm_debugger_regno (unsigned int regno)
29739 if (regno < 16)
29740 return regno;
29742 if (IS_VFP_REGNUM (regno))
29744 /* See comment in arm_dwarf_register_span. */
29745 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29746 return 64 + regno - FIRST_VFP_REGNUM;
29747 else
29748 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29751 if (IS_IWMMXT_GR_REGNUM (regno))
29752 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29754 if (IS_IWMMXT_REGNUM (regno))
29755 return 112 + regno - FIRST_IWMMXT_REGNUM;
29757 if (IS_PAC_REGNUM (regno))
29758 return DWARF_PAC_REGNUM;
29760 return DWARF_FRAME_REGISTERS;
29763 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29764 GCC models them as 64 32-bit registers, so we need to describe this to
29765 the DWARF generation code. Other registers can use the default. */
29766 static rtx
29767 arm_dwarf_register_span (rtx rtl)
29769 machine_mode mode;
29770 unsigned regno;
29771 rtx parts[16];
29772 int nregs;
29773 int i;
29775 regno = REGNO (rtl);
29776 if (!IS_VFP_REGNUM (regno))
29777 return NULL_RTX;
29779 /* XXX FIXME: The EABI defines two VFP register ranges:
29780 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29781 256-287: D0-D31
29782 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29783 corresponding D register. Until GDB supports this, we shall use the
29784 legacy encodings. We also use these encodings for D0-D15 for
29785 compatibility with older debuggers. */
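/* For example (illustrative), a DImode value in d5 is described as the two
   SImode halves s10 and s11 (legacy DWARF registers 74 and 75) rather than
   as the single D-register number 261.  */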
29786 mode = GET_MODE (rtl);
29787 if (GET_MODE_SIZE (mode) < 8)
29788 return NULL_RTX;
29790 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29792 nregs = GET_MODE_SIZE (mode) / 4;
29793 for (i = 0; i < nregs; i += 2)
29794 if (TARGET_BIG_END)
29796 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29797 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29799 else
29801 parts[i] = gen_rtx_REG (SImode, regno + i);
29802 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29805 else
29807 nregs = GET_MODE_SIZE (mode) / 8;
29808 for (i = 0; i < nregs; i++)
29809 parts[i] = gen_rtx_REG (DImode, regno + i);
29812 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29815 #if ARM_UNWIND_INFO
29816 /* Emit unwind directives for a store-multiple instruction or stack pointer
29817 push during alignment.
29818 These should only ever be generated by the function prologue code, so
29819 expect them to have a particular form.
29820 The store-multiple instruction sometimes pushes pc as the last register,
29821 although it should not be tracked into unwind information, or for -Os
29822 sometimes pushes some dummy registers before the first register that needs
29823 to be tracked in unwind information; such dummy registers are there just
29824 to avoid separate stack adjustment, and will not be restored in the
29825 epilogue. */
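/* For example (illustrative), a prologue "push {r4, r5, r6, lr}" is
   annotated as ".save {r4, r5, r6, lr}"; if pc had also been pushed purely
   as padding, an extra ".pad #4" would be emitted first.  */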
29827 static void
29828 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29830 int i;
29831 HOST_WIDE_INT offset;
29832 HOST_WIDE_INT nregs;
29833 int reg_size;
29834 unsigned reg;
29835 unsigned lastreg;
29836 unsigned padfirst = 0, padlast = 0;
29837 rtx e;
29839 e = XVECEXP (p, 0, 0);
29840 gcc_assert (GET_CODE (e) == SET);
29842 /* First insn will adjust the stack pointer. */
29843 gcc_assert (GET_CODE (e) == SET
29844 && REG_P (SET_DEST (e))
29845 && REGNO (SET_DEST (e)) == SP_REGNUM
29846 && GET_CODE (SET_SRC (e)) == PLUS);
29848 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29849 nregs = XVECLEN (p, 0) - 1;
29850 gcc_assert (nregs);
29852 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29853 if (reg < 16 || IS_PAC_REGNUM (reg))
29855 /* For -Os dummy registers can be pushed at the beginning to
29856 avoid separate stack pointer adjustment. */
29857 e = XVECEXP (p, 0, 1);
29858 e = XEXP (SET_DEST (e), 0);
29859 if (GET_CODE (e) == PLUS)
29860 padfirst = INTVAL (XEXP (e, 1));
29861 gcc_assert (padfirst == 0 || optimize_size);
29862 /* The function prologue may also push pc, but not annotate it as it is
29863 never restored. We turn this into a stack pointer adjustment. */
29864 e = XVECEXP (p, 0, nregs);
29865 e = XEXP (SET_DEST (e), 0);
29866 if (GET_CODE (e) == PLUS)
29867 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29868 else
29869 padlast = offset - 4;
29870 gcc_assert (padlast == 0 || padlast == 4);
29871 if (padlast == 4)
29872 fprintf (out_file, "\t.pad #4\n");
29873 reg_size = 4;
29874 fprintf (out_file, "\t.save {");
29876 else if (IS_VFP_REGNUM (reg))
29878 reg_size = 8;
29879 fprintf (out_file, "\t.vsave {");
29881 else
29882 /* Unknown register type. */
29883 gcc_unreachable ();
29885 /* If the stack increment doesn't match the size of the saved registers,
29886 something has gone horribly wrong. */
29887 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29889 offset = padfirst;
29890 lastreg = 0;
29891 /* The remaining insns will describe the stores. */
29892 for (i = 1; i <= nregs; i++)
29894 /* Expect (set (mem <addr>) (reg)).
29895 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29896 e = XVECEXP (p, 0, i);
29897 gcc_assert (GET_CODE (e) == SET
29898 && MEM_P (SET_DEST (e))
29899 && REG_P (SET_SRC (e)));
29901 reg = REGNO (SET_SRC (e));
29902 gcc_assert (reg >= lastreg);
29904 if (i != 1)
29905 fprintf (out_file, ", ");
29906 /* We can't use %r for vfp because we need to use the
29907 double precision register names. */
29908 if (IS_VFP_REGNUM (reg))
29909 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29910 else if (IS_PAC_REGNUM (reg))
29911 asm_fprintf (asm_out_file, "ra_auth_code");
29912 else
29913 asm_fprintf (out_file, "%r", reg);
29915 if (flag_checking)
29917 /* Check that the addresses are consecutive. */
29918 e = XEXP (SET_DEST (e), 0);
29919 if (GET_CODE (e) == PLUS)
29920 gcc_assert (REG_P (XEXP (e, 0))
29921 && REGNO (XEXP (e, 0)) == SP_REGNUM
29922 && CONST_INT_P (XEXP (e, 1))
29923 && offset == INTVAL (XEXP (e, 1)));
29924 else
29925 gcc_assert (i == 1
29926 && REG_P (e)
29927 && REGNO (e) == SP_REGNUM);
29928 offset += reg_size;
29931 fprintf (out_file, "}\n");
29932 if (padfirst)
29933 fprintf (out_file, "\t.pad #%d\n", padfirst);
29936 /* Emit unwind directives for a SET. */
29938 static void
29939 arm_unwind_emit_set (FILE * out_file, rtx p)
29941 rtx e0;
29942 rtx e1;
29943 unsigned reg;
29945 e0 = XEXP (p, 0);
29946 e1 = XEXP (p, 1);
29947 switch (GET_CODE (e0))
29949 case MEM:
29950 /* Pushing a single register. */
29951 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29952 || !REG_P (XEXP (XEXP (e0, 0), 0))
29953 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29954 abort ();
29956 asm_fprintf (out_file, "\t.save ");
29957 if (IS_VFP_REGNUM (REGNO (e1)))
29958 asm_fprintf(out_file, "{d%d}\n",
29959 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29960 else
29961 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29962 break;
29964 case REG:
29965 if (REGNO (e0) == SP_REGNUM)
29967 /* A stack increment. */
29968 if (GET_CODE (e1) != PLUS
29969 || !REG_P (XEXP (e1, 0))
29970 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29971 || !CONST_INT_P (XEXP (e1, 1)))
29972 abort ();
29974 asm_fprintf (out_file, "\t.pad #%wd\n",
29975 -INTVAL (XEXP (e1, 1)));
29977 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29979 HOST_WIDE_INT offset;
29981 if (GET_CODE (e1) == PLUS)
29983 if (!REG_P (XEXP (e1, 0))
29984 || !CONST_INT_P (XEXP (e1, 1)))
29985 abort ();
29986 reg = REGNO (XEXP (e1, 0));
29987 offset = INTVAL (XEXP (e1, 1));
29988 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29989 HARD_FRAME_POINTER_REGNUM, reg,
29990 offset);
29992 else if (REG_P (e1))
29994 reg = REGNO (e1);
29995 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29996 HARD_FRAME_POINTER_REGNUM, reg);
29998 else
29999 abort ();
30001 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30003 /* Move from sp to reg. */
30004 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30006 else if (GET_CODE (e1) == PLUS
30007 && REG_P (XEXP (e1, 0))
30008 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30009 && CONST_INT_P (XEXP (e1, 1)))
30011 /* Set reg to offset from sp. */
30012 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30013 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30015 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30017 if (cfun->machine->pacspval_needed)
30018 asm_fprintf (out_file, "\t.pacspval\n");
30020 else
30021 abort ();
30022 break;
30024 default:
30025 abort ();
30030 /* Emit unwind directives for the given insn. */
30032 static void
30033 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30035 rtx note, pat;
30036 bool handled_one = false;
30038 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30039 return;
30041 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30042 && (TREE_NOTHROW (current_function_decl)
30043 || crtl->all_throwers_are_sibcalls))
30044 return;
30046 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30047 return;
30049 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30051 switch (REG_NOTE_KIND (note))
30053 case REG_FRAME_RELATED_EXPR:
30054 pat = XEXP (note, 0);
30055 goto found;
30057 case REG_CFA_REGISTER:
30058 pat = XEXP (note, 0);
30059 if (pat == NULL)
30061 pat = PATTERN (insn);
30062 if (GET_CODE (pat) == PARALLEL)
30063 pat = XVECEXP (pat, 0, 0);
30066 /* Only emitted for IS_STACKALIGN re-alignment. */
30068 rtx dest, src;
30069 unsigned reg;
30071 src = SET_SRC (pat);
30072 dest = SET_DEST (pat);
30074 gcc_assert (src == stack_pointer_rtx
30075 || IS_PAC_REGNUM (REGNO (src)));
30076 reg = REGNO (dest);
30078 if (IS_PAC_REGNUM (REGNO (src)))
30079 arm_unwind_emit_set (out_file, PATTERN (insn));
30080 else
30081 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30082 reg + 0x90, reg);
30084 handled_one = true;
30085 break;
30087 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30088 to get correct dwarf information for shrink-wrap. We should not
30089 emit unwind information for it because these are used either for
30090 pretend arguments or notes to adjust sp and restore registers from
30091 the stack. */
30092 case REG_CFA_DEF_CFA:
30093 case REG_CFA_ADJUST_CFA:
30094 case REG_CFA_RESTORE:
30095 return;
30097 case REG_CFA_EXPRESSION:
30098 case REG_CFA_OFFSET:
30099 /* ??? Only handling here what we actually emit. */
30100 gcc_unreachable ();
30102 default:
30103 break;
30106 if (handled_one)
30107 return;
30108 pat = PATTERN (insn);
30109 found:
30111 switch (GET_CODE (pat))
30113 case SET:
30114 arm_unwind_emit_set (out_file, pat);
30115 break;
30117 case SEQUENCE:
30118 /* Store multiple. */
30119 arm_unwind_emit_sequence (out_file, pat);
30120 break;
30122 default:
30123 abort();
30128 /* Output a reference from a function exception table to the type_info
30129 object X. The EABI specifies that the symbol should be relocated by
30130 an R_ARM_TARGET2 relocation. */
30132 static bool
30133 arm_output_ttype (rtx x)
30135 fputs ("\t.word\t", asm_out_file);
30136 output_addr_const (asm_out_file, x);
30137 /* Use special relocations for symbol references. */
30138 if (!CONST_INT_P (x))
30139 fputs ("(TARGET2)", asm_out_file);
30140 fputc ('\n', asm_out_file);
30142 return TRUE;
30145 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30147 static void
30148 arm_asm_emit_except_personality (rtx personality)
30150 fputs ("\t.personality\t", asm_out_file);
30151 output_addr_const (asm_out_file, personality);
30152 fputc ('\n', asm_out_file);
30154 #endif /* ARM_UNWIND_INFO */
30156 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30158 static void
30159 arm_asm_init_sections (void)
30161 #if ARM_UNWIND_INFO
30162 exception_section = get_unnamed_section (0, output_section_asm_op,
30163 "\t.handlerdata");
30164 #endif /* ARM_UNWIND_INFO */
30166 #ifdef OBJECT_FORMAT_ELF
30167 if (target_pure_code)
30168 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30169 #endif
30172 /* Output unwind directives for the start/end of a function. */
30174 void
30175 arm_output_fn_unwind (FILE * f, bool prologue)
30177 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30178 return;
30180 if (prologue)
30181 fputs ("\t.fnstart\n", f);
30182 else
30184 /* If this function will never be unwound, then mark it as such.
30185 The same condition is used in arm_unwind_emit to suppress
30186 the frame annotations. */
30187 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30188 && (TREE_NOTHROW (current_function_decl)
30189 || crtl->all_throwers_are_sibcalls))
30190 fputs("\t.cantunwind\n", f);
30192 fputs ("\t.fnend\n", f);
30196 static bool
30197 arm_emit_tls_decoration (FILE *fp, rtx x)
30199 enum tls_reloc reloc;
30200 rtx val;
30202 val = XVECEXP (x, 0, 0);
30203 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30205 output_addr_const (fp, val);
30207 switch (reloc)
30209 case TLS_GD32:
30210 fputs ("(tlsgd)", fp);
30211 break;
30212 case TLS_GD32_FDPIC:
30213 fputs ("(tlsgd_fdpic)", fp);
30214 break;
30215 case TLS_LDM32:
30216 fputs ("(tlsldm)", fp);
30217 break;
30218 case TLS_LDM32_FDPIC:
30219 fputs ("(tlsldm_fdpic)", fp);
30220 break;
30221 case TLS_LDO32:
30222 fputs ("(tlsldo)", fp);
30223 break;
30224 case TLS_IE32:
30225 fputs ("(gottpoff)", fp);
30226 break;
30227 case TLS_IE32_FDPIC:
30228 fputs ("(gottpoff_fdpic)", fp);
30229 break;
30230 case TLS_LE32:
30231 fputs ("(tpoff)", fp);
30232 break;
30233 case TLS_DESCSEQ:
30234 fputs ("(tlsdesc)", fp);
30235 break;
30236 default:
30237 gcc_unreachable ();
30240 switch (reloc)
30242 case TLS_GD32:
30243 case TLS_LDM32:
30244 case TLS_IE32:
30245 case TLS_DESCSEQ:
30246 fputs (" + (. - ", fp);
30247 output_addr_const (fp, XVECEXP (x, 0, 2));
30248 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30249 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30250 output_addr_const (fp, XVECEXP (x, 0, 3));
30251 fputc (')', fp);
30252 break;
30253 default:
30254 break;
30257 return TRUE;
30260 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30262 static void
30263 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30265 gcc_assert (size == 4);
30266 fputs ("\t.word\t", file);
30267 output_addr_const (file, x);
30268 fputs ("(tlsldo)", file);
30271 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30273 static bool
30274 arm_output_addr_const_extra (FILE *fp, rtx x)
30276 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30277 return arm_emit_tls_decoration (fp, x);
30278 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30280 char label[256];
30281 int labelno = INTVAL (XVECEXP (x, 0, 0));
30283 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30284 assemble_name_raw (fp, label);
30286 return TRUE;
30288 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30290 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30291 if (GOT_PCREL)
30292 fputs ("+.", fp);
30293 fputs ("-(", fp);
30294 output_addr_const (fp, XVECEXP (x, 0, 0));
30295 fputc (')', fp);
30296 return TRUE;
30298 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30300 output_addr_const (fp, XVECEXP (x, 0, 0));
30301 if (GOT_PCREL)
30302 fputs ("+.", fp);
30303 fputs ("-(", fp);
30304 output_addr_const (fp, XVECEXP (x, 0, 1));
30305 fputc (')', fp);
30306 return TRUE;
30308 else if (GET_CODE (x) == CONST_VECTOR)
30309 return arm_emit_vector_const (fp, x);
30311 return FALSE;
30314 /* Output assembly for a shift instruction.
30315 SET_FLAGS determines how the instruction modifies the condition codes.
30316 0 - Do not set condition codes.
30317 1 - Set condition codes.
30318 2 - Use smallest instruction. */
30319 const char *
30320 arm_output_shift(rtx * operands, int set_flags)
30322 char pattern[100];
30323 static const char flag_chars[3] = {'?', '.', '!'};
30324 const char *shift;
30325 HOST_WIDE_INT val;
30326 char c;
30328 c = flag_chars[set_flags];
30329 shift = shift_op(operands[3], &val);
30330 if (shift)
30332 if (val != -1)
30333 operands[2] = GEN_INT(val);
30334 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30336 else
30337 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30339 output_asm_insn (pattern, operands);
30340 return "";
30343 /* Output assembly for a WMMX immediate shift instruction. */
30344 const char *
30345 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30347 int shift = INTVAL (operands[2]);
30348 char templ[50];
30349 machine_mode opmode = GET_MODE (operands[0]);
30351 gcc_assert (shift >= 0);
30353 /* Handle out-of-range shift values, i.e. > 63 (for the D qualifier),
30354 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
30355 if (((opmode == V4HImode) && (shift > 15))
30356 || ((opmode == V2SImode) && (shift > 31))
30357 || ((opmode == DImode) && (shift > 63)))
30359 if (wror_or_wsra)
30361 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30362 output_asm_insn (templ, operands);
30363 if (opmode == DImode)
30365 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30366 output_asm_insn (templ, operands);
30369 else
30371 /* The destination register will contain all zeros. */
30372 sprintf (templ, "wzero\t%%0");
30373 output_asm_insn (templ, operands);
30375 return "";
30378 if ((opmode == DImode) && (shift > 32))
30380 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30381 output_asm_insn (templ, operands);
30382 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30383 output_asm_insn (templ, operands);
30385 else
30387 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30388 output_asm_insn (templ, operands);
30390 return "";
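/* For illustration: per the DImode case above, a shift by a constant such as
   40 is emitted as two instructions, the named shift by #32 followed by the
   same shift of the intermediate result by #8.  */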
30393 /* Output assembly for a WMMX tinsr instruction. */
30394 const char *
30395 arm_output_iwmmxt_tinsr (rtx *operands)
30397 int mask = INTVAL (operands[3]);
30398 int i;
30399 char templ[50];
30400 int units = mode_nunits[GET_MODE (operands[0])];
30401 gcc_assert ((mask & (mask - 1)) == 0);
30402 for (i = 0; i < units; ++i)
30404 if ((mask & 0x01) == 1)
30406 break;
30408 mask >>= 1;
30410 gcc_assert (i < units);
30412 switch (GET_MODE (operands[0]))
30414 case E_V8QImode:
30415 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30416 break;
30417 case E_V4HImode:
30418 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30419 break;
30420 case E_V2SImode:
30421 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30422 break;
30423 default:
30424 gcc_unreachable ();
30425 break;
30427 output_asm_insn (templ, operands);
30429 return "";
30432 /* Output a Thumb-1 casesi dispatch sequence. */
30433 const char *
30434 thumb1_output_casesi (rtx *operands)
30436 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30438 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30440 switch (GET_MODE(diff_vec))
30442 case E_QImode:
30443 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30444 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30445 case E_HImode:
30446 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30447 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30448 case E_SImode:
30449 return "bl\t%___gnu_thumb1_case_si";
30450 default:
30451 gcc_unreachable ();
30455 /* Output a Thumb-2 casesi instruction. */
30456 const char *
30457 thumb2_output_casesi (rtx *operands)
30459 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30461 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30463 output_asm_insn ("cmp\t%0, %1", operands);
30464 output_asm_insn ("bhi\t%l3", operands);
30465 switch (GET_MODE(diff_vec))
30467 case E_QImode:
30468 return "tbb\t[%|pc, %0]";
30469 case E_HImode:
30470 return "tbh\t[%|pc, %0, lsl #1]";
30471 case E_SImode:
30472 if (flag_pic)
30474 output_asm_insn ("adr\t%4, %l2", operands);
30475 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30476 output_asm_insn ("add\t%4, %4, %5", operands);
30477 return "bx\t%4";
30479 else
30481 output_asm_insn ("adr\t%4, %l2", operands);
30482 return "ldr\t%|pc, [%4, %0, lsl #2]";
30484 default:
30485 gcc_unreachable ();
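/* For a QImode dispatch table the sequence emitted above is roughly
   (register names are illustrative):

	cmp	r0, r1		@ compare the index with the table bound
	bhi	.Ldefault	@ out of range: branch to the default label
	tbb	[pc, r0]	@ dispatch via the byte offset table that follows  */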
30489 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30490 per-core tuning structs. */
30491 static int
30492 arm_issue_rate (void)
30494 return current_tune->issue_rate;
30497 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30498 static int
30499 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30501 if (DEBUG_INSN_P (insn))
30502 return more;
30504 rtx_code code = GET_CODE (PATTERN (insn));
30505 if (code == USE || code == CLOBBER)
30506 return more;
30508 if (get_attr_type (insn) == TYPE_NO_INSN)
30509 return more;
30511 return more - 1;
30514 /* Return how many instructions the scheduler should look ahead to choose the
30515 best one. */
30516 static int
30517 arm_first_cycle_multipass_dfa_lookahead (void)
30519 int issue_rate = arm_issue_rate ();
30521 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30524 /* Enable modeling of L2 auto-prefetcher. */
30525 static int
30526 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30528 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30531 const char *
30532 arm_mangle_type (const_tree type)
30534 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30535 has to be mangled as if it is in the "std" namespace. */
30536 if (TARGET_AAPCS_BASED
30537 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30538 return "St9__va_list";
30540 /* Half-precision floating point types. */
30541 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30543 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30544 return NULL;
30545 if (TYPE_MODE (type) == BFmode)
30546 return "u6__bf16";
30547 else
30548 return "Dh";
30551 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30552 builtin type. */
30553 if (TYPE_NAME (type) != NULL)
30554 return arm_mangle_builtin_type (type);
30556 /* Use the default mangling. */
30557 return NULL;
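/* For example, on an AAPCS-based target "__va_list" in a C++ signature
   mangles as St9__va_list, __fp16 as Dh and __bf16 as u6__bf16, so a
   declaration such as void f (__fp16) would typically mangle as _Z1fDh.  */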
30560 /* Order of allocation of core registers for Thumb: this allocation is
30561 written over the corresponding initial entries of the array
30562 initialized with REG_ALLOC_ORDER. We allocate all low registers
30563 first. Saving and restoring a low register is usually cheaper than
30564 using a call-clobbered high register. */
30566 static const int thumb_core_reg_alloc_order[] =
30568 3, 2, 1, 0, 4, 5, 6, 7,
30569 12, 14, 8, 9, 10, 11
30572 /* Adjust register allocation order when compiling for Thumb. */
30574 void
30575 arm_order_regs_for_local_alloc (void)
30577 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30578 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30579 if (TARGET_THUMB)
30580 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30581 sizeof (thumb_core_reg_alloc_order));
30584 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30586 bool
30587 arm_frame_pointer_required (void)
30589 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30590 return true;
30592 /* If the function receives nonlocal gotos, it needs to save the frame
30593 pointer in the nonlocal_goto_save_area object. */
30594 if (cfun->has_nonlocal_label)
30595 return true;
30597 /* The frame pointer is required for non-leaf APCS frames. */
30598 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30599 return true;
30601 /* If we are probing the stack in the prologue, we will have a faulting
30602 instruction prior to the stack adjustment and this requires a frame
30603 pointer if we want to catch the exception using the EABI unwinder. */
30604 if (!IS_INTERRUPT (arm_current_func_type ())
30605 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30606 || flag_stack_clash_protection)
30607 && arm_except_unwind_info (&global_options) == UI_TARGET
30608 && cfun->can_throw_non_call_exceptions)
30610 HOST_WIDE_INT size = get_frame_size ();
30612 /* That's irrelevant if there is no stack adjustment. */
30613 if (size <= 0)
30614 return false;
30616 /* That's relevant only if there is a stack probe. */
30617 if (crtl->is_leaf && !cfun->calls_alloca)
30619 /* We don't have the final size of the frame so adjust. */
30620 size += 32 * UNITS_PER_WORD;
30621 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30622 return true;
30624 else
30625 return true;
30628 return false;
30631 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30632 All modes except THUMB1 have conditional execution.
30633 If we have conditional arithmetic, return false before reload to
30634 enable some ifcvt transformations. */
30635 static bool
30636 arm_have_conditional_execution (void)
30638 bool has_cond_exec, enable_ifcvt_trans;
30640 /* Only THUMB1 cannot support conditional execution. */
30641 has_cond_exec = !TARGET_THUMB1;
30643 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30644 before reload. */
30645 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30647 return has_cond_exec && !enable_ifcvt_trans;
30650 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30651 static HOST_WIDE_INT
30652 arm_vector_alignment (const_tree type)
30654 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30656 if (TARGET_AAPCS_BASED)
30657 align = MIN (align, 64);
30659 return align;
30662 static unsigned int
30663 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30665 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30667 modes->safe_push (V16QImode);
30668 modes->safe_push (V8QImode);
30670 return 0;
30673 static bool
30674 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30676 /* Vectors which aren't in packed structures will not be less aligned than
30677 the natural alignment of their element type, so this is safe. */
30678 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30679 return !is_packed;
30681 return default_builtin_vector_alignment_reachable (type, is_packed);
30684 static bool
30685 arm_builtin_support_vector_misalignment (machine_mode mode,
30686 const_tree type, int misalignment,
30687 bool is_packed)
30689 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30691 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30693 if (is_packed)
30694 return align == 1;
30696 /* If the misalignment is unknown, we should be able to handle the access
30697 so long as it is not to a member of a packed data structure. */
30698 if (misalignment == -1)
30699 return true;
30701 /* Return true if the misalignment is a multiple of the natural alignment
30702 of the vector's element type. This is probably always going to be
30703 true in practice, since we've already established that this isn't a
30704 packed access. */
30705 return ((misalignment % align) == 0);
30708 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30709 is_packed);
30712 static void
30713 arm_conditional_register_usage (void)
30715 int regno;
30717 if (TARGET_THUMB1 && optimize_size)
30719 /* When optimizing for size on Thumb-1, it's better not
30720 to use the HI regs, because of the overhead of
30721 stacking them. */
30722 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30723 fixed_regs[regno] = call_used_regs[regno] = 1;
30726 /* The link register can be clobbered by any branch insn,
30727 but we have no way to track that at present, so mark
30728 it as unavailable. */
30729 if (TARGET_THUMB1)
30730 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30732 if (TARGET_32BIT && TARGET_VFP_BASE)
30734 /* VFPv3 registers are disabled when earlier VFP
30735 versions are selected due to the definition of
30736 LAST_VFP_REGNUM. */
30737 for (regno = FIRST_VFP_REGNUM;
30738 regno <= LAST_VFP_REGNUM; ++ regno)
30740 fixed_regs[regno] = 0;
30741 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30742 || regno >= FIRST_VFP_REGNUM + 32;
30744 if (TARGET_HAVE_MVE)
30745 fixed_regs[VPR_REGNUM] = 0;
30748 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30750 regno = FIRST_IWMMXT_GR_REGNUM;
30751 /* The 2002/10/09 revision of the XScale ABI has wCG0
30752 and wCG1 as call-preserved registers. The 2002/11/21
30753 revision changed this so that all wCG registers are
30754 scratch registers. */
30755 for (regno = FIRST_IWMMXT_GR_REGNUM;
30756 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30757 fixed_regs[regno] = 0;
30758 /* The XScale ABI has wR0 - wR9 as scratch registers,
30759 the rest as call-preserved registers. */
30760 for (regno = FIRST_IWMMXT_REGNUM;
30761 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30763 fixed_regs[regno] = 0;
30764 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30768 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30770 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30771 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30773 else if (TARGET_APCS_STACK)
30775 fixed_regs[10] = 1;
30776 call_used_regs[10] = 1;
30778 /* -mcaller-super-interworking reserves r11 for calls to
30779 _interwork_r11_call_via_rN(). Making the register global
30780 is an easy way of ensuring that it remains valid for all
30781 calls. */
30782 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30783 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30785 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30786 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30787 if (TARGET_CALLER_INTERWORKING)
30788 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30791 /* The Q and GE bits are only accessed via special ACLE patterns. */
30792 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30793 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30795 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30798 static reg_class_t
30799 arm_preferred_rename_class (reg_class_t rclass)
30801 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30802 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
30803 since that can reduce code size. */
30804 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30805 return LO_REGS;
30806 else
30807 return NO_REGS;
30810 /* Compute the attribute "length" of insn "*push_multi".
30811 So this function MUST be kept in sync with that insn pattern. */
30813 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30815 int i, regno, hi_reg;
30816 int num_saves = XVECLEN (parallel_op, 0);
30818 /* ARM mode. */
30819 if (TARGET_ARM)
30820 return 4;
30821 /* Thumb1 mode. */
30822 if (TARGET_THUMB1)
30823 return 2;
30825 /* Thumb2 mode. */
30826 regno = REGNO (first_op);
30827 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30828 list fits in 8 bits. Normally this means all registers in the list must be
30829 LO_REGS, that is (R0 - R7). If any HI_REGS are used, then we must use a
30830 32-bit encoding. There is one exception: for PUSH, LR (a HI_REG) can still
30831 be used with the 16-bit encoding. */
30832 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30833 for (i = 1; i < num_saves && !hi_reg; i++)
30835 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30836 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30839 if (!hi_reg)
30840 return 2;
30841 return 4;
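/* A minimal standalone model of the Thumb-2 length choice above, using plain
   register numbers instead of RTL (hypothetical helper, illustration only).  */
#if 0
static int
thumb2_push_length_model (const int *regs, int n_regs)
{
  for (int i = 0; i < n_regs; i++)
    /* Any high register other than LR (r14) forces the 32-bit encoding.  */
    if (regs[i] > 7 && regs[i] != 14)
      return 4;
  return 2;	/* Only low registers, possibly plus LR: 16-bit PUSH.  */
}
#endif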
30844 /* Compute the attribute "length" of insn. Currently, this function is used
30845 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30846 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30847 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30848 true if OPERANDS contains an insn that explicitly updates the base register. */
30851 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30853 /* ARM mode. */
30854 if (TARGET_ARM)
30855 return 4;
30856 /* Thumb1 mode. */
30857 if (TARGET_THUMB1)
30858 return 2;
30860 rtx parallel_op = operands[0];
30861 /* Initialize to the number of elements in the PARALLEL. */
30862 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30863 /* Initialize the value to the base register. */
30864 unsigned regno = REGNO (operands[1]);
30865 /* Skip the return and write-back patterns;
30866 we only need the register pop patterns for later analysis. */
30867 unsigned first_indx = 0;
30868 first_indx += return_pc ? 1 : 0;
30869 first_indx += write_back_p ? 1 : 0;
30871 /* A pop operation can be done through LDM or POP. If the base register is SP
30872 and write-back is used, then LDM is an alias of POP. */
30873 bool pop_p = (regno == SP_REGNUM && write_back_p);
30874 bool ldm_p = !pop_p;
30876 /* Check base register for LDM. */
30877 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30878 return 4;
30880 /* Check each register in the list. */
30881 for (; indx >= first_indx; indx--)
30883 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30884 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30885 comment in arm_attr_length_push_multi. */
30886 if (REGNO_REG_CLASS (regno) == HI_REGS
30887 && (regno != PC_REGNUM || ldm_p))
30888 return 4;
30891 return 2;
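/* For example, "pop {r0-r3, pc}" can use the 16-bit encoding (PC is the one
   high register POP accepts there), whereas a POP list containing r9, or an
   LDM whose base register is a high register such as r8, needs 4 bytes.  */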
30894 /* Compute the number of instructions emitted by output_move_double. */
30896 arm_count_output_move_double_insns (rtx *operands)
30898 int count;
30899 rtx ops[2];
30900 /* output_move_double may modify the operands array, so call it
30901 here on a copy of the array. */
30902 ops[0] = operands[0];
30903 ops[1] = operands[1];
30904 output_move_double (ops, false, &count);
30905 return count;
30908 /* Same as above, but operands are a register/memory pair in SImode.
30909 Assumes operands has the base register in position 0 and memory in position
30910 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30912 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30914 int count;
30915 rtx ops[2];
30916 int regnum, memnum;
30917 if (load)
30918 regnum = 0, memnum = 1;
30919 else
30920 regnum = 1, memnum = 0;
30921 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30922 ops[memnum] = adjust_address (operands[2], DImode, 0);
30923 output_move_double (ops, false, &count);
30924 return count;
30929 vfp3_const_double_for_fract_bits (rtx operand)
30931 REAL_VALUE_TYPE r0;
30933 if (!CONST_DOUBLE_P (operand))
30934 return 0;
30936 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30937 if (exact_real_inverse (DFmode, &r0)
30938 && !REAL_VALUE_NEGATIVE (r0))
30940 if (exact_real_truncate (DFmode, &r0))
30942 HOST_WIDE_INT value = real_to_integer (&r0);
30943 value = value & 0xffffffff;
30944 if ((value != 0) && ( (value & (value - 1)) == 0))
30946 int ret = exact_log2 (value);
30947 gcc_assert (IN_RANGE (ret, 0, 31));
30948 return ret;
30952 return 0;
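/* A rough standalone model of the check above (hypothetical helper, plain C):
   returns N when X is exactly 1.0 / (1 << N) with N in [1, 31], else 0,
   e.g. 0.125 -> 3 and 1.0 / 256 -> 8.  */
#if 0
#include <math.h>

static int
fract_bits_model (double x)
{
  int exp;
  /* x = m * 2^exp with 0.5 <= |m| < 1; an exact power of two has m == 0.5.  */
  double m = frexp (x, &exp);
  if (m != 0.5)
    return 0;
  int n = 1 - exp;	/* x == 2^(exp - 1), so 1/x == 2^(1 - exp).  */
  return (n >= 1 && n <= 31) ? n : 0;
}
#endif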
30955 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30956 log2 is in [1, 32], return that log2. Otherwise return -1.
30957 This is used in the patterns for vcvt.s32.f32 floating-point to
30958 fixed-point conversions. */
30961 vfp3_const_double_for_bits (rtx x)
30963 const REAL_VALUE_TYPE *r;
30965 if (!CONST_DOUBLE_P (x))
30966 return -1;
30968 r = CONST_DOUBLE_REAL_VALUE (x);
30970 if (REAL_VALUE_NEGATIVE (*r)
30971 || REAL_VALUE_ISNAN (*r)
30972 || REAL_VALUE_ISINF (*r)
30973 || !real_isinteger (r, SFmode))
30974 return -1;
30976 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30978 /* The exact_log2 above will have returned -1 if this is
30979 not an exact log2. */
30980 if (!IN_RANGE (hwint, 1, 32))
30981 return -1;
30983 return hwint;
30987 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30989 static void
30990 arm_pre_atomic_barrier (enum memmodel model)
30992 if (need_atomic_barrier_p (model, true))
30993 emit_insn (gen_memory_barrier ());
30996 static void
30997 arm_post_atomic_barrier (enum memmodel model)
30999 if (need_atomic_barrier_p (model, false))
31000 emit_insn (gen_memory_barrier ());
31003 /* Emit the load-exclusive and store-exclusive instructions.
31004 Use acquire and release versions if necessary. */
31006 static void
31007 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31009 rtx (*gen) (rtx, rtx);
31011 if (acq)
31013 switch (mode)
31015 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31016 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31017 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31018 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31019 default:
31020 gcc_unreachable ();
31023 else
31025 switch (mode)
31027 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31028 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31029 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31030 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31031 default:
31032 gcc_unreachable ();
31036 emit_insn (gen (rval, mem));
31039 static void
31040 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31041 rtx mem, bool rel)
31043 rtx (*gen) (rtx, rtx, rtx);
31045 if (rel)
31047 switch (mode)
31049 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31050 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31051 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31052 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31053 default:
31054 gcc_unreachable ();
31057 else
31059 switch (mode)
31061 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31062 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31063 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31064 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31065 default:
31066 gcc_unreachable ();
31070 emit_insn (gen (bval, rval, mem));
31073 /* Mark the previous jump instruction as unlikely. */
31075 static void
31076 emit_unlikely_jump (rtx insn)
31078 rtx_insn *jump = emit_jump_insn (insn);
31079 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31082 /* Expand a compare and swap pattern. */
31084 void
31085 arm_expand_compare_and_swap (rtx operands[])
31087 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31088 machine_mode mode, cmp_mode;
31090 bval = operands[0];
31091 rval = operands[1];
31092 mem = operands[2];
31093 oldval = operands[3];
31094 newval = operands[4];
31095 is_weak = operands[5];
31096 mod_s = operands[6];
31097 mod_f = operands[7];
31098 mode = GET_MODE (mem);
31100 /* Normally the succ memory model must be stronger than fail, but in the
31101 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31102 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31104 if (TARGET_HAVE_LDACQ
31105 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31106 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31107 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31109 switch (mode)
31111 case E_QImode:
31112 case E_HImode:
31113 /* For narrow modes, we're going to perform the comparison in SImode,
31114 so do the zero-extension now. */
31115 rval = gen_reg_rtx (SImode);
31116 oldval = convert_modes (SImode, mode, oldval, true);
31117 /* FALLTHRU */
31119 case E_SImode:
31120 /* Force the value into a register if needed. We waited until after
31121 the zero-extension above to do this properly. */
31122 if (!arm_add_operand (oldval, SImode))
31123 oldval = force_reg (SImode, oldval);
31124 break;
31126 case E_DImode:
31127 if (!cmpdi_operand (oldval, mode))
31128 oldval = force_reg (mode, oldval);
31129 break;
31131 default:
31132 gcc_unreachable ();
31135 if (TARGET_THUMB1)
31136 cmp_mode = E_SImode;
31137 else
31138 cmp_mode = CC_Zmode;
31140 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31141 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31142 oldval, newval, is_weak, mod_s, mod_f));
31144 if (mode == QImode || mode == HImode)
31145 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31147 /* In all cases, we arrange for success to be signaled by Z set.
31148 This arrangement allows for the boolean result to be used directly
31149 in a subsequent branch, post optimization. For Thumb-1 targets, the
31150 boolean negation of the result is also stored in bval because the Thumb-1
31151 backend lacks dependency tracking for the CC flag, since flag-setting is
31152 not represented at the RTL level. */
31153 if (TARGET_THUMB1)
31154 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31155 else
31157 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31158 emit_insn (gen_rtx_SET (bval, x));
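/* Source-level form that reaches arm_expand_compare_and_swap above;
   __atomic_compare_exchange_n is the GCC builtin behind __atomic and
   std::atomic compare-exchange (the function below is only an illustration).  */
#if 0
static int
try_lock (int *lock)
{
  int expected = 0;
  /* The boolean result maps onto the Z-flag arrangement described above,
     so it can feed a conditional branch directly.  */
  return __atomic_compare_exchange_n (lock, &expected, 1, /*weak=*/false,
				      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
#endif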
31162 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31163 another memory store between the load-exclusive and store-exclusive can
31164 reset the monitor from Exclusive to Open state. This means we must wait
31165 until after reload to split the pattern, lest we get a register spill in
31166 the middle of the atomic sequence. Success of the compare and swap is
31167 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
31168 for Thumb-1 targets (i.e. the negation of the boolean value returned by
31169 the atomic_compare_and_swapmode standard pattern in operand 0). */
31171 void
31172 arm_split_compare_and_swap (rtx operands[])
31174 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31175 machine_mode mode;
31176 enum memmodel mod_s, mod_f;
31177 bool is_weak;
31178 rtx_code_label *label1, *label2;
31179 rtx x, cond;
31181 rval = operands[1];
31182 mem = operands[2];
31183 oldval = operands[3];
31184 newval = operands[4];
31185 is_weak = (operands[5] != const0_rtx);
31186 mod_s_rtx = operands[6];
31187 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31188 mod_f = memmodel_from_int (INTVAL (operands[7]));
31189 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31190 mode = GET_MODE (mem);
31192 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31194 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31195 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31197 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31198 a full barrier is emitted after the store-release. */
31199 if (is_armv8_sync)
31200 use_acquire = false;
31202 /* Checks whether a barrier is needed and emits one accordingly. */
31203 if (!(use_acquire || use_release))
31204 arm_pre_atomic_barrier (mod_s);
31206 label1 = NULL;
31207 if (!is_weak)
31209 label1 = gen_label_rtx ();
31210 emit_label (label1);
31212 label2 = gen_label_rtx ();
31214 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31216 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31217 as required to communicate with arm_expand_compare_and_swap. */
31218 if (TARGET_32BIT)
31220 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31221 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31222 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31223 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31224 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31226 else
31228 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31229 if (thumb1_cmpneg_operand (oldval, SImode))
31231 rtx src = rval;
31232 if (!satisfies_constraint_L (oldval))
31234 gcc_assert (satisfies_constraint_J (oldval));
31236 /* For such immediates, ADDS needs the source and destination regs
31237 to be the same.
31239 Normally this would be handled by RA, but this is all happening
31240 after RA. */
31241 emit_move_insn (neg_bval, rval);
31242 src = neg_bval;
31245 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31246 label2, cond));
31248 else
31250 emit_move_insn (neg_bval, const1_rtx);
31251 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31255 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31257 /* Weak or strong, we want EQ to be true for success, so that we
31258 match the flags that we got from the compare above. */
31259 if (TARGET_32BIT)
31261 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31262 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31263 emit_insn (gen_rtx_SET (cond, x));
31266 if (!is_weak)
31268 /* Z is set to boolean value of !neg_bval, as required to communicate
31269 with arm_expand_compare_and_swap. */
31270 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31271 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31274 if (!is_mm_relaxed (mod_f))
31275 emit_label (label2);
31277 /* Checks whether a barrier is needed and emits one accordingly. */
31278 if (is_armv8_sync
31279 || !(use_acquire || use_release))
31280 arm_post_atomic_barrier (mod_s);
31282 if (is_mm_relaxed (mod_f))
31283 emit_label (label2);
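/* For a strong SImode compare-and-swap without acquire/release instructions,
   the split above produces roughly the following loop (barriers omitted,
   register names illustrative):

   .Lretry:
	ldrex	r0, [r2]	@ load the current value
	cmp	r0, r3		@ compare with the expected value
	bne	.Ldone		@ mismatch: Z clear, fail
	strex	r1, r4, [r2]	@ try to store the new value
	cmp	r1, #0
	bne	.Lretry		@ reservation lost: retry
   .Ldone:  */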
31286 /* Split an atomic operation pattern. Operation is given by CODE and is one
31287 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31288 operation). Operation is performed on the content at MEM and on VALUE
31289 following the memory model MODEL_RTX. The content at MEM before and after
31290 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31291 success of the operation is returned in COND. Using a scratch register or
31292 an operand register for these determines what result is returned for that
31293 pattern. */
31295 void
31296 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31297 rtx value, rtx model_rtx, rtx cond)
31299 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31300 machine_mode mode = GET_MODE (mem);
31301 machine_mode wmode = (mode == DImode ? DImode : SImode);
31302 rtx_code_label *label;
31303 bool all_low_regs, bind_old_new;
31304 rtx x;
31306 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31308 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31309 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31311 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31312 a full barrier is emitted after the store-release. */
31313 if (is_armv8_sync)
31314 use_acquire = false;
31316 /* Checks whether a barrier is needed and emits one accordingly. */
31317 if (!(use_acquire || use_release))
31318 arm_pre_atomic_barrier (model);
31320 label = gen_label_rtx ();
31321 emit_label (label);
31323 if (new_out)
31324 new_out = gen_lowpart (wmode, new_out);
31325 if (old_out)
31326 old_out = gen_lowpart (wmode, old_out);
31327 else
31328 old_out = new_out;
31329 value = simplify_gen_subreg (wmode, value, mode, 0);
31331 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31333 /* Does the operation require destination and first operand to use the same
31334 register? This is decided by register constraints of relevant insn
31335 patterns in thumb1.md. */
31336 gcc_assert (!new_out || REG_P (new_out));
31337 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31338 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31339 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31340 bind_old_new =
31341 (TARGET_THUMB1
31342 && code != SET
31343 && code != MINUS
31344 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31346 /* We want to return the old value while putting the result of the operation
31347 in the same register as the old value so copy the old value over to the
31348 destination register and use that register for the operation. */
31349 if (old_out && bind_old_new)
31351 emit_move_insn (new_out, old_out);
31352 old_out = new_out;
31355 switch (code)
31357 case SET:
31358 new_out = value;
31359 break;
31361 case NOT:
31362 x = gen_rtx_AND (wmode, old_out, value);
31363 emit_insn (gen_rtx_SET (new_out, x));
31364 x = gen_rtx_NOT (wmode, new_out);
31365 emit_insn (gen_rtx_SET (new_out, x));
31366 break;
31368 case MINUS:
31369 if (CONST_INT_P (value))
31371 value = gen_int_mode (-INTVAL (value), wmode);
31372 code = PLUS;
31374 /* FALLTHRU */
31376 case PLUS:
31377 if (mode == DImode)
31379 /* DImode plus/minus need to clobber flags. */
31380 /* The adddi3 and subdi3 patterns are incorrectly written so that
31381 they require matching operands, even when we could easily support
31382 three operands. Thankfully, this can be fixed up post-splitting,
31383 as the individual add+adc patterns do accept three operands and
31384 post-reload cprop can make these moves go away. */
31385 emit_move_insn (new_out, old_out);
31386 if (code == PLUS)
31387 x = gen_adddi3 (new_out, new_out, value);
31388 else
31389 x = gen_subdi3 (new_out, new_out, value);
31390 emit_insn (x);
31391 break;
31393 /* FALLTHRU */
31395 default:
31396 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31397 emit_insn (gen_rtx_SET (new_out, x));
31398 break;
31401 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31402 use_release);
31404 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31405 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31407 /* Checks whether a barrier is needed and emits one accordingly. */
31408 if (is_armv8_sync
31409 || !(use_acquire || use_release))
31410 arm_post_atomic_barrier (model);
31413 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31414 opt_machine_mode
31415 arm_mode_to_pred_mode (machine_mode mode)
31417 switch (GET_MODE_NUNITS (mode))
31419 case 16: return V16BImode;
31420 case 8: return V8BImode;
31421 case 4: return V4BImode;
31422 case 2: return V2QImode;
31424 return opt_machine_mode ();
31427 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31428 If CAN_INVERT, store either the result or its inverse in TARGET
31429 and return true if TARGET contains the inverse. If !CAN_INVERT,
31430 always store the result in TARGET, never its inverse.
31432 Note that the handling of floating-point comparisons is not
31433 IEEE compliant. */
31435 bool
31436 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31437 bool can_invert)
31439 machine_mode cmp_result_mode = GET_MODE (target);
31440 machine_mode cmp_mode = GET_MODE (op0);
31442 bool inverted;
31444 /* MVE supports more comparisons than Neon. */
31445 if (TARGET_HAVE_MVE)
31446 inverted = false;
31447 else
31448 switch (code)
31450 /* For these we need to compute the inverse of the requested
31451 comparison. */
31452 case UNORDERED:
31453 case UNLT:
31454 case UNLE:
31455 case UNGT:
31456 case UNGE:
31457 case UNEQ:
31458 case NE:
31459 code = reverse_condition_maybe_unordered (code);
31460 if (!can_invert)
31462 /* Recursively emit the inverted comparison into a temporary
31463 and then store its inverse in TARGET. This avoids reusing
31464 TARGET (which for integer NE could be one of the inputs). */
31465 rtx tmp = gen_reg_rtx (cmp_result_mode);
31466 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31467 gcc_unreachable ();
31468 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31469 return false;
31471 inverted = true;
31472 break;
31474 default:
31475 inverted = false;
31476 break;
31479 switch (code)
31481 /* These are natively supported by Neon for zero comparisons, but otherwise
31482 require the operands to be swapped. For MVE, we can only compare
31483 registers. */
31484 case LE:
31485 case LT:
31486 if (!TARGET_HAVE_MVE)
31487 if (op1 != CONST0_RTX (cmp_mode))
31489 code = swap_condition (code);
31490 std::swap (op0, op1);
31492 /* Fall through. */
31494 /* These are natively supported by Neon for both register and zero
31495 operands. MVE supports registers only. */
31496 case EQ:
31497 case GE:
31498 case GT:
31499 case NE:
31500 if (TARGET_HAVE_MVE)
31502 switch (GET_MODE_CLASS (cmp_mode))
31504 case MODE_VECTOR_INT:
31505 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31506 op0, force_reg (cmp_mode, op1)));
31507 break;
31508 case MODE_VECTOR_FLOAT:
31509 if (TARGET_HAVE_MVE_FLOAT)
31510 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31511 op0, force_reg (cmp_mode, op1)));
31512 else
31513 gcc_unreachable ();
31514 break;
31515 default:
31516 gcc_unreachable ();
31519 else
31520 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31521 return inverted;
31523 /* These are natively supported for register operands only.
31524 Comparisons with zero aren't useful and should be folded
31525 or canonicalized by target-independent code. */
31526 case GEU:
31527 case GTU:
31528 if (TARGET_HAVE_MVE)
31529 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31530 op0, force_reg (cmp_mode, op1)));
31531 else
31532 emit_insn (gen_neon_vc (code, cmp_mode, target,
31533 op0, force_reg (cmp_mode, op1)));
31534 return inverted;
31536 /* These require the operands to be swapped and likewise do not
31537 support comparisons with zero. */
31538 case LEU:
31539 case LTU:
31540 if (TARGET_HAVE_MVE)
31541 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31542 force_reg (cmp_mode, op1), op0));
31543 else
31544 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31545 target, force_reg (cmp_mode, op1), op0));
31546 return inverted;
31548 /* These need a combination of two comparisons. */
31549 case LTGT:
31550 case ORDERED:
31552 /* Operands are LTGT iff (a > b || a < b).
31553 Operands are ORDERED iff (a > b || a <= b). */
31554 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31555 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31556 rtx_code alt_code = (code == LTGT ? LT : LE);
31557 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31558 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31559 gcc_unreachable ();
31560 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31561 gt_res, alt_res)));
31562 return inverted;
31565 default:
31566 gcc_unreachable ();
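/* For example, on Neon an integer NE comparison is emitted as EQ with the
   result reported as inverted (or NOTed into TARGET when !CAN_INVERT),
   whereas MVE has a native NE compare and takes the direct path.  */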
31570 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31571 CMP_RESULT_MODE is the mode of the comparison result. */
31573 void
31574 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31576 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31577 arm_expand_vector_compare, and another one here. */
31578 rtx mask;
31580 if (TARGET_HAVE_MVE)
31581 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31582 else
31583 mask = gen_reg_rtx (cmp_result_mode);
31585 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31586 operands[4], operands[5], true);
31587 if (inverted)
31588 std::swap (operands[1], operands[2]);
31589 if (TARGET_NEON)
31590 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31591 mask, operands[1], operands[2]));
31592 else
31594 machine_mode cmp_mode = GET_MODE (operands[0]);
31596 switch (GET_MODE_CLASS (cmp_mode))
31598 case MODE_VECTOR_INT:
31599 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31600 operands[1], operands[2], mask));
31601 break;
31602 case MODE_VECTOR_FLOAT:
31603 if (TARGET_HAVE_MVE_FLOAT)
31604 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31605 operands[1], operands[2], mask));
31606 else
31607 gcc_unreachable ();
31608 break;
31609 default:
31610 gcc_unreachable ();
31615 #define MAX_VECT_LEN 16
31617 struct expand_vec_perm_d
31619 rtx target, op0, op1;
31620 vec_perm_indices perm;
31621 machine_mode vmode;
31622 bool one_vector_p;
31623 bool testing_p;
31626 /* Generate a variable permutation. */
31628 static void
31629 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31631 machine_mode vmode = GET_MODE (target);
31632 bool one_vector_p = rtx_equal_p (op0, op1);
31634 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31635 gcc_checking_assert (GET_MODE (op0) == vmode);
31636 gcc_checking_assert (GET_MODE (op1) == vmode);
31637 gcc_checking_assert (GET_MODE (sel) == vmode);
31638 gcc_checking_assert (TARGET_NEON);
31640 if (one_vector_p)
31642 if (vmode == V8QImode)
31643 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31644 else
31645 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31647 else
31649 rtx pair;
31651 if (vmode == V8QImode)
31653 pair = gen_reg_rtx (V16QImode);
31654 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31655 pair = gen_lowpart (TImode, pair);
31656 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31658 else
31660 pair = gen_reg_rtx (OImode);
31661 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31662 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31667 void
31668 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31670 machine_mode vmode = GET_MODE (target);
31671 unsigned int nelt = GET_MODE_NUNITS (vmode);
31672 bool one_vector_p = rtx_equal_p (op0, op1);
31673 rtx mask;
31675 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31676 numbering of elements for big-endian, we must reverse the order. */
31677 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31679 /* The VTBL instruction does not use a modulo index, so we must take care
31680 of that ourselves. */
31681 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31682 mask = gen_const_vec_duplicate (vmode, mask);
31683 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31685 arm_expand_vec_perm_1 (target, op0, op1, sel);
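/* Standalone model of the masking above for a two-input V8QImode permute
   (hypothetical helper, plain C): VEC_PERM_EXPR requires the selector to be
   reduced modulo the number of input elements, which VTBL does not do.  */
#if 0
static void
vec_perm_v8qi_model (unsigned char *dst, const unsigned char *a,
		     const unsigned char *b, const unsigned char *sel)
{
  for (int i = 0; i < 8; i++)
    {
      unsigned idx = sel[i] & 15;	/* & (2 * nelt - 1), as above  */
      dst[i] = idx < 8 ? a[idx] : b[idx - 8];
    }
}
#endif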
31688 /* Map lane ordering between architectural lane order, and GCC lane order,
31689 taking into account ABI. See comment above output_move_neon for details. */
31691 static int
31692 neon_endian_lane_map (machine_mode mode, int lane)
31694 if (BYTES_BIG_ENDIAN)
31696 int nelems = GET_MODE_NUNITS (mode);
31697 /* Reverse lane order. */
31698 lane = (nelems - 1 - lane);
31699 /* Reverse D register order, to match ABI. */
31700 if (GET_MODE_SIZE (mode) == 16)
31701 lane = lane ^ (nelems / 2);
31703 return lane;
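/* For example, on a big-endian target a V2SImode value (one D register) has
   its two lanes swapped (0 <-> 1), while a V4SImode value (a Q register)
   swaps lanes within each D register: 0 <-> 1 and 2 <-> 3.  */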
31706 /* Some permutations index into pairs of vectors, this is a helper function
31707 to map indexes into those pairs of vectors. */
31709 static int
31710 neon_pair_endian_lane_map (machine_mode mode, int lane)
31712 int nelem = GET_MODE_NUNITS (mode);
31713 if (BYTES_BIG_ENDIAN)
31714 lane =
31715 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31716 return lane;
31719 /* Generate or test for an insn that supports a constant permutation. */
31721 /* Recognize patterns for the VUZP insns. */
31723 static bool
31724 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31726 unsigned int i, odd, mask, nelt = d->perm.length ();
31727 rtx out0, out1, in0, in1;
31728 int first_elem;
31729 int swap_nelt;
31731 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31732 return false;
31734 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31735 big endian pattern on 64 bit vectors, so we correct for that. */
31736 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31737 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31739 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31741 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31742 odd = 0;
31743 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31744 odd = 1;
31745 else
31746 return false;
31747 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31749 for (i = 0; i < nelt; i++)
31751 unsigned elt =
31752 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31753 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31754 return false;
31757 /* Success! */
31758 if (d->testing_p)
31759 return true;
31761 in0 = d->op0;
31762 in1 = d->op1;
31763 if (swap_nelt != 0)
31764 std::swap (in0, in1);
31766 out0 = d->target;
31767 out1 = gen_reg_rtx (d->vmode);
31768 if (odd)
31769 std::swap (out0, out1);
31771 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31772 return true;
31775 /* Recognize patterns for the VZIP insns. */
31777 static bool
31778 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31780 unsigned int i, high, mask, nelt = d->perm.length ();
31781 rtx out0, out1, in0, in1;
31782 int first_elem;
31783 bool is_swapped;
31785 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31786 return false;
31788 is_swapped = BYTES_BIG_ENDIAN;
31790 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31792 high = nelt / 2;
31793 if (first_elem == neon_endian_lane_map (d->vmode, high))
31795 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31796 high = 0;
31797 else
31798 return false;
31799 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31801 for (i = 0; i < nelt / 2; i++)
31803 unsigned elt =
31804 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31805 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31806 != elt)
31807 return false;
31808 elt =
31809 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31810 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31811 != elt)
31812 return false;
31815 /* Success! */
31816 if (d->testing_p)
31817 return true;
31819 in0 = d->op0;
31820 in1 = d->op1;
31821 if (is_swapped)
31822 std::swap (in0, in1);
31824 out0 = d->target;
31825 out1 = gen_reg_rtx (d->vmode);
31826 if (high)
31827 std::swap (out0, out1);
31829 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31830 return true;
31833 /* Recognize patterns for the VREV insns. */
31834 static bool
31835 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31837 unsigned int i, j, diff, nelt = d->perm.length ();
31838 rtx (*gen) (machine_mode, rtx, rtx);
31840 if (!d->one_vector_p)
31841 return false;
31843 diff = d->perm[0];
31844 switch (diff)
31846 case 7:
31847 switch (d->vmode)
31849 case E_V16QImode:
31850 case E_V8QImode:
31851 gen = gen_neon_vrev64;
31852 break;
31853 default:
31854 return false;
31856 break;
31857 case 3:
31858 switch (d->vmode)
31860 case E_V16QImode:
31861 case E_V8QImode:
31862 gen = gen_neon_vrev32;
31863 break;
31864 case E_V8HImode:
31865 case E_V4HImode:
31866 case E_V8HFmode:
31867 case E_V4HFmode:
31868 gen = gen_neon_vrev64;
31869 break;
31870 default:
31871 return false;
31873 break;
31874 case 1:
31875 switch (d->vmode)
31877 case E_V16QImode:
31878 case E_V8QImode:
31879 gen = gen_neon_vrev16;
31880 break;
31881 case E_V8HImode:
31882 case E_V4HImode:
31883 gen = gen_neon_vrev32;
31884 break;
31885 case E_V4SImode:
31886 case E_V2SImode:
31887 case E_V4SFmode:
31888 case E_V2SFmode:
31889 gen = gen_neon_vrev64;
31890 break;
31891 default:
31892 return false;
31894 break;
31895 default:
31896 return false;
31899 for (i = 0; i < nelt ; i += diff + 1)
31900 for (j = 0; j <= diff; j += 1)
31902 /* This is guaranteed to be true as the value of diff
31903 is 7, 3 or 1 and we should have enough elements in the
31904 queue to generate this. Getting a vector mask with a
31905 value of diff other than these values implies that
31906 something is wrong by the time we get here. */
31907 gcc_assert (i + j < nelt);
31908 if (d->perm[i + j] != i + diff - j)
31909 return false;
31912 /* Success! */
31913 if (d->testing_p)
31914 return true;
31916 emit_insn (gen (d->vmode, d->target, d->op0));
31917 return true;
31920 /* Recognize patterns for the VTRN insns. */
31922 static bool
31923 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31925 unsigned int i, odd, mask, nelt = d->perm.length ();
31926 rtx out0, out1, in0, in1;
31928 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31929 return false;
31931 /* Note that these are little-endian tests. Adjust for big-endian later. */
31932 if (d->perm[0] == 0)
31933 odd = 0;
31934 else if (d->perm[0] == 1)
31935 odd = 1;
31936 else
31937 return false;
31938 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31940 for (i = 0; i < nelt; i += 2)
31942 if (d->perm[i] != i + odd)
31943 return false;
31944 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31945 return false;
31948 /* Success! */
31949 if (d->testing_p)
31950 return true;
31952 in0 = d->op0;
31953 in1 = d->op1;
31954 if (BYTES_BIG_ENDIAN)
31956 std::swap (in0, in1);
31957 odd = !odd;
31960 out0 = d->target;
31961 out1 = gen_reg_rtx (d->vmode);
31962 if (odd)
31963 std::swap (out0, out1);
31965 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31966 return true;
31969 /* Recognize patterns for the VEXT insns. */
31971 static bool
31972 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31974 unsigned int i, nelt = d->perm.length ();
31975 rtx offset;
31977 unsigned int location;
31979 unsigned int next = d->perm[0] + 1;
31981 /* TODO: Handle GCC's numbering of elements for big-endian. */
31982 if (BYTES_BIG_ENDIAN)
31983 return false;
31985 /* Check if the extracted indexes are increasing by one. */
31986 for (i = 1; i < nelt; next++, i++)
31988 /* If we hit the most significant element of the 2nd vector in
31989 the previous iteration, no need to test further. */
31990 if (next == 2 * nelt)
31991 return false;
31993 /* If we are operating on only one vector: it could be a
31994 rotation. If there are only two elements of size < 64, let
31995 arm_evpc_neon_vrev catch it. */
31996 if (d->one_vector_p && (next == nelt))
31998 if ((nelt == 2) && (d->vmode != V2DImode))
31999 return false;
32000 else
32001 next = 0;
32004 if (d->perm[i] != next)
32005 return false;
32008 location = d->perm[0];
32010 /* Success! */
32011 if (d->testing_p)
32012 return true;
32014 offset = GEN_INT (location);
32016 if(d->vmode == E_DImode)
32017 return false;
32019 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32020 return true;
32023 /* The NEON VTBL instruction is a fully variable permutation that's even
32024 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32025 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32026 can do slightly better by expanding this as a constant where we don't
32027 have to apply a mask. */
32029 static bool
32030 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32032 rtx rperm[MAX_VECT_LEN], sel;
32033 machine_mode vmode = d->vmode;
32034 unsigned int i, nelt = d->perm.length ();
32036 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32037 numbering of elements for big-endian, we must reverse the order. */
32038 if (BYTES_BIG_ENDIAN)
32039 return false;
32041 if (d->testing_p)
32042 return true;
32044 /* Generic code will try constant permutation twice. Once with the
32045 original mode and again with the elements lowered to QImode.
32046 So wait and don't do the selector expansion ourselves. */
32047 if (vmode != V8QImode && vmode != V16QImode)
32048 return false;
32050 for (i = 0; i < nelt; ++i)
32051 rperm[i] = GEN_INT (d->perm[i]);
32052 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32053 sel = force_reg (vmode, sel);
32055 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32056 return true;
32059 static bool
32060 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32062 /* Check if the input mask matches vext before reordering the
32063 operands. */
32064 if (TARGET_NEON)
32065 if (arm_evpc_neon_vext (d))
32066 return true;
32068 /* The pattern matching functions above are written to look for a small
32069 number to begin the sequence (0, 1, N/2). If we begin with an index
32070 from the second operand, we can swap the operands. */
32071 unsigned int nelt = d->perm.length ();
32072 if (d->perm[0] >= nelt)
32074 d->perm.rotate_inputs (1);
32075 std::swap (d->op0, d->op1);
32078 if (TARGET_NEON)
32080 if (arm_evpc_neon_vuzp (d))
32081 return true;
32082 if (arm_evpc_neon_vzip (d))
32083 return true;
32084 if (arm_evpc_neon_vrev (d))
32085 return true;
32086 if (arm_evpc_neon_vtrn (d))
32087 return true;
32088 return arm_evpc_neon_vtbl (d);
32090 return false;
32093 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32095 static bool
32096 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32097 rtx target, rtx op0, rtx op1,
32098 const vec_perm_indices &sel)
32100 if (vmode != op_mode)
32101 return false;
32103 struct expand_vec_perm_d d;
32104 int i, nelt, which;
32106 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32107 return false;
32109 d.target = target;
32110 if (op0)
32112 rtx nop0 = force_reg (vmode, op0);
32113 if (op0 == op1)
32114 op1 = nop0;
32115 op0 = nop0;
32117 if (op1)
32118 op1 = force_reg (vmode, op1);
32119 d.op0 = op0;
32120 d.op1 = op1;
32122 d.vmode = vmode;
32123 gcc_assert (VECTOR_MODE_P (d.vmode));
32124 d.testing_p = !target;
32126 nelt = GET_MODE_NUNITS (d.vmode);
32127 for (i = which = 0; i < nelt; ++i)
32129 int ei = sel[i] & (2 * nelt - 1);
32130 which |= (ei < nelt ? 1 : 2);
32133 switch (which)
32135 default:
32136 gcc_unreachable();
32138 case 3:
32139 d.one_vector_p = false;
32140 if (d.testing_p || !rtx_equal_p (op0, op1))
32141 break;
32143 /* The elements of PERM do not suggest that only the first operand
32144 is used, but both operands are identical. Allow easier matching
32145 of the permutation by folding the permutation into the single
32146 input vector. */
32147 /* FALLTHRU */
32148 case 2:
32149 d.op0 = op1;
32150 d.one_vector_p = true;
32151 break;
32153 case 1:
32154 d.op1 = op0;
32155 d.one_vector_p = true;
32156 break;
32159 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32161 if (!d.testing_p)
32162 return arm_expand_vec_perm_const_1 (&d);
32164 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32165 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32166 if (!d.one_vector_p)
32167 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32169 start_sequence ();
32170 bool ret = arm_expand_vec_perm_const_1 (&d);
32171 end_sequence ();
32173 return ret;
32176 bool
32177 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32179 /* On soft-float targets, all auto-increment forms are OK as long as we
32180 have LDRD or the mode is no wider than a word. */
32181 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32182 return true;
32184 switch (code)
32186 /* Post increment and Pre Decrement are supported for all
32187 instruction forms except for vector forms. */
32188 case ARM_POST_INC:
32189 case ARM_PRE_DEC:
32190 if (VECTOR_MODE_P (mode))
32192 if (code != ARM_PRE_DEC)
32193 return true;
32194 else
32195 return false;
32198 return true;
32200 case ARM_POST_DEC:
32201 case ARM_PRE_INC:
32202 /* Without LDRD and mode size greater than
32203 word size, there is no point in auto-incrementing
32204 because ldm and stm will not have these forms. */
32205 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32206 return false;
32208 /* Vector and floating point modes do not support
32209 these auto increment forms. */
32210 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32211 return false;
32213 return true;
32215 default:
32216 return false;
32220 return false;
32223 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32224 on ARM, since we know that shifts by negative amounts are no-ops.
32225 Additionally, the default expansion code is not available or suitable
32226 for post-reload insn splits (this can occur when the register allocator
32227 chooses not to do a shift in NEON).
32229 This function is used in both initial expand and post-reload splits, and
32230 handles all kinds of 64-bit shifts.
32232 Input requirements:
32233 - It is safe for the input and output to be the same register, but
32234 early-clobber rules apply for the shift amount and scratch registers.
32235 - Shift by register requires both scratch registers. In all other cases
32236 the scratch registers may be NULL.
32237 - Ashiftrt by a register also clobbers the CC register. */
32238 void
32239 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32240 rtx amount, rtx scratch1, rtx scratch2)
32242 rtx out_high = gen_highpart (SImode, out);
32243 rtx out_low = gen_lowpart (SImode, out);
32244 rtx in_high = gen_highpart (SImode, in);
32245 rtx in_low = gen_lowpart (SImode, in);
32247 /* Terminology:
32248 in = the register pair containing the input value.
32249 out = the destination register pair.
32250 up = the high- or low-part of each pair.
32251 down = the opposite part to "up".
32252 In a shift, we can consider bits to shift from "up"-stream to
32253 "down"-stream, so in a left-shift "up" is the low-part and "down"
32254 is the high-part of each register pair. */
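/* For example, a left shift of the pair {in_high, in_low} by a constant N
   with 0 < N < 32 decomposes (see the constant case below) into:
	out_high = (in_high << N) | (in_low >> (32 - N));
	out_low  = in_low << N;
   and the right-shift forms are mirrored through the up/down naming.  */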
32256 rtx out_up = code == ASHIFT ? out_low : out_high;
32257 rtx out_down = code == ASHIFT ? out_high : out_low;
32258 rtx in_up = code == ASHIFT ? in_low : in_high;
32259 rtx in_down = code == ASHIFT ? in_high : in_low;
32261 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32262 gcc_assert (out
32263 && (REG_P (out) || SUBREG_P (out))
32264 && GET_MODE (out) == DImode);
32265 gcc_assert (in
32266 && (REG_P (in) || SUBREG_P (in))
32267 && GET_MODE (in) == DImode);
32268 gcc_assert (amount
32269 && (((REG_P (amount) || SUBREG_P (amount))
32270 && GET_MODE (amount) == SImode)
32271 || CONST_INT_P (amount)));
32272 gcc_assert (scratch1 == NULL
32273 || (GET_CODE (scratch1) == SCRATCH)
32274 || (GET_MODE (scratch1) == SImode
32275 && REG_P (scratch1)));
32276 gcc_assert (scratch2 == NULL
32277 || (GET_CODE (scratch2) == SCRATCH)
32278 || (GET_MODE (scratch2) == SImode
32279 && REG_P (scratch2)));
32280 gcc_assert (!REG_P (out) || !REG_P (amount)
32281 || !HARD_REGISTER_P (out)
32282 || (REGNO (out) != REGNO (amount)
32283 && REGNO (out) + 1 != REGNO (amount)));
32285 /* Macros to make following code more readable. */
32286 #define SUB_32(DEST,SRC) \
32287 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32288 #define RSB_32(DEST,SRC) \
32289 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32290 #define SUB_S_32(DEST,SRC) \
32291 gen_addsi3_compare0 ((DEST), (SRC), \
32292 GEN_INT (-32))
32293 #define SET(DEST,SRC) \
32294 gen_rtx_SET ((DEST), (SRC))
32295 #define SHIFT(CODE,SRC,AMOUNT) \
32296 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32297 #define LSHIFT(CODE,SRC,AMOUNT) \
32298 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32299 SImode, (SRC), (AMOUNT))
32300 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32301 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32302 SImode, (SRC), (AMOUNT))
32303 #define ORR(A,B) \
32304 gen_rtx_IOR (SImode, (A), (B))
32305 #define BRANCH(COND,LABEL) \
32306 gen_arm_cond_branch ((LABEL), \
32307 gen_rtx_ ## COND (CCmode, cc_reg, \
32308 const0_rtx), \
32309 cc_reg)
32311 /* Shifts by register and shifts by constant are handled separately. */
32312 if (CONST_INT_P (amount))
32314 /* We have a shift-by-constant. */
32316 /* First, handle out-of-range shift amounts.
32317 In both cases we try to match the result an ARM instruction in a
32318 shift-by-register would give. This helps reduce execution
32319 differences between optimization levels, but it won't stop other
32320 parts of the compiler doing different things. This is "undefined
32321 behavior", in any case. */
32322 if (INTVAL (amount) <= 0)
32323 emit_insn (gen_movdi (out, in));
32324 else if (INTVAL (amount) >= 64)
32326 if (code == ASHIFTRT)
32328 rtx const31_rtx = GEN_INT (31);
32329 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32330 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32332 else
32333 emit_insn (gen_movdi (out, const0_rtx));
32336 /* Now handle valid shifts. */
32337 else if (INTVAL (amount) < 32)
32339 /* Shifts by a constant less than 32. */
32340 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32342 /* Clearing the out register in DImode first avoids lots
32343 of spilling and results in less stack usage.
32344 Later this redundant insn is completely removed.
32345 Do that only if "in" and "out" are different registers. */
32346 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32347 emit_insn (SET (out, const0_rtx));
32348 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32349 emit_insn (SET (out_down,
32350 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32351 out_down)));
32352 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32354 else
32356 /* Shifts by a constant greater than 31. */
32357 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32359 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32360 emit_insn (SET (out, const0_rtx));
32361 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32362 if (code == ASHIFTRT)
32363 emit_insn (gen_ashrsi3 (out_up, in_up,
32364 GEN_INT (31)));
32365 else
32366 emit_insn (SET (out_up, const0_rtx));
32369 else
32371 /* We have a shift-by-register. */
32372 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32374 /* This alternative requires the scratch registers. */
32375 gcc_assert (scratch1 && REG_P (scratch1));
32376 gcc_assert (scratch2 && REG_P (scratch2));
32378 /* We will need the values "amount-32" and "32-amount" later.
32379 Swapping them around now allows the later code to be more general. */
32380 switch (code)
32382 case ASHIFT:
32383 emit_insn (SUB_32 (scratch1, amount));
32384 emit_insn (RSB_32 (scratch2, amount));
32385 break;
32386 case ASHIFTRT:
32387 emit_insn (RSB_32 (scratch1, amount));
32388 /* Also set CC = amount > 32. */
32389 emit_insn (SUB_S_32 (scratch2, amount));
32390 break;
32391 case LSHIFTRT:
32392 emit_insn (RSB_32 (scratch1, amount));
32393 emit_insn (SUB_32 (scratch2, amount));
32394 break;
32395 default:
32396 gcc_unreachable ();
32399 /* Emit code like this:
32401 arithmetic-left:
32402 out_down = in_down << amount;
32403 out_down = (in_up << (amount - 32)) | out_down;
32404 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32405 out_up = in_up << amount;
32407 arithmetic-right:
32408 out_down = in_down >> amount;
32409 out_down = (in_up << (32 - amount)) | out_down;
32410 if (amount >= 32)
32411 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32412 out_up = in_up >> amount;
32414 logical-right:
32415 out_down = in_down >> amount;
32416 out_down = (in_up << (32 - amount)) | out_down;
32417 if (amount >= 32)
32418 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32419 out_up = in_up >> amount;
32421 The ARM and Thumb2 variants are the same but implemented slightly
32422 differently. If this were only called during expand we could just
32423 use the Thumb2 case and let combine do the right thing, but this
32424 can also be called from post-reload splitters. */
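/* Editor's illustration (not part of GCC): assuming plain C semantics and a
   shift amount known to satisfy 0 < amount < 32, the composition emitted
   below computes, for a logical right shift,

     unsigned int out_down = (in_down >> amount) | (in_up << (32 - amount));
     unsigned int out_up = in_up >> amount;

   For 32 <= amount < 64 the extra "(unsigned) in_up >> (amount - 32)" term
   supplies the low word instead, while the first two terms contribute zero,
   because ARM register-specified LSL/LSR by 32 or more yield zero.  */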
32426 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32428 if (!TARGET_THUMB2)
32430 /* Emit code for ARM mode. */
32431 emit_insn (SET (out_down,
32432 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32433 if (code == ASHIFTRT)
32435 rtx_code_label *done_label = gen_label_rtx ();
32436 emit_jump_insn (BRANCH (LT, done_label));
32437 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32438 out_down)));
32439 emit_label (done_label);
32441 else
32442 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32443 out_down)));
32445 else
32447 /* Emit code for Thumb2 mode.
32448 Thumb2 can't do shift and or in one insn. */
32449 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32450 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32452 if (code == ASHIFTRT)
32454 rtx_code_label *done_label = gen_label_rtx ();
32455 emit_jump_insn (BRANCH (LT, done_label));
32456 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32457 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32458 emit_label (done_label);
32460 else
32462 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32463 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32467 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32470 #undef SUB_32
32471 #undef RSB_32
32472 #undef SUB_S_32
32473 #undef SET
32474 #undef SHIFT
32475 #undef LSHIFT
32476 #undef REV_LSHIFT
32477 #undef ORR
32478 #undef BRANCH
32481 /* Returns true if the pattern is a valid symbolic address, which is either a
32482 symbol_ref or (symbol_ref + addend).
32484 According to the ARM ELF ABI, the initial addend of REL-type relocations
32485 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32486 literal field of the instruction as a 16-bit signed value in the range
32487 -32768 <= A < 32768.
32489 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32490 unsigned range of 0 <= A < 256 as described in the AAELF32
32491 relocation handling documentation: REL-type relocations are encoded
32492 as unsigned in this case. */
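/* Editor's illustration of the forms accepted below (symbol and offsets
   hypothetical):

     (symbol_ref "sym")                                   accepted
     (const (plus (symbol_ref "sym") (const_int 4)))      accepted
     (const (plus (symbol_ref "sym") (const_int 40000)))  rejected: outside
        -32768..32767 for MOVW/MOVT, and outside 0..255 for Thumb-1
        without MOVT.  */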
32494 bool
32495 arm_valid_symbolic_address_p (rtx addr)
32497 rtx xop0, xop1 = NULL_RTX;
32498 rtx tmp = addr;
32500 if (target_word_relocations)
32501 return false;
32503 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32504 return true;
32506 /* (const (plus: symbol_ref const_int)) */
32507 if (GET_CODE (addr) == CONST)
32508 tmp = XEXP (addr, 0);
32510 if (GET_CODE (tmp) == PLUS)
32512 xop0 = XEXP (tmp, 0);
32513 xop1 = XEXP (tmp, 1);
32515 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32517 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32518 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32519 else
32520 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32524 return false;
32527 /* Return true if *COMPARISON is a valid comparison operation, and
32528 force the operands into a form that is valid for it. */
32529 bool
32530 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32532 enum rtx_code code = GET_CODE (*comparison);
32533 int code_int;
32534 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32535 ? GET_MODE (*op2) : GET_MODE (*op1);
32537 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32539 if (code == UNEQ || code == LTGT)
32540 return false;
32542 code_int = (int)code;
32543 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32544 PUT_CODE (*comparison, (enum rtx_code)code_int);
32546 switch (mode)
32548 case E_SImode:
32549 if (!arm_add_operand (*op1, mode))
32550 *op1 = force_reg (mode, *op1);
32551 if (!arm_add_operand (*op2, mode))
32552 *op2 = force_reg (mode, *op2);
32553 return true;
32555 case E_DImode:
32556 /* gen_compare_reg() will sort out any invalid operands. */
32557 return true;
32559 case E_HFmode:
32560 if (!TARGET_VFP_FP16INST)
32561 break;
32562 /* FP16 comparisons are done in SF mode. */
32563 mode = SFmode;
32564 *op1 = convert_to_mode (mode, *op1, 1);
32565 *op2 = convert_to_mode (mode, *op2, 1);
32566 /* Fall through. */
32567 case E_SFmode:
32568 case E_DFmode:
32569 if (!vfp_compare_operand (*op1, mode))
32570 *op1 = force_reg (mode, *op1);
32571 if (!vfp_compare_operand (*op2, mode))
32572 *op2 = force_reg (mode, *op2);
32573 return true;
32574 default:
32575 break;
32578 return false;
32582 /* Maximum number of instructions to set block of memory. */
32583 static int
32584 arm_block_set_max_insns (void)
32586 if (optimize_function_for_size_p (cfun))
32587 return 4;
32588 else
32589 return current_tune->max_insns_inline_memset;
32592 /* Return TRUE if it's profitable to set block of memory for
32593 non-vectorized case. VAL is the value to set the memory
32594 with. LENGTH is the number of bytes to set. ALIGN is the
32595 alignment of the destination memory in bytes. UNALIGNED_P
32596 is TRUE if we can only set the memory with instructions
32597 meeting alignment requirements. USE_STRD_P is TRUE if we
32598 can use strd to set the memory. */
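/* Editor's worked example (the constant cost of 1 is an assumption): for
   LENGTH == 14, ALIGN == 4, no strd, and an immediate that needs one insn,

     num = 1 (build the value) + (14 >> 2) (three word stores)
           + leftover[14 & 3] (one strh for the two trailing bytes)
         = 5,

   which exceeds the size-optimization limit of 4 but may still be allowed
   by current_tune->max_insns_inline_memset.  */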
32599 static bool
32600 arm_block_set_non_vect_profit_p (rtx val,
32601 unsigned HOST_WIDE_INT length,
32602 unsigned HOST_WIDE_INT align,
32603 bool unaligned_p, bool use_strd_p)
32605 int num = 0;
32606 /* For a leftover of 0-7 bytes, the memory block can be set using
32607 strb/strh/str with the minimum number of instructions. */
32608 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32610 if (unaligned_p)
32612 num = arm_const_inline_cost (SET, val);
32613 num += length / align + length % align;
32615 else if (use_strd_p)
32617 num = arm_const_double_inline_cost (val);
32618 num += (length >> 3) + leftover[length & 7];
32620 else
32622 num = arm_const_inline_cost (SET, val);
32623 num += (length >> 2) + leftover[length & 3];
32626 /* We may be able to combine the last STRH/STRB pair into a single STR
32627 by shifting one byte back. */
32628 if (unaligned_access && length > 3 && (length & 3) == 3)
32629 num--;
32631 return (num <= arm_block_set_max_insns ());
32634 /* Return TRUE if it's profitable to set block of memory for
32635 vectorized case. LENGTH is the number of bytes to set.
32636 ALIGN is the alignment of destination memory in bytes.
32637 MODE is the vector mode used to set the memory. */
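/* Editor's worked example: LENGTH == 17, ALIGN == 4 and MODE == V16QImode
   gives num = 1 (load constant) + 2 (ceil (17/16) stores) + 1 (address
   adjustment) - 1 (first aligned vst1) = 3, which fits even the
   size-optimization limit of 4 insns.  */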
32638 static bool
32639 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32640 unsigned HOST_WIDE_INT align,
32641 machine_mode mode)
32643 int num;
32644 bool unaligned_p = ((align & 3) != 0);
32645 unsigned int nelt = GET_MODE_NUNITS (mode);
32647 /* Instruction loading constant value. */
32648 num = 1;
32649 /* Instructions storing the memory. */
32650 num += (length + nelt - 1) / nelt;
32651 /* Instructions adjusting the address expression. We only need to
32652 adjust the address expression if it is 4-byte aligned and the leftover
32653 bytes can only be stored by a misaligned store instruction. */
32654 if (!unaligned_p && (length & 3) != 0)
32655 num++;
32657 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32658 if (!unaligned_p && mode == V16QImode)
32659 num--;
32661 return (num <= arm_block_set_max_insns ());
32664 /* Set a block of memory using vectorization instructions for the
32665 unaligned case. We fill the first LENGTH bytes of the memory
32666 area starting from DSTBASE with byte constant VALUE. ALIGN is
32667 the alignment requirement of memory. Return TRUE if succeeded. */
32668 static bool
32669 arm_block_set_unaligned_vect (rtx dstbase,
32670 unsigned HOST_WIDE_INT length,
32671 unsigned HOST_WIDE_INT value,
32672 unsigned HOST_WIDE_INT align)
32674 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32675 rtx dst, mem;
32676 rtx val_vec, reg;
32677 rtx (*gen_func) (rtx, rtx);
32678 machine_mode mode;
32679 unsigned HOST_WIDE_INT v = value;
32680 unsigned int offset = 0;
32681 gcc_assert ((align & 0x3) != 0);
32682 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32683 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32684 if (length >= nelt_v16)
32686 mode = V16QImode;
32687 gen_func = gen_movmisalignv16qi;
32689 else
32691 mode = V8QImode;
32692 gen_func = gen_movmisalignv8qi;
32694 nelt_mode = GET_MODE_NUNITS (mode);
32695 gcc_assert (length >= nelt_mode);
32696 /* Skip if it isn't profitable. */
32697 if (!arm_block_set_vect_profit_p (length, align, mode))
32698 return false;
32700 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32701 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32703 v = sext_hwi (v, BITS_PER_WORD);
32705 reg = gen_reg_rtx (mode);
32706 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32707 /* Emit instruction loading the constant value. */
32708 emit_move_insn (reg, val_vec);
32710 /* Handle nelt_mode bytes in a vector. */
32711 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32713 emit_insn ((*gen_func) (mem, reg));
32714 if (i + 2 * nelt_mode <= length)
32716 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32717 offset += nelt_mode;
32718 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32722 /* If at least nelt_v8 bytes are left over, we must be in
32723 V16QImode. */
32724 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32726 /* Handle (8, 16) bytes leftover. */
32727 if (i + nelt_v8 < length)
32729 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32730 offset += length - i;
32731 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32733 /* We are shifting bytes back, set the alignment accordingly. */
32734 if ((length & 1) != 0 && align >= 2)
32735 set_mem_align (mem, BITS_PER_UNIT);
32737 emit_insn (gen_movmisalignv16qi (mem, reg));
32739 /* Handle (0, 8] bytes leftover. */
32740 else if (i < length && i + nelt_v8 >= length)
32742 if (mode == V16QImode)
32743 reg = gen_lowpart (V8QImode, reg);
32745 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32746 + (nelt_mode - nelt_v8))));
32747 offset += (length - i) + (nelt_mode - nelt_v8);
32748 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32750 /* We are shifting bytes back, set the alignment accordingly. */
32751 if ((length & 1) != 0 && align >= 2)
32752 set_mem_align (mem, BITS_PER_UNIT);
32754 emit_insn (gen_movmisalignv8qi (mem, reg));
32757 return true;
32760 /* Set a block of memory using vectorization instructions for the
32761 aligned case. We fill the first LENGTH bytes of the memory area
32762 starting from DSTBASE with byte constant VALUE. ALIGN is the
32763 alignment requirement of memory. Return TRUE if succeeded. */
32764 static bool
32765 arm_block_set_aligned_vect (rtx dstbase,
32766 unsigned HOST_WIDE_INT length,
32767 unsigned HOST_WIDE_INT value,
32768 unsigned HOST_WIDE_INT align)
32770 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32771 rtx dst, addr, mem;
32772 rtx val_vec, reg;
32773 machine_mode mode;
32774 unsigned int offset = 0;
32776 gcc_assert ((align & 0x3) == 0);
32777 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32778 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32779 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32780 mode = V16QImode;
32781 else
32782 mode = V8QImode;
32784 nelt_mode = GET_MODE_NUNITS (mode);
32785 gcc_assert (length >= nelt_mode);
32786 /* Skip if it isn't profitable. */
32787 if (!arm_block_set_vect_profit_p (length, align, mode))
32788 return false;
32790 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32792 reg = gen_reg_rtx (mode);
32793 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32794 /* Emit instruction loading the constant value. */
32795 emit_move_insn (reg, val_vec);
32797 i = 0;
32798 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32799 if (mode == V16QImode)
32801 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32802 emit_insn (gen_movmisalignv16qi (mem, reg));
32803 i += nelt_mode;
32804 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32805 if (i + nelt_v8 < length && i + nelt_v16 > length)
32807 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32808 offset += length - nelt_mode;
32809 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32810 /* We are shifting bytes back, set the alignment accordingly. */
32811 if ((length & 0x3) == 0)
32812 set_mem_align (mem, BITS_PER_UNIT * 4);
32813 else if ((length & 0x1) == 0)
32814 set_mem_align (mem, BITS_PER_UNIT * 2);
32815 else
32816 set_mem_align (mem, BITS_PER_UNIT);
32818 emit_insn (gen_movmisalignv16qi (mem, reg));
32819 return true;
32821 /* Fall through for bytes leftover. */
32822 mode = V8QImode;
32823 nelt_mode = GET_MODE_NUNITS (mode);
32824 reg = gen_lowpart (V8QImode, reg);
32827 /* Handle 8 bytes in a vector. */
32828 for (; (i + nelt_mode <= length); i += nelt_mode)
32830 addr = plus_constant (Pmode, dst, i);
32831 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32832 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32833 emit_move_insn (mem, reg);
32834 else
32835 emit_insn (gen_unaligned_storev8qi (mem, reg));
32838 /* Handle single word leftover by shifting 4 bytes back. We can
32839 use aligned access for this case. */
32840 if (i + UNITS_PER_WORD == length)
32842 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32843 offset += i - UNITS_PER_WORD;
32844 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32845 /* We are shifting 4 bytes back, set the alignment accordingly. */
32846 if (align > UNITS_PER_WORD)
32847 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32849 emit_insn (gen_unaligned_storev8qi (mem, reg));
32851 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32852 We have to use unaligned access for this case. */
32853 else if (i < length)
32855 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32856 offset += length - nelt_mode;
32857 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32858 /* We are shifting bytes back, set the alignment accordingly. */
32859 if ((length & 1) == 0)
32860 set_mem_align (mem, BITS_PER_UNIT * 2);
32861 else
32862 set_mem_align (mem, BITS_PER_UNIT);
32864 emit_insn (gen_movmisalignv8qi (mem, reg));
32867 return true;
32870 /* Set a block of memory using plain strh/strb instructions, using
32871 only the instructions permitted by ALIGN on the processor. We fill the
32872 first LENGTH bytes of the memory area starting from DSTBASE
32873 with byte constant VALUE. ALIGN is the alignment requirement
32874 of memory. */
32875 static bool
32876 arm_block_set_unaligned_non_vect (rtx dstbase,
32877 unsigned HOST_WIDE_INT length,
32878 unsigned HOST_WIDE_INT value,
32879 unsigned HOST_WIDE_INT align)
32881 unsigned int i;
32882 rtx dst, addr, mem;
32883 rtx val_exp, val_reg, reg;
32884 machine_mode mode;
32885 HOST_WIDE_INT v = value;
32887 gcc_assert (align == 1 || align == 2);
32889 if (align == 2)
32890 v |= (value << BITS_PER_UNIT);
32892 v = sext_hwi (v, BITS_PER_WORD);
32893 val_exp = GEN_INT (v);
32894 /* Skip if it isn't profitable. */
32895 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32896 align, true, false))
32897 return false;
32899 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32900 mode = (align == 2 ? HImode : QImode);
32901 val_reg = force_reg (SImode, val_exp);
32902 reg = gen_lowpart (mode, val_reg);
32904 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32906 addr = plus_constant (Pmode, dst, i);
32907 mem = adjust_automodify_address (dstbase, mode, addr, i);
32908 emit_move_insn (mem, reg);
32911 /* Handle single byte leftover. */
32912 if (i + 1 == length)
32914 reg = gen_lowpart (QImode, val_reg);
32915 addr = plus_constant (Pmode, dst, i);
32916 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32917 emit_move_insn (mem, reg);
32918 i++;
32921 gcc_assert (i == length);
32922 return true;
32925 /* Set a block of memory using plain strd/str/strh/strb instructions,
32926 to permit unaligned copies on processors which support unaligned
32927 semantics for those instructions. We fill the first LENGTH bytes
32928 of the memory area starting from DSTBASE with byte constant VALUE.
32929 ALIGN is the alignment requirement of memory. */
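/* Editor's illustration (VALUE and LENGTH hypothetical): for VALUE == 0x5a
   and LENGTH >= 4 the replicated patterns built below are

     unsigned int word = 0x5a * 0x01010101u;                (0x5a5a5a5a)
     unsigned long long dword = word | ((unsigned long long) word << 32);

   the double-word form being used only on the strd path.  */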
32930 static bool
32931 arm_block_set_aligned_non_vect (rtx dstbase,
32932 unsigned HOST_WIDE_INT length,
32933 unsigned HOST_WIDE_INT value,
32934 unsigned HOST_WIDE_INT align)
32936 unsigned int i;
32937 rtx dst, addr, mem;
32938 rtx val_exp, val_reg, reg;
32939 unsigned HOST_WIDE_INT v;
32940 bool use_strd_p;
32942 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32943 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32945 v = (value | (value << 8) | (value << 16) | (value << 24));
32946 if (length < UNITS_PER_WORD)
32947 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32949 if (use_strd_p)
32950 v |= (v << BITS_PER_WORD);
32951 else
32952 v = sext_hwi (v, BITS_PER_WORD);
32954 val_exp = GEN_INT (v);
32955 /* Skip if it isn't profitable. */
32956 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32957 align, false, use_strd_p))
32959 if (!use_strd_p)
32960 return false;
32962 /* Try without strd. */
32963 v = (v >> BITS_PER_WORD);
32964 v = sext_hwi (v, BITS_PER_WORD);
32965 val_exp = GEN_INT (v);
32966 use_strd_p = false;
32967 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32968 align, false, use_strd_p))
32969 return false;
32972 i = 0;
32973 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32974 /* Handle double words using strd if possible. */
32975 if (use_strd_p)
32977 val_reg = force_reg (DImode, val_exp);
32978 reg = val_reg;
32979 for (; (i + 8 <= length); i += 8)
32981 addr = plus_constant (Pmode, dst, i);
32982 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32983 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32984 emit_move_insn (mem, reg);
32985 else
32986 emit_insn (gen_unaligned_storedi (mem, reg));
32989 else
32990 val_reg = force_reg (SImode, val_exp);
32992 /* Handle words. */
32993 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32994 for (; (i + 4 <= length); i += 4)
32996 addr = plus_constant (Pmode, dst, i);
32997 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32998 if ((align & 3) == 0)
32999 emit_move_insn (mem, reg);
33000 else
33001 emit_insn (gen_unaligned_storesi (mem, reg));
33004 /* Merge last pair of STRH and STRB into a STR if possible. */
33005 if (unaligned_access && i > 0 && (i + 3) == length)
33007 addr = plus_constant (Pmode, dst, i - 1);
33008 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33009 /* We are shifting one byte back, set the alignment accordingly. */
33010 if ((align & 1) == 0)
33011 set_mem_align (mem, BITS_PER_UNIT);
33013 /* Most likely this is an unaligned access, and we can't tell at
33014 compilation time. */
33015 emit_insn (gen_unaligned_storesi (mem, reg));
33016 return true;
33019 /* Handle half word leftover. */
33020 if (i + 2 <= length)
33022 reg = gen_lowpart (HImode, val_reg);
33023 addr = plus_constant (Pmode, dst, i);
33024 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33025 if ((align & 1) == 0)
33026 emit_move_insn (mem, reg);
33027 else
33028 emit_insn (gen_unaligned_storehi (mem, reg));
33030 i += 2;
33033 /* Handle single byte leftover. */
33034 if (i + 1 == length)
33036 reg = gen_lowpart (QImode, val_reg);
33037 addr = plus_constant (Pmode, dst, i);
33038 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33039 emit_move_insn (mem, reg);
33042 return true;
33045 /* Set a block of memory using vectorization instructions for both
33046 aligned and unaligned cases. We fill the first LENGTH bytes of
33047 the memory area starting from DSTBASE with byte constant VALUE.
33048 ALIGN is the alignment requirement of memory. */
33049 static bool
33050 arm_block_set_vect (rtx dstbase,
33051 unsigned HOST_WIDE_INT length,
33052 unsigned HOST_WIDE_INT value,
33053 unsigned HOST_WIDE_INT align)
33055 /* Check whether we need to use unaligned store instruction. */
33056 if (((align & 3) != 0 || (length & 3) != 0)
33057 /* Check whether unaligned store instruction is available. */
33058 && (!unaligned_access || BYTES_BIG_ENDIAN))
33059 return false;
33061 if ((align & 3) == 0)
33062 return arm_block_set_aligned_vect (dstbase, length, value, align);
33063 else
33064 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33067 /* Expand a block-set (memset) operation. First we try to do it using
33068 vectorization instructions, then with ARM unaligned access and
33069 double-word stores if profitable. OPERANDS[0] is the destination,
33070 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33071 initialize the memory with, OPERANDS[3] is the known alignment of the
33072 destination. */
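/* Editor's illustration (buffer name hypothetical): a call such as
   memset (buf, 0x55, 24) reaches this expander with OPERANDS[1] == 24,
   OPERANDS[2] == 0x55 and OPERANDS[3] holding the known alignment of buf;
   lengths above 64 bytes are left to the generic code or a libcall.  */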
33073 bool
33074 arm_gen_setmem (rtx *operands)
33076 rtx dstbase = operands[0];
33077 unsigned HOST_WIDE_INT length;
33078 unsigned HOST_WIDE_INT value;
33079 unsigned HOST_WIDE_INT align;
33081 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33082 return false;
33084 length = UINTVAL (operands[1]);
33085 if (length > 64)
33086 return false;
33088 value = (UINTVAL (operands[2]) & 0xFF);
33089 align = UINTVAL (operands[3]);
33090 if (TARGET_NEON && length >= 8
33091 && current_tune->string_ops_prefer_neon
33092 && arm_block_set_vect (dstbase, length, value, align))
33093 return true;
33095 if (!unaligned_access && (align & 3) != 0)
33096 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33098 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33102 static bool
33103 arm_macro_fusion_p (void)
33105 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33108 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33109 for MOVW / MOVT macro fusion. */
33111 static bool
33112 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33114 /* We are trying to fuse
33115 movw imm / movt imm
33116 instructions as a group that gets scheduled together. */
33118 rtx set_dest = SET_DEST (curr_set);
33120 if (GET_MODE (set_dest) != SImode)
33121 return false;
33123 /* We are trying to match:
33124 prev (movw) == (set (reg r0) (const_int imm16))
33125 curr (movt) == (set (zero_extract (reg r0)
33126 (const_int 16)
33127 (const_int 16))
33128 (const_int imm16_1))
33130 prev (movw) == (set (reg r1)
33131 (high (symbol_ref ("SYM"))))
33132 curr (movt) == (set (reg r0)
33133 (lo_sum (reg r1)
33134 (symbol_ref ("SYM")))) */
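/* Editor's illustration: the second form above corresponds to the assembly
   pair

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   which the scheduler keeps adjacent so that cores implementing MOVW/MOVT
   fusion can treat them as a single 32-bit immediate move.  */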
33136 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33138 if (CONST_INT_P (SET_SRC (curr_set))
33139 && CONST_INT_P (SET_SRC (prev_set))
33140 && REG_P (XEXP (set_dest, 0))
33141 && REG_P (SET_DEST (prev_set))
33142 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33143 return true;
33146 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33147 && REG_P (SET_DEST (curr_set))
33148 && REG_P (SET_DEST (prev_set))
33149 && GET_CODE (SET_SRC (prev_set)) == HIGH
33150 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33151 return true;
33153 return false;
33156 static bool
33157 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33159 rtx prev_set = single_set (prev);
33160 rtx curr_set = single_set (curr);
33162 if (!prev_set
33163 || !curr_set)
33164 return false;
33166 if (any_condjump_p (curr))
33167 return false;
33169 if (!arm_macro_fusion_p ())
33170 return false;
33172 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33173 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33174 return true;
33176 return false;
33179 /* Return true iff the instruction fusion described by OP is enabled. */
33180 bool
33181 arm_fusion_enabled_p (tune_params::fuse_ops op)
33183 return current_tune->fusible_ops & op;
33186 /* Return TRUE if return address signing mechanism is enabled. */
33187 bool
33188 arm_current_function_pac_enabled_p (void)
33190 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33191 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33192 && !crtl->is_leaf));
33195 /* Raise an error if the current target arch is not bti compatible. */
33196 void aarch_bti_arch_check (void)
33198 if (!arm_arch8m_main)
33199 error ("This architecture does not support branch protection instructions");
33202 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33203 bool
33204 aarch_bti_enabled (void)
33206 return aarch_enable_bti != 0;
33209 /* Check if INSN is a BTI J insn. */
33210 bool
33211 aarch_bti_j_insn_p (rtx_insn *insn)
33213 if (!insn || !INSN_P (insn))
33214 return false;
33216 rtx pat = PATTERN (insn);
33217 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33220 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33221 bool
33222 aarch_pac_insn_p (rtx x)
33224 if (!x || !INSN_P (x))
33225 return false;
33227 rtx pat = PATTERN (x);
33229 if (GET_CODE (pat) == SET)
33231 rtx tmp = XEXP (pat, 1);
33232 if (tmp
33233 && ((GET_CODE (tmp) == UNSPEC
33234 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33235 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33236 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33237 return true;
33240 return false;
33243 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33244 For Arm, both of these map to a simple BTI instruction. */
33247 aarch_gen_bti_c (void)
33249 return gen_bti_nop ();
33253 aarch_gen_bti_j (void)
33255 return gen_bti_nop ();
33258 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33259 scheduled for speculative execution. Reject the long-running division
33260 and square-root instructions. */
33262 static bool
33263 arm_sched_can_speculate_insn (rtx_insn *insn)
33265 switch (get_attr_type (insn))
33267 case TYPE_SDIV:
33268 case TYPE_UDIV:
33269 case TYPE_FDIVS:
33270 case TYPE_FDIVD:
33271 case TYPE_FSQRTS:
33272 case TYPE_FSQRTD:
33273 case TYPE_NEON_FP_SQRT_S:
33274 case TYPE_NEON_FP_SQRT_D:
33275 case TYPE_NEON_FP_SQRT_S_Q:
33276 case TYPE_NEON_FP_SQRT_D_Q:
33277 case TYPE_NEON_FP_DIV_S:
33278 case TYPE_NEON_FP_DIV_D:
33279 case TYPE_NEON_FP_DIV_S_Q:
33280 case TYPE_NEON_FP_DIV_D_Q:
33281 return false;
33282 default:
33283 return true;
33287 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33289 static unsigned HOST_WIDE_INT
33290 arm_asan_shadow_offset (void)
33292 return HOST_WIDE_INT_1U << 29;
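/* Editor's illustration: with this offset the usual AddressSanitizer mapping
   from an application address to its shadow byte is

     shadow = (addr >> 3) + (1u << 29);   that is, + 0x20000000

   using the standard granularity of one shadow byte per 8 application
   bytes.  */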
33296 /* This is a temporary fix for PR60655. Ideally we need
33297 to handle most of these cases in the generic part but
33298 currently we reject minus (..) (sym_ref). We try to
33299 ameliorate the case with minus (sym_ref1) (sym_ref2)
33300 where they are in the same section. */
33302 static bool
33303 arm_const_not_ok_for_debug_p (rtx p)
33305 tree decl_op0 = NULL;
33306 tree decl_op1 = NULL;
33308 if (GET_CODE (p) == UNSPEC)
33309 return true;
33310 if (GET_CODE (p) == MINUS)
33312 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33314 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33315 if (decl_op1
33316 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33317 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33319 if ((VAR_P (decl_op1)
33320 || TREE_CODE (decl_op1) == CONST_DECL)
33321 && (VAR_P (decl_op0)
33322 || TREE_CODE (decl_op0) == CONST_DECL))
33323 return (get_variable_section (decl_op1, false)
33324 != get_variable_section (decl_op0, false));
33326 if (TREE_CODE (decl_op1) == LABEL_DECL
33327 && TREE_CODE (decl_op0) == LABEL_DECL)
33328 return (DECL_CONTEXT (decl_op1)
33329 != DECL_CONTEXT (decl_op0));
33332 return true;
33336 return false;
33339 /* Return TRUE if X is a reference to a value in a constant pool. */
33340 extern bool
33341 arm_is_constant_pool_ref (rtx x)
33343 return (MEM_P (x)
33344 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33345 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33348 /* Remember the last target of arm_set_current_function. */
33349 static GTY(()) tree arm_previous_fndecl;
33351 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33353 void
33354 save_restore_target_globals (tree new_tree)
33356 /* If we have a previous state, use it. */
33357 if (TREE_TARGET_GLOBALS (new_tree))
33358 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33359 else if (new_tree == target_option_default_node)
33360 restore_target_globals (&default_target_globals);
33361 else
33363 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33364 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33367 arm_option_params_internal ();
33370 /* Invalidate arm_previous_fndecl. */
33372 void
33373 arm_reset_previous_fndecl (void)
33375 arm_previous_fndecl = NULL_TREE;
33378 /* Establish appropriate back-end context for processing the function
33379 FNDECL. The argument might be NULL to indicate processing at top
33380 level, outside of any function scope. */
33382 static void
33383 arm_set_current_function (tree fndecl)
33385 if (!fndecl || fndecl == arm_previous_fndecl)
33386 return;
33388 tree old_tree = (arm_previous_fndecl
33389 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33390 : NULL_TREE);
33392 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33394 /* If current function has no attributes but previous one did,
33395 use the default node. */
33396 if (! new_tree && old_tree)
33397 new_tree = target_option_default_node;
33399 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33400 the default have been handled by save_restore_target_globals from
33401 arm_pragma_target_parse. */
33402 if (old_tree == new_tree)
33403 return;
33405 arm_previous_fndecl = fndecl;
33407 /* First set the target options. */
33408 cl_target_option_restore (&global_options, &global_options_set,
33409 TREE_TARGET_OPTION (new_tree));
33411 save_restore_target_globals (new_tree);
33413 arm_override_options_after_change_1 (&global_options, &global_options_set);
33416 /* Implement TARGET_OPTION_PRINT. */
33418 static void
33419 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33421 int flags = ptr->x_target_flags;
33422 const char *fpu_name;
33424 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33425 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33427 fprintf (file, "%*sselected isa %s\n", indent, "",
33428 TARGET_THUMB2_P (flags) ? "thumb2" :
33429 TARGET_THUMB_P (flags) ? "thumb1" :
33430 "arm");
33432 if (ptr->x_arm_arch_string)
33433 fprintf (file, "%*sselected architecture %s\n", indent, "",
33434 ptr->x_arm_arch_string);
33436 if (ptr->x_arm_cpu_string)
33437 fprintf (file, "%*sselected CPU %s\n", indent, "",
33438 ptr->x_arm_cpu_string);
33440 if (ptr->x_arm_tune_string)
33441 fprintf (file, "%*sselected tune %s\n", indent, "",
33442 ptr->x_arm_tune_string);
33444 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33447 /* Hook to determine if one function can safely inline another. */
33449 static bool
33450 arm_can_inline_p (tree caller, tree callee)
33452 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33453 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33454 bool can_inline = true;
33456 struct cl_target_option *caller_opts
33457 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33458 : target_option_default_node);
33460 struct cl_target_option *callee_opts
33461 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33462 : target_option_default_node);
33464 if (callee_opts == caller_opts)
33465 return true;
33467 /* Callee's ISA features should be a subset of the caller's. */
33468 struct arm_build_target caller_target;
33469 struct arm_build_target callee_target;
33470 caller_target.isa = sbitmap_alloc (isa_num_bits);
33471 callee_target.isa = sbitmap_alloc (isa_num_bits);
33473 arm_configure_build_target (&caller_target, caller_opts, false);
33474 arm_configure_build_target (&callee_target, callee_opts, false);
33475 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33476 can_inline = false;
33478 sbitmap_free (caller_target.isa);
33479 sbitmap_free (callee_target.isa);
33481 /* OK to inline between different modes.
33482 Function with mode specific instructions, e.g using asm,
33483 must be explicitly protected with noinline. */
33484 return can_inline;
33487 /* Hook to fix function's alignment affected by target attribute. */
33489 static void
33490 arm_relayout_function (tree fndecl)
33492 if (DECL_USER_ALIGN (fndecl))
33493 return;
33495 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33497 if (!callee_tree)
33498 callee_tree = target_option_default_node;
33500 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33501 SET_DECL_ALIGN
33502 (fndecl,
33503 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33506 /* Inner function to process the attribute((target(...))), take an argument and
33507 set the current options from the argument. If we have a list, recursively
33508 go over the list. */
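/* Editor's illustration (attribute string hypothetical): a declaration such
   as

     __attribute__ ((target ("thumb,fpu=neon-vfpv4"))) void f (void);

   is processed one comma-separated token at a time, so the loop below sees
   "thumb" and then "fpu=neon-vfpv4".  */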
33510 static bool
33511 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33513 if (TREE_CODE (args) == TREE_LIST)
33515 bool ret = true;
33517 for (; args; args = TREE_CHAIN (args))
33518 if (TREE_VALUE (args)
33519 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33520 ret = false;
33521 return ret;
33524 else if (TREE_CODE (args) != STRING_CST)
33526 error ("attribute %<target%> argument not a string");
33527 return false;
33530 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33531 char *q;
33533 while ((q = strtok (argstr, ",")) != NULL)
33535 argstr = NULL;
33536 if (!strcmp (q, "thumb"))
33538 opts->x_target_flags |= MASK_THUMB;
33539 if (TARGET_FDPIC && !arm_arch_thumb2)
33540 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33543 else if (!strcmp (q, "arm"))
33544 opts->x_target_flags &= ~MASK_THUMB;
33546 else if (!strcmp (q, "general-regs-only"))
33547 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33549 else if (startswith (q, "fpu="))
33551 int fpu_index;
33552 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33553 &fpu_index, CL_TARGET))
33555 error ("invalid fpu for target attribute or pragma %qs", q);
33556 return false;
33558 if (fpu_index == TARGET_FPU_auto)
33560 /* This doesn't really make sense until we support
33561 general dynamic selection of the architecture and all
33562 sub-features. */
33563 sorry ("auto fpu selection not currently permitted here");
33564 return false;
33566 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33568 else if (startswith (q, "arch="))
33570 char *arch = q + 5;
33571 const arch_option *arm_selected_arch
33572 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33574 if (!arm_selected_arch)
33576 error ("invalid architecture for target attribute or pragma %qs",
33578 return false;
33581 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33583 else if (q[0] == '+')
33585 opts->x_arm_arch_string
33586 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33588 else
33590 error ("unknown target attribute or pragma %qs", q);
33591 return false;
33595 return true;
33598 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33600 tree
33601 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33602 struct gcc_options *opts_set)
33604 struct cl_target_option cl_opts;
33606 if (!arm_valid_target_attribute_rec (args, opts))
33607 return NULL_TREE;
33609 cl_target_option_save (&cl_opts, opts, opts_set);
33610 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33611 arm_option_check_internal (opts);
33612 /* Do any overrides, such as global options arch=xxx.
33613 We do this since arm_active_target was overridden. */
33614 arm_option_reconfigure_globals ();
33615 arm_options_perform_arch_sanity_checks ();
33616 arm_option_override_internal (opts, opts_set);
33618 return build_target_option_node (opts, opts_set);
33621 static void
33622 add_attribute (const char * mode, tree *attributes)
33624 size_t len = strlen (mode);
33625 tree value = build_string (len, mode);
33627 TREE_TYPE (value) = build_array_type (char_type_node,
33628 build_index_type (size_int (len)));
33630 *attributes = tree_cons (get_identifier ("target"),
33631 build_tree_list (NULL_TREE, value),
33632 *attributes);
33635 /* For testing. Insert thumb or arm modes alternatively on functions. */
33637 static void
33638 arm_insert_attributes (tree fndecl, tree * attributes)
33640 const char *mode;
33642 if (! TARGET_FLIP_THUMB)
33643 return;
33645 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33646 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33647 return;
33649 /* Nested definitions must inherit mode. */
33650 if (current_function_decl)
33652 mode = TARGET_THUMB ? "thumb" : "arm";
33653 add_attribute (mode, attributes);
33654 return;
33657 /* If there is already a setting don't change it. */
33658 if (lookup_attribute ("target", *attributes) != NULL)
33659 return;
33661 mode = thumb_flipper ? "thumb" : "arm";
33662 add_attribute (mode, attributes);
33664 thumb_flipper = !thumb_flipper;
33667 /* Hook to validate attribute((target("string"))). */
33669 static bool
33670 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33671 tree args, int ARG_UNUSED (flags))
33673 bool ret = true;
33674 struct gcc_options func_options, func_options_set;
33675 tree cur_tree, new_optimize;
33676 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33678 /* Get the optimization options of the current function. */
33679 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33681 /* If the function changed the optimization levels as well as setting target
33682 options, start with the optimizations specified. */
33683 if (!func_optimize)
33684 func_optimize = optimization_default_node;
33686 /* Init func_options. */
33687 memset (&func_options, 0, sizeof (func_options));
33688 init_options_struct (&func_options, NULL);
33689 lang_hooks.init_options_struct (&func_options);
33690 memset (&func_options_set, 0, sizeof (func_options_set));
33692 /* Initialize func_options to the defaults. */
33693 cl_optimization_restore (&func_options, &func_options_set,
33694 TREE_OPTIMIZATION (func_optimize));
33696 cl_target_option_restore (&func_options, &func_options_set,
33697 TREE_TARGET_OPTION (target_option_default_node));
33699 /* Set func_options flags with new target mode. */
33700 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33701 &func_options_set);
33703 if (cur_tree == NULL_TREE)
33704 ret = false;
33706 new_optimize = build_optimization_node (&func_options, &func_options_set);
33708 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33710 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33712 return ret;
33715 /* Match an ISA feature bitmap to a named FPU. We always use the
33716 first entry that exactly matches the feature set, so that we
33717 effectively canonicalize the FPU name for the assembler. */
33718 static const char*
33719 arm_identify_fpu_from_isa (sbitmap isa)
33721 auto_sbitmap fpubits (isa_num_bits);
33722 auto_sbitmap cand_fpubits (isa_num_bits);
33724 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33726 /* If there are no ISA feature bits relating to the FPU, we must be
33727 doing soft-float. */
33728 if (bitmap_empty_p (fpubits))
33729 return "softvfp";
33731 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33733 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33734 if (bitmap_equal_p (fpubits, cand_fpubits))
33735 return all_fpus[i].name;
33737 /* We must find an entry, or things have gone wrong. */
33738 gcc_unreachable ();
33741 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33742 by the function fndecl. */
33743 void
33744 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33746 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33748 struct cl_target_option *targ_options;
33749 if (target_parts)
33750 targ_options = TREE_TARGET_OPTION (target_parts);
33751 else
33752 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33753 gcc_assert (targ_options);
33755 arm_print_asm_arch_directives (stream, targ_options);
33757 fprintf (stream, "\t.syntax unified\n");
33759 if (TARGET_THUMB)
33761 if (is_called_in_ARM_mode (decl)
33762 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33763 && cfun->is_thunk))
33764 fprintf (stream, "\t.code 32\n");
33765 else if (TARGET_THUMB1)
33766 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33767 else
33768 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33770 else
33771 fprintf (stream, "\t.arm\n");
33773 if (TARGET_POKE_FUNCTION_NAME)
33774 arm_poke_function_name (stream, (const char *) name);
33777 /* If MEM is in the form of [base+offset], extract the two parts
33778 of address and set to BASE and OFFSET, otherwise return false
33779 after clearing BASE and OFFSET. */
33781 static bool
33782 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33784 rtx addr;
33786 gcc_assert (MEM_P (mem));
33788 addr = XEXP (mem, 0);
33790 /* Strip off const from addresses like (const (addr)). */
33791 if (GET_CODE (addr) == CONST)
33792 addr = XEXP (addr, 0);
33794 if (REG_P (addr))
33796 *base = addr;
33797 *offset = const0_rtx;
33798 return true;
33801 if (GET_CODE (addr) == PLUS
33802 && GET_CODE (XEXP (addr, 0)) == REG
33803 && CONST_INT_P (XEXP (addr, 1)))
33805 *base = XEXP (addr, 0);
33806 *offset = XEXP (addr, 1);
33807 return true;
33810 *base = NULL_RTX;
33811 *offset = NULL_RTX;
33813 return false;
33816 /* If INSN is a load or store of address in the form of [base+offset],
33817 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33818 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33819 otherwise return FALSE. */
33821 static bool
33822 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33824 rtx x, dest, src;
33826 gcc_assert (INSN_P (insn));
33827 x = PATTERN (insn);
33828 if (GET_CODE (x) != SET)
33829 return false;
33831 src = SET_SRC (x);
33832 dest = SET_DEST (x);
33833 if (REG_P (src) && MEM_P (dest))
33835 *is_load = false;
33836 extract_base_offset_in_addr (dest, base, offset);
33838 else if (MEM_P (src) && REG_P (dest))
33840 *is_load = true;
33841 extract_base_offset_in_addr (src, base, offset);
33843 else
33844 return false;
33846 return (*base != NULL_RTX && *offset != NULL_RTX);
33849 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33851 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33852 and PRI are only calculated for these instructions. For other instructions,
33853 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
33854 of instruction fusion can be supported by returning different priorities.
33856 It's important that irrelevant instructions get the largest FUSION_PRI. */
33858 static void
33859 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33860 int *fusion_pri, int *pri)
33862 int tmp, off_val;
33863 bool is_load;
33864 rtx base, offset;
33866 gcc_assert (INSN_P (insn));
33868 tmp = max_pri - 1;
33869 if (!fusion_load_store (insn, &base, &offset, &is_load))
33871 *pri = tmp;
33872 *fusion_pri = tmp;
33873 return;
33876 /* Load goes first. */
33877 if (is_load)
33878 *fusion_pri = tmp - 1;
33879 else
33880 *fusion_pri = tmp - 2;
33882 tmp /= 2;
33884 /* INSN with smaller base register goes first. */
33885 tmp -= ((REGNO (base) & 0xff) << 20);
33887 /* INSN with smaller offset goes first. */
33888 off_val = (int)(INTVAL (offset));
33889 if (off_val >= 0)
33890 tmp -= (off_val & 0xfffff);
33891 else
33892 tmp += ((- off_val) & 0xfffff);
33894 *pri = tmp;
33895 return;
33899 /* Construct and return a PARALLEL RTX vector with elements numbering the
33900 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33901 the vector - from the perspective of the architecture. This does not
33902 line up with GCC's perspective on lane numbers, so we end up with
33903 different masks depending on our target endian-ness. The diagram
33904 below may help. We must draw the distinction when building masks
33905 which select one half of the vector. An instruction selecting
33906 architectural low-lanes for a big-endian target, must be described using
33907 a mask selecting GCC high-lanes.
33909 Big-Endian Little-Endian
33911 GCC 0 1 2 3 3 2 1 0
33912 | x | x | x | x | | x | x | x | x |
33913 Architecture 3 2 1 0 3 2 1 0
33915 Low Mask: { 2, 3 } { 0, 1 }
33916 High Mask: { 0, 1 } { 2, 3 }
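/* Editor's illustration: for V4SImode and HIGH == true the function below
   returns (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   "High Mask" row above.  */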
33920 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33922 int nunits = GET_MODE_NUNITS (mode);
33923 rtvec v = rtvec_alloc (nunits / 2);
33924 int high_base = nunits / 2;
33925 int low_base = 0;
33926 int base;
33927 rtx t1;
33928 int i;
33930 if (BYTES_BIG_ENDIAN)
33931 base = high ? low_base : high_base;
33932 else
33933 base = high ? high_base : low_base;
33935 for (i = 0; i < nunits / 2; i++)
33936 RTVEC_ELT (v, i) = GEN_INT (base + i);
33938 t1 = gen_rtx_PARALLEL (mode, v);
33939 return t1;
33942 /* Check OP for validity as a PARALLEL RTX vector with elements
33943 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33944 from the perspective of the architecture. See the diagram above
33945 arm_simd_vect_par_cnst_half_p for more details. */
33947 bool
33948 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33949 bool high)
33951 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33952 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33953 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33954 int i = 0;
33956 if (!VECTOR_MODE_P (mode))
33957 return false;
33959 if (count_op != count_ideal)
33960 return false;
33962 for (i = 0; i < count_ideal; i++)
33964 rtx elt_op = XVECEXP (op, 0, i);
33965 rtx elt_ideal = XVECEXP (ideal, 0, i);
33967 if (!CONST_INT_P (elt_op)
33968 || INTVAL (elt_ideal) != INTVAL (elt_op))
33969 return false;
33971 return true;
33974 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33975 in Thumb1. */
33976 static bool
33977 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33978 const_tree)
33980 /* For now, we punt and not handle this for TARGET_THUMB1. */
33981 if (vcall_offset && TARGET_THUMB1)
33982 return false;
33984 /* Otherwise ok. */
33985 return true;
33988 /* Generate RTL for a conditional branch with rtx comparison CODE in
33989 mode CC_MODE. The destination of the unlikely conditional branch
33990 is LABEL_REF. */
33992 void
33993 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33994 rtx label_ref)
33996 rtx x;
33997 x = gen_rtx_fmt_ee (code, VOIDmode,
33998 gen_rtx_REG (cc_mode, CC_REGNUM),
33999 const0_rtx);
34001 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34002 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34003 pc_rtx);
34004 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34007 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34009 For pure-code sections there is no letter code for this attribute, so
34010 output all the section flags numerically when this is needed. */
34012 static bool
34013 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34016 if (flags & SECTION_ARM_PURECODE)
34018 *num = 0x20000000;
34020 if (!(flags & SECTION_DEBUG))
34021 *num |= 0x2;
34022 if (flags & SECTION_EXCLUDE)
34023 *num |= 0x80000000;
34024 if (flags & SECTION_WRITE)
34025 *num |= 0x1;
34026 if (flags & SECTION_CODE)
34027 *num |= 0x4;
34028 if (flags & SECTION_MERGE)
34029 *num |= 0x10;
34030 if (flags & SECTION_STRINGS)
34031 *num |= 0x20;
34032 if (flags & SECTION_TLS)
34033 *num |= 0x400;
34034 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34035 *num |= 0x200;
34037 return true;
34040 return false;
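/* Editor's worked example: an allocatable, executable pure-code section
   (SECTION_CODE set, not writable, not a debug section) comes out as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with SHF_ALLOC and SHF_EXECINSTR.  */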
34043 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34045 If pure-code is passed as an option, make sure all functions are in
34046 sections that have the SHF_ARM_PURECODE attribute. */
34048 static section *
34049 arm_function_section (tree decl, enum node_frequency freq,
34050 bool startup, bool exit)
34052 const char * section_name;
34053 section * sec;
34055 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34056 return default_function_section (decl, freq, startup, exit);
34058 if (!target_pure_code)
34059 return default_function_section (decl, freq, startup, exit);
34062 section_name = DECL_SECTION_NAME (decl);
34064 /* If a function is not in a named section then it falls under the 'default'
34065 text section, also known as '.text'. We can preserve previous behavior as
34066 the default text section already has the SHF_ARM_PURECODE section
34067 attribute. */
34068 if (!section_name)
34070 section *default_sec = default_function_section (decl, freq, startup,
34071 exit);
34073 /* If default_sec is not null, then it must be a special section like for
34074 example .text.startup. We set the pure-code attribute and return the
34075 same section to preserve existing behavior. */
34076 if (default_sec)
34077 default_sec->common.flags |= SECTION_ARM_PURECODE;
34078 return default_sec;
34081 /* Otherwise look whether a section has already been created with
34082 'section_name'. */
34083 sec = get_named_section (decl, section_name, 0);
34084 if (!sec)
34085 /* If that is not the case passing NULL as the section's name to
34086 'get_named_section' will create a section with the declaration's
34087 section name. */
34088 sec = get_named_section (decl, NULL, 0);
34090 /* Set the SHF_ARM_PURECODE attribute. */
34091 sec->common.flags |= SECTION_ARM_PURECODE;
34093 return sec;
34096 /* Implements the TARGET_SECTION_FLAGS hook.
34098 If DECL is a function declaration and pure-code is passed as an option
34099 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
34100 section's name and RELOC indicates whether the declarations initializer may
34101 contain runtime relocations. */
34103 static unsigned int
34104 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34106 unsigned int flags = default_section_type_flags (decl, name, reloc);
34108 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34109 flags |= SECTION_ARM_PURECODE;
34111 return flags;
34114 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34116 static void
34117 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34118 rtx op0, rtx op1,
34119 rtx *quot_p, rtx *rem_p)
34121 if (mode == SImode)
34122 gcc_assert (!TARGET_IDIV);
34124 scalar_int_mode libval_mode
34125 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34127 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34128 libval_mode, op0, mode, op1, mode);
34130 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34131 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34132 GET_MODE_SIZE (mode));
34134 gcc_assert (quotient);
34135 gcc_assert (remainder);
34137 *quot_p = quotient;
34138 *rem_p = remainder;
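/* Editor's illustration: for SImode this ends up calling __aeabi_idivmod or
   __aeabi_uidivmod, whose run-time ABI returns the quotient in r0 and the
   remainder in r1, conceptually

     typedef struct { int quot; int rem; } idiv_return;

   On a little-endian target the quotient is therefore the low half of the
   DImode libcall value extracted above and the remainder the high half.  */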
34141 /* This function checks for the availability of the coprocessor builtin passed
34142 in BUILTIN for the current target. Returns true if it is available and
34143 false otherwise. If a BUILTIN is passed for which this function has not
34144 been implemented it will cause an exception. */
34146 bool
34147 arm_coproc_builtin_available (enum unspecv builtin)
34149 /* None of these builtins are available in Thumb mode if the target only
34150 supports Thumb-1. */
34151 if (TARGET_THUMB1)
34152 return false;
34154 switch (builtin)
34156 case VUNSPEC_CDP:
34157 case VUNSPEC_LDC:
34158 case VUNSPEC_LDCL:
34159 case VUNSPEC_STC:
34160 case VUNSPEC_STCL:
34161 case VUNSPEC_MCR:
34162 case VUNSPEC_MRC:
34163 if (arm_arch4)
34164 return true;
34165 break;
34166 case VUNSPEC_CDP2:
34167 case VUNSPEC_LDC2:
34168 case VUNSPEC_LDC2L:
34169 case VUNSPEC_STC2:
34170 case VUNSPEC_STC2L:
34171 case VUNSPEC_MCR2:
34172 case VUNSPEC_MRC2:
34173 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34174 ARMv8-{A,M}. */
34175 if (arm_arch5t)
34176 return true;
34177 break;
34178 case VUNSPEC_MCRR:
34179 case VUNSPEC_MRRC:
34180 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34181 ARMv8-{A,M}. */
34182 if (arm_arch6 || arm_arch5te)
34183 return true;
34184 break;
34185 case VUNSPEC_MCRR2:
34186 case VUNSPEC_MRRC2:
34187 if (arm_arch6)
34188 return true;
34189 break;
34190 default:
34191 gcc_unreachable ();
34193 return false;
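/* Summary of the architecture gates above (informational comment, derived
   from the switch cases, not from the original source):
     CDP/LDC/LDCL/STC/STCL/MCR/MRC     -> ARMv4 and later (arm_arch4)
     CDP2/LDC2/LDC2L/STC2/STC2L/MCR2/MRC2 -> ARMv5T and later (arm_arch5t)
     MCRR/MRRC                         -> ARMv5TE or ARMv6 and later
     MCRR2/MRRC2                       -> ARMv6 and later  */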
34196 /* This function returns true if OP is a valid memory operand for the ldc and
34197 stc coprocessor instructions and false otherwise. */
34199 bool
34200 arm_coproc_ldc_stc_legitimate_address (rtx op)
34202 HOST_WIDE_INT range;
34203 /* Has to be a memory operand. */
34204 if (!MEM_P (op))
34205 return false;
34207 op = XEXP (op, 0);
34209 /* We accept registers. */
34210 if (REG_P (op))
34211 return true;
34213 switch (GET_CODE (op))
34215 case PLUS:
34217 /* Or registers with an offset. */
34218 if (!REG_P (XEXP (op, 0)))
34219 return false;
34221 op = XEXP (op, 1);
34223 /* The offset must be an immediate though. */
34224 if (!CONST_INT_P (op))
34225 return false;
34227 range = INTVAL (op);
34229 /* Within the range of [-1020,1020]. */
34230 if (!IN_RANGE (range, -1020, 1020))
34231 return false;
34233 /* And a multiple of 4. */
34234 return (range % 4) == 0;
34236 case PRE_INC:
34237 case POST_INC:
34238 case PRE_DEC:
34239 case POST_DEC:
34240 return REG_P (XEXP (op, 0));
34241 default:
34242 gcc_unreachable ();
34244 return false;
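/* Examples (informational, derived from the checks above; register names
   are only illustrative):
     accepted:  [r0]           plain register base
                [r0, #8]       immediate offset, multiple of 4, within +/-1020
                [r0, #-1020]   lower bound of the allowed range
                [r0], #4 style pre/post increment/decrement with a register base
     rejected:  [r0, #6]       offset not a multiple of 4
                [r0, #1024]    offset outside [-1020, 1020]
                [r0, r1]       offset is a register, not a CONST_INT  */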
34247 /* Return the diagnostic message string if conversion from FROMTYPE to
34248 TOTYPE is not allowed, NULL otherwise. */
34250 static const char *
34251 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34253 if (element_mode (fromtype) != element_mode (totype))
34255 /* Do not allow conversions to/from BFmode scalar types. */
34256 if (TYPE_MODE (fromtype) == BFmode)
34257 return N_("invalid conversion from type %<bfloat16_t%>");
34258 if (TYPE_MODE (totype) == BFmode)
34259 return N_("invalid conversion to type %<bfloat16_t%>");
34262 /* Conversion allowed. */
34263 return NULL;
34266 /* Return the diagnostic message string if the unary operation OP is
34267 not permitted on TYPE, NULL otherwise. */
34269 static const char *
34270 arm_invalid_unary_op (int op, const_tree type)
34272 /* Reject all single-operand operations on BFmode except for &. */
34273 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34274 return N_("operation not permitted on type %<bfloat16_t%>");
34276 /* Operation allowed. */
34277 return NULL;
34280 /* Return the diagnostic message string if the binary operation OP is
34281 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34283 static const char *
34284 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34285 const_tree type2)
34287 /* Reject all 2-operand operations on BFmode. */
34288 if (element_mode (type1) == BFmode
34289 || element_mode (type2) == BFmode)
34290 return N_("operation not permitted on type %<bfloat16_t%>");
34292 /* Operation allowed. */
34293 return NULL;
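/* Illustrative examples (not from the original source), assuming the ACLE
   __bf16 / bfloat16_t scalar type:

     __bf16 a, b;
     &a;         // allowed: ADDR_EXPR is the only permitted unary operation
     -a;         // rejected by arm_invalid_unary_op
     a + b;      // rejected by arm_invalid_binary_op
     (float) a;  // rejected by arm_invalid_conversion (element modes differ)  */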
34296 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34298 In VFPv1, VFP registers could only be accessed in the mode they were
34299 set, so subregs would be invalid there. However, we don't support
34300 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34302 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34303 VFP registers in little-endian order. We can't describe that accurately to
34304 GCC, so avoid taking subregs of such values.
34306 The only exception is going from a 128-bit to a 64-bit type. In that
34307 case the data layout happens to be consistent for big-endian, so we
34308 explicitly allow that case. */
34310 static bool
34311 arm_can_change_mode_class (machine_mode from, machine_mode to,
34312 reg_class_t rclass)
34314 if (TARGET_BIG_END
34315 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34316 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34317 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34318 && reg_classes_intersect_p (VFP_REGS, rclass))
34319 return false;
34320 return true;
34323 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34324 strcpy from constants will be faster. */
34326 static HOST_WIDE_INT
34327 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34329 unsigned int factor = ((TARGET_THUMB || !arm_tune_xscale) ? 1 : 2);
34330 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34331 return MAX (align, BITS_PER_WORD * factor);
34332 return align;
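/* Worked example (informational): when not optimizing for size, a string
   constant such as "abc" is aligned to at least BITS_PER_WORD (32 bits),
   or to 64 bits when tuning for XScale in Arm state (factor == 2), so that
   word-at-a-time copies from the constant are cheaper.  */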
34335 /* Emit a speculation barrier on target architectures that do not have
34336 DSB/ISB directly. Such systems probably don't need a barrier
34337 themselves, but if the code is ever run on a later architecture, it
34338 might become a problem. */
34339 void
34340 arm_emit_speculation_barrier_function ()
34342 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34345 /* Have we recorded an explicit access to the Q bit of APSR?  */
34346 bool
34347 arm_q_bit_access (void)
34349 if (cfun && cfun->decl)
34350 return lookup_attribute ("acle qbit",
34351 DECL_ATTRIBUTES (cfun->decl));
34352 return true;
34355 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34356 bool
34357 arm_ge_bits_access (void)
34359 if (cfun && cfun->decl)
34360 return lookup_attribute ("acle gebits",
34361 DECL_ATTRIBUTES (cfun->decl));
34362 return true;
34365 /* Return NULL if insn INSN is valid within a low-overhead loop.
34366 Otherwise return a string describing why the doloop transformation cannot be applied.  */
34368 static const char *
34369 arm_invalid_within_doloop (const rtx_insn *insn)
34371 if (!TARGET_HAVE_LOB)
34372 return default_invalid_within_doloop (insn);
34374 if (CALL_P (insn))
34375 return "Function call in the loop.";
34377 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34378 return "LR is used inside loop.";
34380 return NULL;
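/* Illustrative example (not from the original source): a counted loop whose
   body contains a call, e.g.

     for (int i = 0; i < n; i++)
       g (i);   // hypothetical call

   is rejected for the low-overhead-branch doloop transformation with
   "Function call in the loop.", since a call clobbers LR, which the
   DLS/LE low-overhead-loop instructions use as the loop counter.  */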
34383 bool
34384 arm_target_insn_ok_for_lob (rtx insn)
34386 basic_block bb = BLOCK_FOR_INSN (insn);
34387 /* Make sure the basic block of the target insn is a simple latch
34388 whose single predecessor and single successor are the loop body
34389 itself.  Only simple loops with a single basic block as the body are
34390 supported for low-overhead loops; this ensures that the LE target is
34391 placed above LE itself in the generated code.  */
34393 return single_succ_p (bb)
34394 && single_pred_p (bb)
34395 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34396 && contains_no_active_insn_p (bb);
34399 #if CHECKING_P
34400 namespace selftest {
34402 /* Scan the static data tables generated by parsecpu.awk looking for
34403 potential issues with the data. We primarily check for
34404 inconsistencies in the option extensions at present (extensions
34405 that duplicate others but aren't marked as aliases). Furthermore,
34406 for correct canonicalization, later options must never be a subset
34407 of an earlier option. Any extension should also only specify other
34408 feature bits and never an architecture bit. The architecture is inferred
34409 from the declaration of the extension. */
34410 static void
34411 arm_test_cpu_arch_data (void)
34413 const arch_option *arch;
34414 const cpu_option *cpu;
34415 auto_sbitmap target_isa (isa_num_bits);
34416 auto_sbitmap isa1 (isa_num_bits);
34417 auto_sbitmap isa2 (isa_num_bits);
34419 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34421 const cpu_arch_extension *ext1, *ext2;
34423 if (arch->common.extensions == NULL)
34424 continue;
34426 arm_initialize_isa (target_isa, arch->common.isa_bits);
34428 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34430 if (ext1->alias)
34431 continue;
34433 arm_initialize_isa (isa1, ext1->isa_bits);
34434 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34436 if (ext2->alias || ext1->remove != ext2->remove)
34437 continue;
34439 arm_initialize_isa (isa2, ext2->isa_bits);
34440 /* If the option is a subset of the parent option, it doesn't
34441 add anything and so isn't useful. */
34442 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34444 /* If the extension specifies any architectural bits then
34445 disallow it. Extensions should only specify feature bits. */
34446 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34451 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34453 const cpu_arch_extension *ext1, *ext2;
34455 if (cpu->common.extensions == NULL)
34456 continue;
34458 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34460 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34462 if (ext1->alias)
34463 continue;
34465 arm_initialize_isa (isa1, ext1->isa_bits);
34466 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34468 if (ext2->alias || ext1->remove != ext2->remove)
34469 continue;
34471 arm_initialize_isa (isa2, ext2->isa_bits);
34472 /* If the option is a subset of the parent option, it doesn't
34473 add anything and so isn't useful. */
34474 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34476 /* If the extension specifies any architectural bits then
34477 disallow it. Extensions should only specify feature bits. */
34478 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34484 /* Scan the static data tables generated by parsecpu.awk looking for
34485 potential issues with the data. Here we check for consistency between the
34486 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34487 a feature bit that is not defined by any FPU flag. */
34488 static void
34489 arm_test_fpu_data (void)
34491 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34492 auto_sbitmap fpubits (isa_num_bits);
34493 auto_sbitmap tmpset (isa_num_bits);
34495 static const enum isa_feature fpu_bitlist_internal[]
34496 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34497 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34499 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34501 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34502 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34503 bitmap_clear (isa_all_fpubits_internal);
34504 bitmap_copy (isa_all_fpubits_internal, tmpset);
34507 if (!bitmap_empty_p (isa_all_fpubits_internal))
34509 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34510 " group that are not defined by any FPU.\n"
34511 " Check your arm-cpus.in.\n");
34512 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34516 static void
34517 arm_run_selftests (void)
34519 arm_test_cpu_arch_data ();
34520 arm_test_fpu_data ();
34522 } /* Namespace selftest. */
34524 #undef TARGET_RUN_TARGET_SELFTESTS
34525 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34526 #endif /* CHECKING_P */
34528 /* Implement TARGET_STACK_PROTECT_GUARD.  For a
34529 global-variable-based guard, use the default implementation;
34530 otherwise return NULL_TREE.  */
34531 static tree
34532 arm_stack_protect_guard (void)
34534 if (arm_stack_protector_guard == SSP_GLOBAL)
34535 return default_stack_protect_guard ();
34537 return NULL_TREE;
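/* Informational note (not from the original source): SSP_GLOBAL means the
   traditional __stack_chk_guard global variable is used, so the default
   hook result is returned.  For any other guard kind, returning NULL_TREE
   makes the middle end rely on the target's stack_protect_set/test
   patterns instead of a guard decl.  */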
34540 /* Worker function for TARGET_MD_ASM_ADJUST while in Thumb-1 mode.
34541 Unlike the Arm version, we do NOT implement asm flag outputs.  */
34543 rtx_insn *
34544 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34545 vec<machine_mode> & /*input_modes*/,
34546 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34547 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34549 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34550 if (startswith (constraints[i], "=@cc"))
34552 sorry ("%<asm%> flags not supported in thumb1 mode");
34553 break;
34555 return NULL;
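/* Illustrative example (not from the original source; the constraint shown
   is hypothetical but matches the "=@cc" prefix tested above):

     int out;
     asm ("" : "=@cceq" (out));

   is handled by the Arm/Thumb-2 md_asm_adjust worker, but triggers the
   sorry () above when compiling for Thumb-1.  */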
34558 /* Generate code to enable conditional branches in functions over 1 MiB.
34559 Parameters are:
34560 operands: the operands list of the asm insn (see arm_cond_branch or
34561 arm_cond_branch_reversed).
34562 pos_label: an index into the operands array; operands[pos_label] is
34563 the asm label of the final jump destination.
34564 dest: a string used to generate the asm label of the intermediate
34565 destination.
34566 branch_format: a string denoting the intermediate branch mnemonic, e.g.
34567 "beq", "bne", etc.  */
34569 const char *
34570 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34571 const char * branch_format)
34573 rtx_code_label * tmp_label = gen_label_rtx ();
34574 char label_buf[256];
34575 char buffer[128];
34576 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34577 CODE_LABEL_NUMBER (tmp_label));
34578 const char *label_ptr = arm_strip_name_encoding (label_buf);
34579 rtx dest_label = operands[pos_label];
34580 operands[pos_label] = tmp_label;
34582 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34583 output_asm_insn (buffer, operands);
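/* Note: "%%l0%d" formats to e.g. "%l01"; output_asm_insn parses the whole
   digit string following "%l" as the operand number, so this refers to
   operands[pos_label], which is restored to the original far destination
   just below before the template is output.  */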
34585 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34586 operands[pos_label] = dest_label;
34587 output_asm_insn (buffer, operands);
34588 return "";
34591 /* If the given mode matches, loads from memory must use LO_REGS as the base register
34592 (i.e. [Rn], Rn <= LO_REGS).  */
34593 enum reg_class
34594 arm_mode_base_reg_class (machine_mode mode)
34596 if (TARGET_HAVE_MVE
34597 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34598 return LO_REGS;
34600 return MODE_BASE_REG_REG_CLASS (mode);
34603 struct gcc_target targetm = TARGET_INITIALIZER;
34605 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34607 opt_machine_mode
34608 arm_get_mask_mode (machine_mode mode)
34610 if (TARGET_HAVE_MVE)
34611 return arm_mode_to_pred_mode (mode);
34613 return default_get_mask_mode (mode);
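/* Informational sketch (not from the original source): for MVE,
   arm_mode_to_pred_mode maps a vector mode to the corresponding predicate
   mode (e.g. a V4SImode vector gets a V4BImode mask), so auto-vectorized
   masked operations use the VPR-based predicate modes rather than the
   generic integer-mask fallback returned by default_get_mask_mode.  */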
34616 #include "gt-arm.h"