rename DBX_REGISTER_NUMBER to DEBUGGER_REGNO
[official-gcc.git] gcc/config/arm/arm.cc
blob: 1a375367ac72a51e9811ba89777a207da8ab6f2d
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
76 /* This file should be included last. */
77 #include "target-def.h"
79 /* Forward definitions of types. */
80 typedef struct minipool_node Mnode;
81 typedef struct minipool_fixup Mfix;
83 void (*arm_lang_output_object_attributes_hook)(void);
85 struct four_ints
86 {
87 int i[4];
88 };
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx);
92 static int arm_needs_doubleword_align (machine_mode, const_tree);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets *arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap);
101 static int arm_address_register_rtx_p (rtx, int);
102 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
103 static bool is_called_in_ARM_mode (tree);
104 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
105 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
106 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
107 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
108 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
109 inline static int thumb1_index_register_rtx_p (rtx, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx, int);
115 static void arm_print_operand_address (FILE *, machine_mode, rtx);
116 static bool arm_print_operand_punct_valid_p (unsigned char code);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
118 static arm_cc get_arm_condition_code (rtx);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx *, const char *, const char *,
121 int, HOST_WIDE_INT);
122 static const char *shift_op (rtx, HOST_WIDE_INT *);
123 static struct machine_function *arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
126 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_forward_ref (Mfix *);
128 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_backward_ref (Mfix *);
130 static void assign_minipool_offsets (Mfix *);
131 static void arm_print_value (FILE *, rtx);
132 static void dump_minipool (rtx_insn *);
133 static int arm_barrier_cost (rtx_insn *);
134 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
135 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
136 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
137 machine_mode, rtx);
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree);
143 static unsigned long arm_compute_func_type (void);
144 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
149 #endif
150 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
151 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree, const_tree);
155 static void arm_set_default_type_attributes (tree);
156 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence);
161 static int optimal_immediate_sequence_1 (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence,
164 int i);
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree, tree);
167 static machine_mode arm_promote_function_mode (const_tree,
168 machine_mode, int *,
169 const_tree, int);
170 static bool arm_return_in_memory (const_tree, const_tree);
171 static rtx arm_function_value (const_tree, const_tree, bool);
172 static rtx arm_libcall_value_1 (machine_mode);
173 static rtx arm_libcall_value (machine_mode, const_rtx);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
177 tree);
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode, rtx);
180 static bool arm_legitimate_constant_p (machine_mode, rtx);
181 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn *, bool);
183 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
184 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
185 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
186 static void emit_constant_insn (rtx cond, rtx pattern);
187 static rtx_insn *emit_set_insn (rtx, rtx);
188 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
193 static int arm_arg_partial_bytes (cumulative_args_t,
194 const function_arg_info &);
195 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
196 static void arm_function_arg_advance (cumulative_args_t,
197 const function_arg_info &);
198 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
217 static void arm_setup_incoming_varargs (cumulative_args_t,
218 const function_arg_info &, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 const function_arg_info &);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (const function_arg_info &);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 #endif
232 static void arm_asm_init_sections (void);
233 static rtx arm_dwarf_register_span (rtx);
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
250 struct cl_target_option *);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option *);
253 static void arm_set_current_function (tree);
254 static bool arm_can_inline_p (tree, tree);
255 static void arm_relayout_function (tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_sched_can_speculate_insn (rtx_insn *);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static tree arm_promoted_type (const_tree t);
270 static bool arm_scalar_mode_supported_p (scalar_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (scalar_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
300 rtx, const vec_perm_indices &);
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
308 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
309 bool op0_preserve_value);
310 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
312 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
314 const_tree);
315 static section *arm_function_section (tree, enum node_frequency, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
317 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
318 int reloc);
319 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
320 static opt_scalar_float_mode arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
323 static bool arm_modes_tieable_p (machine_mode, machine_mode);
324 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
325 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
326 vec<machine_mode> &,
327 vec<const char *> &, vec<rtx> &,
328 HARD_REG_SET &, location_t);
329 static const char *arm_identify_fpu_from_isa (sbitmap);
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table[] =
333 {
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
338 call. */
339 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Whereas these functions are always known to reside within the 26 bit
341 addressing range. */
342 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
345 NULL },
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute, NULL },
353 #ifdef ARM_PE
354 /* ARM/PE has three new attributes:
355 interfacearm - ?
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
361 multiple times.
362 */
363 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
364 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute, NULL },
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute, NULL },
374 #endif
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry, NULL },
378 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
379 arm_handle_cmse_nonsecure_call, NULL },
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
381 { NULL, 0, 0, false, false, false, false, NULL, NULL }
382 };
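/* Editorial example (not part of the original source): in user code these
   attributes are attached to declarations, for instance

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));
     int secure_gateway (int) __attribute__ ((cmse_nonsecure_entry));

   and the handler functions named in the table above validate and record
   them when the front end processes the declaration.  */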
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #endif
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
479 #undef TARGET_ENCODE_SECTION_INFO
480 #ifdef ARM_PE
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
482 #else
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #endif
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
623 #if ARM_UNWIND_INFO
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
637 #endif /* ARM_UNWIND_INFO */
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
648 #ifdef HAVE_AS_TLS
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
651 #endif
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
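/* Editorial note: the block size referred to above works out as
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the
   "divisible by eight" remark.  */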
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
707 #ifdef HAVE_AS_TLS
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
710 #endif
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
742 #undef TARGET_VECTOR_ALIGNMENT
743 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
745 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
746 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
747 arm_vector_alignment_reachable
749 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
750 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
751 arm_builtin_support_vector_misalignment
753 #undef TARGET_PREFERRED_RENAME_CLASS
754 #define TARGET_PREFERRED_RENAME_CLASS \
755 arm_preferred_rename_class
757 #undef TARGET_VECTORIZE_VEC_PERM_CONST
758 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
760 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
761 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
762 arm_builtin_vectorization_cost
764 #undef TARGET_CANONICALIZE_COMPARISON
765 #define TARGET_CANONICALIZE_COMPARISON \
766 arm_canonicalize_comparison
768 #undef TARGET_ASAN_SHADOW_OFFSET
769 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
771 #undef MAX_INSN_PER_IT_BLOCK
772 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
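/* Editorial note: with -mrestrict-it (arm_restrict_it set) an IT block is
   limited to a single conditional instruction; otherwise the IT{x{y{z}}}
   encodings allow up to four instructions per block, which is what the
   fallback value of 4 reflects.  */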
774 #undef TARGET_CAN_USE_DOLOOP_P
775 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
777 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
778 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
780 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
781 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
783 #undef TARGET_SCHED_FUSION_PRIORITY
784 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
786 #undef TARGET_ASM_FUNCTION_SECTION
787 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
789 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
790 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
792 #undef TARGET_SECTION_TYPE_FLAGS
793 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
795 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
796 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
798 #undef TARGET_C_EXCESS_PRECISION
799 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
801 /* Although the architecture reserves bits 0 and 1, only the former is
802 used for ARM/Thumb ISA selection in v7 and earlier versions. */
803 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
804 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
806 #undef TARGET_FIXED_CONDITION_CODE_REGS
807 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
809 #undef TARGET_HARD_REGNO_NREGS
810 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
811 #undef TARGET_HARD_REGNO_MODE_OK
812 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
814 #undef TARGET_MODES_TIEABLE_P
815 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
817 #undef TARGET_CAN_CHANGE_MODE_CLASS
818 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
820 #undef TARGET_CONSTANT_ALIGNMENT
821 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
823 #undef TARGET_INVALID_WITHIN_DOLOOP
824 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
826 #undef TARGET_MD_ASM_ADJUST
827 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
829 #undef TARGET_STACK_PROTECT_GUARD
830 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
832 #undef TARGET_VECTORIZE_GET_MASK_MODE
833 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
835 /* Obstack for minipool constant handling. */
836 static struct obstack minipool_obstack;
837 static char * minipool_startobj;
839 /* The maximum number of insns skipped which
840 will be conditionalised if possible. */
841 static int max_insns_skipped = 5;
843 /* True if we are currently building a constant table. */
844 int making_const_table;
846 /* The processor for which instructions should be scheduled. */
847 enum processor_type arm_tune = TARGET_CPU_arm_none;
849 /* The current tuning set. */
850 const struct tune_params *current_tune;
852 /* Which floating point hardware to schedule for. */
853 int arm_fpu_attr;
855 /* Used for Thumb call_via trampolines. */
856 rtx thumb_call_via_label[14];
857 static int thumb_call_reg_needed;
859 /* The bits in this mask specify which instruction scheduling options should
860 be used. */
861 unsigned int tune_flags = 0;
863 /* The highest ARM architecture version supported by the
864 target. */
865 enum base_architecture arm_base_arch = BASE_ARCH_0;
867 /* Active target architecture and tuning. */
869 struct arm_build_target arm_active_target;
871 /* The following are used in the arm.md file as equivalents to bits
872 in the above two flag variables. */
874 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
875 int arm_arch4 = 0;
877 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
878 int arm_arch4t = 0;
880 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
881 int arm_arch5t = 0;
883 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
884 int arm_arch5te = 0;
886 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
887 int arm_arch6 = 0;
889 /* Nonzero if this chip supports the ARM 6K extensions. */
890 int arm_arch6k = 0;
892 /* Nonzero if this chip supports the ARM 6KZ extensions. */
893 int arm_arch6kz = 0;
895 /* Nonzero if instructions present in ARMv6-M can be used. */
896 int arm_arch6m = 0;
898 /* Nonzero if this chip supports the ARM 7 extensions. */
899 int arm_arch7 = 0;
901 /* Nonzero if this chip supports the Large Physical Address Extension. */
902 int arm_arch_lpae = 0;
904 /* Nonzero if instructions not present in the 'M' profile can be used. */
905 int arm_arch_notm = 0;
907 /* Nonzero if instructions present in ARMv7E-M can be used. */
908 int arm_arch7em = 0;
910 /* Nonzero if instructions present in ARMv8 can be used. */
911 int arm_arch8 = 0;
913 /* Nonzero if this chip supports the ARMv8.1 extensions. */
914 int arm_arch8_1 = 0;
916 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
917 int arm_arch8_2 = 0;
919 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
920 int arm_arch8_3 = 0;
922 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
923 int arm_arch8_4 = 0;
924 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
925 extensions. */
926 int arm_arch8_1m_main = 0;
928 /* Nonzero if this chip supports the FP16 instructions extension of ARM
929 Architecture 8.2. */
930 int arm_fp16_inst = 0;
932 /* Nonzero if this chip can benefit from load scheduling. */
933 int arm_ld_sched = 0;
935 /* Nonzero if this chip is a StrongARM. */
936 int arm_tune_strongarm = 0;
938 /* Nonzero if this chip supports Intel Wireless MMX technology. */
939 int arm_arch_iwmmxt = 0;
941 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
942 int arm_arch_iwmmxt2 = 0;
944 /* Nonzero if this chip is an XScale. */
945 int arm_arch_xscale = 0;
947 /* Nonzero if tuning for XScale */
948 int arm_tune_xscale = 0;
950 /* Nonzero if we want to tune for stores that access the write-buffer.
951 This typically means an ARM6 or ARM7 with MMU or MPU. */
952 int arm_tune_wbuf = 0;
954 /* Nonzero if tuning for Cortex-A9. */
955 int arm_tune_cortex_a9 = 0;
957 /* Nonzero if we should define __THUMB_INTERWORK__ in the
958 preprocessor.
959 XXX This is a bit of a hack, it's intended to help work around
960 problems in GLD which doesn't understand that armv5t code is
961 interworking clean. */
962 int arm_cpp_interwork = 0;
964 /* Nonzero if chip supports Thumb 1. */
965 int arm_arch_thumb1;
967 /* Nonzero if chip supports Thumb 2. */
968 int arm_arch_thumb2;
970 /* Nonzero if chip supports integer division instruction. */
971 int arm_arch_arm_hwdiv;
972 int arm_arch_thumb_hwdiv;
974 /* Nonzero if chip disallows volatile memory access in IT block. */
975 int arm_arch_no_volatile_ce;
977 /* Nonzero if we shouldn't use literal pools. */
978 bool arm_disable_literal_pool = false;
980 /* The register number to be used for the PIC offset register. */
981 unsigned arm_pic_register = INVALID_REGNUM;
983 enum arm_pcs arm_pcs_default;
985 /* For an explanation of these variables, see final_prescan_insn below. */
986 int arm_ccfsm_state;
987 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
988 enum arm_cond_code arm_current_cc;
990 rtx arm_target_insn;
991 int arm_target_label;
992 /* The number of conditionally executed insns, including the current insn. */
993 int arm_condexec_count = 0;
994 /* A bitmask specifying the patterns for the IT block.
995 Zero means do not output an IT block before this insn. */
996 int arm_condexec_mask = 0;
997 /* The number of bits used in arm_condexec_mask. */
998 int arm_condexec_masklen = 0;
1000 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1001 int arm_arch_crc = 0;
1003 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1004 int arm_arch_dotprod = 0;
1006 /* Nonzero if chip supports the ARMv8-M security extensions. */
1007 int arm_arch_cmse = 0;
1009 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1010 int arm_m_profile_small_mul = 0;
1012 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1013 int arm_arch_i8mm = 0;
1015 /* Nonzero if chip supports the BFloat16 instructions. */
1016 int arm_arch_bf16 = 0;
1018 /* Nonzero if chip supports the Custom Datapath Extension. */
1019 int arm_arch_cde = 0;
1020 int arm_arch_cde_coproc = 0;
1021 const int arm_arch_cde_coproc_bits[] = {
1022 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1023 };
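/* Editorial sketch: each element above is the bit recorded in
   arm_arch_cde_coproc for the corresponding coprocessor, so enabling
   coprocessor 0 (e.g. via an -march=...+cdecp0 extension) contributes 0x1,
   coprocessor 1 contributes 0x2, and so on.  This is an illustration of the
   mapping, assuming the +cdecpN option spelling.  */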
1025 /* The condition codes of the ARM, and the inverse function. */
1026 static const char * const arm_condition_codes[] =
1027 {
1028 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1029 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1030 };
1032 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1033 int arm_regs_in_sequence[] =
1034 {
1035 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1036 };
1038 #define DEF_FP_SYSREG(reg) #reg,
1039 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1040 FP_SYSREGS
1041 };
1042 #undef DEF_FP_SYSREG
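/* Editorial sketch of the X-macro above: FP_SYSREGS (defined in arm.h) is a
   list of DEF_FP_SYSREG (reg) invocations, so with the stringizing
   definition of DEF_FP_SYSREG the initializer expands roughly to

     const char *fp_sysreg_names[NB_FP_SYSREGS] = { "FPSCR", ... };

   The register names shown are an assumption for illustration only.  */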
1044 #define ARM_LSL_NAME "lsl"
1045 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1047 #define THUMB2_WORK_REGS \
1048 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1049 | (1 << SP_REGNUM) \
1050 | (1 << PC_REGNUM) \
1051 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1052 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1053 : 0)))
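/* Editorial worked example: 0xff covers r0-r7.  Assuming
   THUMB_HARD_FRAME_POINTER_REGNUM == 7 and no PIC register in use, the mask
   evaluates to 0xff & ~(1 << 7) == 0x7f, i.e. r0-r6 are available as work
   registers.  SP and PC lie above bit 7, so their terms clear nothing within
   the 0xff range here.  */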
1055 /* Initialization code. */
1057 struct cpu_tune
1058 {
1059 enum processor_type scheduler;
1060 unsigned int tune_flags;
1061 const struct tune_params *tune;
1062 };
1064 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1066 { \
1067 num_slots, \
1068 l1_size, \
1069 l1_line_size \
1070 }
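/* Editorial example: a tuning structure would instantiate this as, say,
   ARM_PREFETCH_BENEFICIAL (4, 32, 64), filling in the slot count, L1 cache
   size and L1 line size fields, or use ARM_PREFETCH_NOT_BENEFICIAL to
   disable the prefetch heuristic.  The numbers here are made up for
   illustration; units follow whatever the tune_params prefetch fields
   expect.  */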
1072 /* arm generic vectorizer costs. */
1073 static const
1074 struct cpu_vec_costs arm_default_vec_cost = {
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 1, /* vec_unalign_load_cost. */
1083 1, /* vec_unalign_store_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1087 };
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1090 #include "aarch-cost-tables.h"
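/* Editorial note: the COSTS_N_INSNS (N) values in the tables below follow
   the rtl.h convention of expressing costs in quarter-instruction units,
   i.e. COSTS_N_INSNS (N) expands to N * 4, so bare integers such as 1 or 2
   denote costs smaller than one full instruction.  */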
1094 const struct cpu_cost_table cortexa9_extra_costs =
1096 /* ALU */
1098 0, /* arith. */
1099 0, /* logical. */
1100 0, /* shift. */
1101 COSTS_N_INSNS (1), /* shift_reg. */
1102 COSTS_N_INSNS (1), /* arith_shift. */
1103 COSTS_N_INSNS (2), /* arith_shift_reg. */
1104 0, /* log_shift. */
1105 COSTS_N_INSNS (1), /* log_shift_reg. */
1106 COSTS_N_INSNS (1), /* extend. */
1107 COSTS_N_INSNS (2), /* extend_arith. */
1108 COSTS_N_INSNS (1), /* bfi. */
1109 COSTS_N_INSNS (1), /* bfx. */
1110 0, /* clz. */
1111 0, /* rev. */
1112 0, /* non_exec. */
1113 true /* non_exec_costs_exec. */
1116 /* MULT SImode */
1118 COSTS_N_INSNS (3), /* simple. */
1119 COSTS_N_INSNS (3), /* flag_setting. */
1120 COSTS_N_INSNS (2), /* extend. */
1121 COSTS_N_INSNS (3), /* add. */
1122 COSTS_N_INSNS (2), /* extend_add. */
1123 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1125 /* MULT DImode */
1127 0, /* simple (N/A). */
1128 0, /* flag_setting (N/A). */
1129 COSTS_N_INSNS (4), /* extend. */
1130 0, /* add (N/A). */
1131 COSTS_N_INSNS (4), /* extend_add. */
1132 0 /* idiv (N/A). */
1135 /* LD/ST */
1137 COSTS_N_INSNS (2), /* load. */
1138 COSTS_N_INSNS (2), /* load_sign_extend. */
1139 COSTS_N_INSNS (2), /* ldrd. */
1140 COSTS_N_INSNS (2), /* ldm_1st. */
1141 1, /* ldm_regs_per_insn_1st. */
1142 2, /* ldm_regs_per_insn_subsequent. */
1143 COSTS_N_INSNS (5), /* loadf. */
1144 COSTS_N_INSNS (5), /* loadd. */
1145 COSTS_N_INSNS (1), /* load_unaligned. */
1146 COSTS_N_INSNS (2), /* store. */
1147 COSTS_N_INSNS (2), /* strd. */
1148 COSTS_N_INSNS (2), /* stm_1st. */
1149 1, /* stm_regs_per_insn_1st. */
1150 2, /* stm_regs_per_insn_subsequent. */
1151 COSTS_N_INSNS (1), /* storef. */
1152 COSTS_N_INSNS (1), /* stored. */
1153 COSTS_N_INSNS (1), /* store_unaligned. */
1154 COSTS_N_INSNS (1), /* loadv. */
1155 COSTS_N_INSNS (1) /* storev. */
1158 /* FP SFmode */
1160 COSTS_N_INSNS (14), /* div. */
1161 COSTS_N_INSNS (4), /* mult. */
1162 COSTS_N_INSNS (7), /* mult_addsub. */
1163 COSTS_N_INSNS (30), /* fma. */
1164 COSTS_N_INSNS (3), /* addsub. */
1165 COSTS_N_INSNS (1), /* fpconst. */
1166 COSTS_N_INSNS (1), /* neg. */
1167 COSTS_N_INSNS (3), /* compare. */
1168 COSTS_N_INSNS (3), /* widen. */
1169 COSTS_N_INSNS (3), /* narrow. */
1170 COSTS_N_INSNS (3), /* toint. */
1171 COSTS_N_INSNS (3), /* fromint. */
1172 COSTS_N_INSNS (3) /* roundint. */
1174 /* FP DFmode */
1176 COSTS_N_INSNS (24), /* div. */
1177 COSTS_N_INSNS (5), /* mult. */
1178 COSTS_N_INSNS (8), /* mult_addsub. */
1179 COSTS_N_INSNS (30), /* fma. */
1180 COSTS_N_INSNS (3), /* addsub. */
1181 COSTS_N_INSNS (1), /* fpconst. */
1182 COSTS_N_INSNS (1), /* neg. */
1183 COSTS_N_INSNS (3), /* compare. */
1184 COSTS_N_INSNS (3), /* widen. */
1185 COSTS_N_INSNS (3), /* narrow. */
1186 COSTS_N_INSNS (3), /* toint. */
1187 COSTS_N_INSNS (3), /* fromint. */
1188 COSTS_N_INSNS (3) /* roundint. */
1191 /* Vector */
1193 COSTS_N_INSNS (1), /* alu. */
1194 COSTS_N_INSNS (4), /* mult. */
1195 COSTS_N_INSNS (1), /* movi. */
1196 COSTS_N_INSNS (2), /* dup. */
1197 COSTS_N_INSNS (2) /* extract. */
1201 const struct cpu_cost_table cortexa8_extra_costs =
1203 /* ALU */
1205 0, /* arith. */
1206 0, /* logical. */
1207 COSTS_N_INSNS (1), /* shift. */
1208 0, /* shift_reg. */
1209 COSTS_N_INSNS (1), /* arith_shift. */
1210 0, /* arith_shift_reg. */
1211 COSTS_N_INSNS (1), /* log_shift. */
1212 0, /* log_shift_reg. */
1213 0, /* extend. */
1214 0, /* extend_arith. */
1215 0, /* bfi. */
1216 0, /* bfx. */
1217 0, /* clz. */
1218 0, /* rev. */
1219 0, /* non_exec. */
1220 true /* non_exec_costs_exec. */
1223 /* MULT SImode */
1225 COSTS_N_INSNS (1), /* simple. */
1226 COSTS_N_INSNS (1), /* flag_setting. */
1227 COSTS_N_INSNS (1), /* extend. */
1228 COSTS_N_INSNS (1), /* add. */
1229 COSTS_N_INSNS (1), /* extend_add. */
1230 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1232 /* MULT DImode */
1234 0, /* simple (N/A). */
1235 0, /* flag_setting (N/A). */
1236 COSTS_N_INSNS (2), /* extend. */
1237 0, /* add (N/A). */
1238 COSTS_N_INSNS (2), /* extend_add. */
1239 0 /* idiv (N/A). */
1242 /* LD/ST */
1244 COSTS_N_INSNS (1), /* load. */
1245 COSTS_N_INSNS (1), /* load_sign_extend. */
1246 COSTS_N_INSNS (1), /* ldrd. */
1247 COSTS_N_INSNS (1), /* ldm_1st. */
1248 1, /* ldm_regs_per_insn_1st. */
1249 2, /* ldm_regs_per_insn_subsequent. */
1250 COSTS_N_INSNS (1), /* loadf. */
1251 COSTS_N_INSNS (1), /* loadd. */
1252 COSTS_N_INSNS (1), /* load_unaligned. */
1253 COSTS_N_INSNS (1), /* store. */
1254 COSTS_N_INSNS (1), /* strd. */
1255 COSTS_N_INSNS (1), /* stm_1st. */
1256 1, /* stm_regs_per_insn_1st. */
1257 2, /* stm_regs_per_insn_subsequent. */
1258 COSTS_N_INSNS (1), /* storef. */
1259 COSTS_N_INSNS (1), /* stored. */
1260 COSTS_N_INSNS (1), /* store_unaligned. */
1261 COSTS_N_INSNS (1), /* loadv. */
1262 COSTS_N_INSNS (1) /* storev. */
1265 /* FP SFmode */
1267 COSTS_N_INSNS (36), /* div. */
1268 COSTS_N_INSNS (11), /* mult. */
1269 COSTS_N_INSNS (20), /* mult_addsub. */
1270 COSTS_N_INSNS (30), /* fma. */
1271 COSTS_N_INSNS (9), /* addsub. */
1272 COSTS_N_INSNS (3), /* fpconst. */
1273 COSTS_N_INSNS (3), /* neg. */
1274 COSTS_N_INSNS (6), /* compare. */
1275 COSTS_N_INSNS (4), /* widen. */
1276 COSTS_N_INSNS (4), /* narrow. */
1277 COSTS_N_INSNS (8), /* toint. */
1278 COSTS_N_INSNS (8), /* fromint. */
1279 COSTS_N_INSNS (8) /* roundint. */
1281 /* FP DFmode */
1283 COSTS_N_INSNS (64), /* div. */
1284 COSTS_N_INSNS (16), /* mult. */
1285 COSTS_N_INSNS (25), /* mult_addsub. */
1286 COSTS_N_INSNS (30), /* fma. */
1287 COSTS_N_INSNS (9), /* addsub. */
1288 COSTS_N_INSNS (3), /* fpconst. */
1289 COSTS_N_INSNS (3), /* neg. */
1290 COSTS_N_INSNS (6), /* compare. */
1291 COSTS_N_INSNS (6), /* widen. */
1292 COSTS_N_INSNS (6), /* narrow. */
1293 COSTS_N_INSNS (8), /* toint. */
1294 COSTS_N_INSNS (8), /* fromint. */
1295 COSTS_N_INSNS (8) /* roundint. */
1298 /* Vector */
1300 COSTS_N_INSNS (1), /* alu. */
1301 COSTS_N_INSNS (4), /* mult. */
1302 COSTS_N_INSNS (1), /* movi. */
1303 COSTS_N_INSNS (2), /* dup. */
1304 COSTS_N_INSNS (2) /* extract. */
1308 const struct cpu_cost_table cortexa5_extra_costs =
1310 /* ALU */
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1331 /* MULT SImode */
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1340 /* MULT DImode */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1350 /* LD/ST */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (6), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (4), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1), /* store_unaligned. */
1369 COSTS_N_INSNS (1), /* loadv. */
1370 COSTS_N_INSNS (1) /* storev. */
1373 /* FP SFmode */
1375 COSTS_N_INSNS (15), /* div. */
1376 COSTS_N_INSNS (3), /* mult. */
1377 COSTS_N_INSNS (7), /* mult_addsub. */
1378 COSTS_N_INSNS (7), /* fma. */
1379 COSTS_N_INSNS (3), /* addsub. */
1380 COSTS_N_INSNS (3), /* fpconst. */
1381 COSTS_N_INSNS (3), /* neg. */
1382 COSTS_N_INSNS (3), /* compare. */
1383 COSTS_N_INSNS (3), /* widen. */
1384 COSTS_N_INSNS (3), /* narrow. */
1385 COSTS_N_INSNS (3), /* toint. */
1386 COSTS_N_INSNS (3), /* fromint. */
1387 COSTS_N_INSNS (3) /* roundint. */
1389 /* FP DFmode */
1391 COSTS_N_INSNS (30), /* div. */
1392 COSTS_N_INSNS (6), /* mult. */
1393 COSTS_N_INSNS (10), /* mult_addsub. */
1394 COSTS_N_INSNS (7), /* fma. */
1395 COSTS_N_INSNS (3), /* addsub. */
1396 COSTS_N_INSNS (3), /* fpconst. */
1397 COSTS_N_INSNS (3), /* neg. */
1398 COSTS_N_INSNS (3), /* compare. */
1399 COSTS_N_INSNS (3), /* widen. */
1400 COSTS_N_INSNS (3), /* narrow. */
1401 COSTS_N_INSNS (3), /* toint. */
1402 COSTS_N_INSNS (3), /* fromint. */
1403 COSTS_N_INSNS (3) /* roundint. */
1406 /* Vector */
1408 COSTS_N_INSNS (1), /* alu. */
1409 COSTS_N_INSNS (4), /* mult. */
1410 COSTS_N_INSNS (1), /* movi. */
1411 COSTS_N_INSNS (2), /* dup. */
1412 COSTS_N_INSNS (2) /* extract. */
1417 const struct cpu_cost_table cortexa7_extra_costs =
1419 /* ALU */
1421 0, /* arith. */
1422 0, /* logical. */
1423 COSTS_N_INSNS (1), /* shift. */
1424 COSTS_N_INSNS (1), /* shift_reg. */
1425 COSTS_N_INSNS (1), /* arith_shift. */
1426 COSTS_N_INSNS (1), /* arith_shift_reg. */
1427 COSTS_N_INSNS (1), /* log_shift. */
1428 COSTS_N_INSNS (1), /* log_shift_reg. */
1429 COSTS_N_INSNS (1), /* extend. */
1430 COSTS_N_INSNS (1), /* extend_arith. */
1431 COSTS_N_INSNS (1), /* bfi. */
1432 COSTS_N_INSNS (1), /* bfx. */
1433 COSTS_N_INSNS (1), /* clz. */
1434 COSTS_N_INSNS (1), /* rev. */
1435 0, /* non_exec. */
1436 true /* non_exec_costs_exec. */
1440 /* MULT SImode */
1442 0, /* simple. */
1443 COSTS_N_INSNS (1), /* flag_setting. */
1444 COSTS_N_INSNS (1), /* extend. */
1445 COSTS_N_INSNS (1), /* add. */
1446 COSTS_N_INSNS (1), /* extend_add. */
1447 COSTS_N_INSNS (7) /* idiv. */
1449 /* MULT DImode */
1451 0, /* simple (N/A). */
1452 0, /* flag_setting (N/A). */
1453 COSTS_N_INSNS (1), /* extend. */
1454 0, /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 0 /* idiv (N/A). */
1459 /* LD/ST */
1461 COSTS_N_INSNS (1), /* load. */
1462 COSTS_N_INSNS (1), /* load_sign_extend. */
1463 COSTS_N_INSNS (3), /* ldrd. */
1464 COSTS_N_INSNS (1), /* ldm_1st. */
1465 1, /* ldm_regs_per_insn_1st. */
1466 2, /* ldm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* loadf. */
1468 COSTS_N_INSNS (2), /* loadd. */
1469 COSTS_N_INSNS (1), /* load_unaligned. */
1470 COSTS_N_INSNS (1), /* store. */
1471 COSTS_N_INSNS (3), /* strd. */
1472 COSTS_N_INSNS (1), /* stm_1st. */
1473 1, /* stm_regs_per_insn_1st. */
1474 2, /* stm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (2), /* storef. */
1476 COSTS_N_INSNS (2), /* stored. */
1477 COSTS_N_INSNS (1), /* store_unaligned. */
1478 COSTS_N_INSNS (1), /* loadv. */
1479 COSTS_N_INSNS (1) /* storev. */
1482 /* FP SFmode */
1484 COSTS_N_INSNS (15), /* div. */
1485 COSTS_N_INSNS (3), /* mult. */
1486 COSTS_N_INSNS (7), /* mult_addsub. */
1487 COSTS_N_INSNS (7), /* fma. */
1488 COSTS_N_INSNS (3), /* addsub. */
1489 COSTS_N_INSNS (3), /* fpconst. */
1490 COSTS_N_INSNS (3), /* neg. */
1491 COSTS_N_INSNS (3), /* compare. */
1492 COSTS_N_INSNS (3), /* widen. */
1493 COSTS_N_INSNS (3), /* narrow. */
1494 COSTS_N_INSNS (3), /* toint. */
1495 COSTS_N_INSNS (3), /* fromint. */
1496 COSTS_N_INSNS (3) /* roundint. */
1498 /* FP DFmode */
1500 COSTS_N_INSNS (30), /* div. */
1501 COSTS_N_INSNS (6), /* mult. */
1502 COSTS_N_INSNS (10), /* mult_addsub. */
1503 COSTS_N_INSNS (7), /* fma. */
1504 COSTS_N_INSNS (3), /* addsub. */
1505 COSTS_N_INSNS (3), /* fpconst. */
1506 COSTS_N_INSNS (3), /* neg. */
1507 COSTS_N_INSNS (3), /* compare. */
1508 COSTS_N_INSNS (3), /* widen. */
1509 COSTS_N_INSNS (3), /* narrow. */
1510 COSTS_N_INSNS (3), /* toint. */
1511 COSTS_N_INSNS (3), /* fromint. */
1512 COSTS_N_INSNS (3) /* roundint. */
1515 /* Vector */
1517 COSTS_N_INSNS (1), /* alu. */
1518 COSTS_N_INSNS (4), /* mult. */
1519 COSTS_N_INSNS (1), /* movi. */
1520 COSTS_N_INSNS (2), /* dup. */
1521 COSTS_N_INSNS (2) /* extract. */
1525 const struct cpu_cost_table cortexa12_extra_costs =
1527 /* ALU */
1529 0, /* arith. */
1530 0, /* logical. */
1531 0, /* shift. */
1532 COSTS_N_INSNS (1), /* shift_reg. */
1533 COSTS_N_INSNS (1), /* arith_shift. */
1534 COSTS_N_INSNS (1), /* arith_shift_reg. */
1535 COSTS_N_INSNS (1), /* log_shift. */
1536 COSTS_N_INSNS (1), /* log_shift_reg. */
1537 0, /* extend. */
1538 COSTS_N_INSNS (1), /* extend_arith. */
1539 0, /* bfi. */
1540 COSTS_N_INSNS (1), /* bfx. */
1541 COSTS_N_INSNS (1), /* clz. */
1542 COSTS_N_INSNS (1), /* rev. */
1543 0, /* non_exec. */
1544 true /* non_exec_costs_exec. */
1546 /* MULT SImode */
1549 COSTS_N_INSNS (2), /* simple. */
1550 COSTS_N_INSNS (3), /* flag_setting. */
1551 COSTS_N_INSNS (2), /* extend. */
1552 COSTS_N_INSNS (3), /* add. */
1553 COSTS_N_INSNS (2), /* extend_add. */
1554 COSTS_N_INSNS (18) /* idiv. */
1556 /* MULT DImode */
1558 0, /* simple (N/A). */
1559 0, /* flag_setting (N/A). */
1560 COSTS_N_INSNS (3), /* extend. */
1561 0, /* add (N/A). */
1562 COSTS_N_INSNS (3), /* extend_add. */
1563 0 /* idiv (N/A). */
1566 /* LD/ST */
1568 COSTS_N_INSNS (3), /* load. */
1569 COSTS_N_INSNS (3), /* load_sign_extend. */
1570 COSTS_N_INSNS (3), /* ldrd. */
1571 COSTS_N_INSNS (3), /* ldm_1st. */
1572 1, /* ldm_regs_per_insn_1st. */
1573 2, /* ldm_regs_per_insn_subsequent. */
1574 COSTS_N_INSNS (3), /* loadf. */
1575 COSTS_N_INSNS (3), /* loadd. */
1576 0, /* load_unaligned. */
1577 0, /* store. */
1578 0, /* strd. */
1579 0, /* stm_1st. */
1580 1, /* stm_regs_per_insn_1st. */
1581 2, /* stm_regs_per_insn_subsequent. */
1582 COSTS_N_INSNS (2), /* storef. */
1583 COSTS_N_INSNS (2), /* stored. */
1584 0, /* store_unaligned. */
1585 COSTS_N_INSNS (1), /* loadv. */
1586 COSTS_N_INSNS (1) /* storev. */
1589 /* FP SFmode */
1591 COSTS_N_INSNS (17), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1605 /* FP DFmode */
1607 COSTS_N_INSNS (31), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (2), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1622 /* Vector */
1624 COSTS_N_INSNS (1), /* alu. */
1625 COSTS_N_INSNS (4), /* mult. */
1626 COSTS_N_INSNS (1), /* movi. */
1627 COSTS_N_INSNS (2), /* dup. */
1628 COSTS_N_INSNS (2) /* extract. */
1632 const struct cpu_cost_table cortexa15_extra_costs =
1634 /* ALU */
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 COSTS_N_INSNS (1), /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 COSTS_N_INSNS (1), /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 COSTS_N_INSNS (1), /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 0, /* non_exec. */
1651 true /* non_exec_costs_exec. */
1653 /* MULT SImode */
1656 COSTS_N_INSNS (2), /* simple. */
1657 COSTS_N_INSNS (3), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (2), /* add. */
1660 COSTS_N_INSNS (2), /* extend_add. */
1661 COSTS_N_INSNS (18) /* idiv. */
1663 /* MULT DImode */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (3), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1673 /* LD/ST */
1675 COSTS_N_INSNS (3), /* load. */
1676 COSTS_N_INSNS (3), /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (4), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 2, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (4), /* loadf. */
1682 COSTS_N_INSNS (4), /* loadd. */
1683 0, /* load_unaligned. */
1684 0, /* store. */
1685 0, /* strd. */
1686 COSTS_N_INSNS (1), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 2, /* stm_regs_per_insn_subsequent. */
1689 0, /* storef. */
1690 0, /* stored. */
1691 0, /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1696 /* FP SFmode */
1698 COSTS_N_INSNS (17), /* div. */
1699 COSTS_N_INSNS (4), /* mult. */
1700 COSTS_N_INSNS (8), /* mult_addsub. */
1701 COSTS_N_INSNS (8), /* fma. */
1702 COSTS_N_INSNS (4), /* addsub. */
1703 COSTS_N_INSNS (2), /* fpconst. */
1704 COSTS_N_INSNS (2), /* neg. */
1705 COSTS_N_INSNS (5), /* compare. */
1706 COSTS_N_INSNS (4), /* widen. */
1707 COSTS_N_INSNS (4), /* narrow. */
1708 COSTS_N_INSNS (4), /* toint. */
1709 COSTS_N_INSNS (4), /* fromint. */
1710 COSTS_N_INSNS (4) /* roundint. */
1712 /* FP DFmode */
1714 COSTS_N_INSNS (31), /* div. */
1715 COSTS_N_INSNS (4), /* mult. */
1716 COSTS_N_INSNS (8), /* mult_addsub. */
1717 COSTS_N_INSNS (8), /* fma. */
1718 COSTS_N_INSNS (4), /* addsub. */
1719 COSTS_N_INSNS (2), /* fpconst. */
1720 COSTS_N_INSNS (2), /* neg. */
1721 COSTS_N_INSNS (2), /* compare. */
1722 COSTS_N_INSNS (4), /* widen. */
1723 COSTS_N_INSNS (4), /* narrow. */
1724 COSTS_N_INSNS (4), /* toint. */
1725 COSTS_N_INSNS (4), /* fromint. */
1726 COSTS_N_INSNS (4) /* roundint. */
1729 /* Vector */
1731 COSTS_N_INSNS (1), /* alu. */
1732 COSTS_N_INSNS (4), /* mult. */
1733 COSTS_N_INSNS (1), /* movi. */
1734 COSTS_N_INSNS (2), /* dup. */
1735 COSTS_N_INSNS (2) /* extract. */
1739 const struct cpu_cost_table v7m_extra_costs =
1741 /* ALU */
1743 0, /* arith. */
1744 0, /* logical. */
1745 0, /* shift. */
1746 0, /* shift_reg. */
1747 0, /* arith_shift. */
1748 COSTS_N_INSNS (1), /* arith_shift_reg. */
1749 0, /* log_shift. */
1750 COSTS_N_INSNS (1), /* log_shift_reg. */
1751 0, /* extend. */
1752 COSTS_N_INSNS (1), /* extend_arith. */
1753 0, /* bfi. */
1754 0, /* bfx. */
1755 0, /* clz. */
1756 0, /* rev. */
1757 COSTS_N_INSNS (1), /* non_exec. */
1758 false /* non_exec_costs_exec. */
1761 /* MULT SImode */
1763 COSTS_N_INSNS (1), /* simple. */
1764 COSTS_N_INSNS (1), /* flag_setting. */
1765 COSTS_N_INSNS (2), /* extend. */
1766 COSTS_N_INSNS (1), /* add. */
1767 COSTS_N_INSNS (3), /* extend_add. */
1768 COSTS_N_INSNS (8) /* idiv. */
1770 /* MULT DImode */
1772 0, /* simple (N/A). */
1773 0, /* flag_setting (N/A). */
1774 COSTS_N_INSNS (2), /* extend. */
1775 0, /* add (N/A). */
1776 COSTS_N_INSNS (3), /* extend_add. */
1777 0 /* idiv (N/A). */
1780 /* LD/ST */
1782 COSTS_N_INSNS (2), /* load. */
1783 0, /* load_sign_extend. */
1784 COSTS_N_INSNS (3), /* ldrd. */
1785 COSTS_N_INSNS (2), /* ldm_1st. */
1786 1, /* ldm_regs_per_insn_1st. */
1787 1, /* ldm_regs_per_insn_subsequent. */
1788 COSTS_N_INSNS (2), /* loadf. */
1789 COSTS_N_INSNS (3), /* loadd. */
1790 COSTS_N_INSNS (1), /* load_unaligned. */
1791 COSTS_N_INSNS (2), /* store. */
1792 COSTS_N_INSNS (3), /* strd. */
1793 COSTS_N_INSNS (2), /* stm_1st. */
1794 1, /* stm_regs_per_insn_1st. */
1795 1, /* stm_regs_per_insn_subsequent. */
1796 COSTS_N_INSNS (2), /* storef. */
1797 COSTS_N_INSNS (3), /* stored. */
1798 COSTS_N_INSNS (1), /* store_unaligned. */
1799 COSTS_N_INSNS (1), /* loadv. */
1800 COSTS_N_INSNS (1) /* storev. */
1803 /* FP SFmode */
1805 COSTS_N_INSNS (7), /* div. */
1806 COSTS_N_INSNS (2), /* mult. */
1807 COSTS_N_INSNS (5), /* mult_addsub. */
1808 COSTS_N_INSNS (3), /* fma. */
1809 COSTS_N_INSNS (1), /* addsub. */
1810 0, /* fpconst. */
1811 0, /* neg. */
1812 0, /* compare. */
1813 0, /* widen. */
1814 0, /* narrow. */
1815 0, /* toint. */
1816 0, /* fromint. */
1817 0 /* roundint. */
1819 /* FP DFmode */
1821 COSTS_N_INSNS (15), /* div. */
1822 COSTS_N_INSNS (5), /* mult. */
1823 COSTS_N_INSNS (7), /* mult_addsub. */
1824 COSTS_N_INSNS (7), /* fma. */
1825 COSTS_N_INSNS (3), /* addsub. */
1826 0, /* fpconst. */
1827 0, /* neg. */
1828 0, /* compare. */
1829 0, /* widen. */
1830 0, /* narrow. */
1831 0, /* toint. */
1832 0, /* fromint. */
1833 0 /* roundint. */
1836 /* Vector */
1838 COSTS_N_INSNS (1), /* alu. */
1839 COSTS_N_INSNS (4), /* mult. */
1840 COSTS_N_INSNS (1), /* movi. */
1841 COSTS_N_INSNS (2), /* dup. */
1842 COSTS_N_INSNS (2) /* extract. */
1846 const struct addr_mode_cost_table generic_addr_mode_costs =
1848 /* int. */
1850 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1851 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1852 COSTS_N_INSNS (0) /* AMO_WB. */
1854 /* float. */
1856 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1857 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1858 COSTS_N_INSNS (0) /* AMO_WB. */
1860 /* vector. */
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
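/* All of the generic entries above are zero, i.e. the generic tuning
   treats every addressing mode as equally cheap.  A hypothetical core on
   which writeback (pre/post-indexed) addressing were slower could be
   described with a table along these lines (an illustrative sketch only,
   not a description of any real CPU):

     const struct addr_mode_cost_table example_addr_mode_costs =
     {
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) },   (int)
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) },   (float)
       { COSTS_N_INSNS (0), COSTS_N_INSNS (0), COSTS_N_INSNS (1) }    (vector)
     };

   with the three columns corresponding to AMO_DEFAULT, AMO_NO_WB and
   AMO_WB respectively.  */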
1868 const struct tune_params arm_slowmul_tune =
1870 &generic_extra_costs, /* Insn extra costs. */
1871 &generic_addr_mode_costs, /* Addressing mode costs. */
1872 NULL, /* Sched adj cost. */
1873 arm_default_branch_cost,
1874 &arm_default_vec_cost,
1875 3, /* Constant limit. */
1876 5, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 tune_params::PREF_CONST_POOL_TRUE,
1881 tune_params::PREF_LDRD_FALSE,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE,
1886 tune_params::FUSE_NOTHING,
1887 tune_params::SCHED_AUTOPREF_OFF
1890 const struct tune_params arm_fastmul_tune =
1892 &generic_extra_costs, /* Insn extra costs. */
1893 &generic_addr_mode_costs, /* Addressing mode costs. */
1894 NULL, /* Sched adj cost. */
1895 arm_default_branch_cost,
1896 &arm_default_vec_cost,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL,
1902 tune_params::PREF_CONST_POOL_TRUE,
1903 tune_params::PREF_LDRD_FALSE,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE,
1908 tune_params::FUSE_NOTHING,
1909 tune_params::SCHED_AUTOPREF_OFF
1912 /* StrongARM has early execution of branches, so a sequence that is worth
1913 skipping is shorter. Set max_insns_skipped to a lower value. */
1915 const struct tune_params arm_strongarm_tune =
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 3, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 1, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_STRINGOPS_FALSE,
1933 tune_params::FUSE_NOTHING,
1934 tune_params::SCHED_AUTOPREF_OFF
1937 const struct tune_params arm_xscale_tune =
1939 &generic_extra_costs, /* Insn extra costs. */
1940 &generic_addr_mode_costs, /* Addressing mode costs. */
1941 xscale_sched_adjust_cost,
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 2, /* Constant limit. */
1945 3, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 1, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_TRUE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_STRINGOPS_FALSE,
1955 tune_params::FUSE_NOTHING,
1956 tune_params::SCHED_AUTOPREF_OFF
1959 const struct tune_params arm_9e_tune =
1961 &generic_extra_costs, /* Insn extra costs. */
1962 &generic_addr_mode_costs, /* Addressing mode costs. */
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 1, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_TRUE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_STRINGOPS_FALSE,
1977 tune_params::FUSE_NOTHING,
1978 tune_params::SCHED_AUTOPREF_OFF
1981 const struct tune_params arm_marvell_pj4_tune =
1983 &generic_extra_costs, /* Insn extra costs. */
1984 &generic_addr_mode_costs, /* Addressing mode costs. */
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 2, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_TRUE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_STRINGOPS_FALSE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2003 const struct tune_params arm_v6t2_tune =
2005 &generic_extra_costs, /* Insn extra costs. */
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 1, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_STRINGOPS_FALSE,
2021 tune_params::FUSE_NOTHING,
2022 tune_params::SCHED_AUTOPREF_OFF
2026 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2027 const struct tune_params arm_cortex_tune =
2029 &generic_extra_costs,
2030 &generic_addr_mode_costs, /* Addressing mode costs. */
2031 NULL, /* Sched adj cost. */
2032 arm_default_branch_cost,
2033 &arm_default_vec_cost,
2034 1, /* Constant limit. */
2035 5, /* Max cond insns. */
2036 8, /* Memset max inline. */
2037 2, /* Issue rate. */
2038 ARM_PREFETCH_NOT_BENEFICIAL,
2039 tune_params::PREF_CONST_POOL_FALSE,
2040 tune_params::PREF_LDRD_FALSE,
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2043 tune_params::DISPARAGE_FLAGS_NEITHER,
2044 tune_params::PREF_NEON_STRINGOPS_FALSE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_OFF
2049 const struct tune_params arm_cortex_a8_tune =
2051 &cortexa8_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 2, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_cortex_a7_tune =
2073 &cortexa7_extra_costs,
2074 &generic_addr_mode_costs, /* Addressing mode costs. */
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_FALSE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_NEITHER,
2088 tune_params::PREF_NEON_STRINGOPS_TRUE,
2089 tune_params::FUSE_NOTHING,
2090 tune_params::SCHED_AUTOPREF_OFF
2093 const struct tune_params arm_cortex_a15_tune =
2095 &cortexa15_extra_costs,
2096 &generic_addr_mode_costs, /* Addressing mode costs. */
2097 NULL, /* Sched adj cost. */
2098 arm_default_branch_cost,
2099 &arm_default_vec_cost,
2100 1, /* Constant limit. */
2101 2, /* Max cond insns. */
2102 8, /* Memset max inline. */
2103 3, /* Issue rate. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 tune_params::PREF_CONST_POOL_FALSE,
2106 tune_params::PREF_LDRD_TRUE,
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2108 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2109 tune_params::DISPARAGE_FLAGS_ALL,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE,
2111 tune_params::FUSE_NOTHING,
2112 tune_params::SCHED_AUTOPREF_FULL
2115 const struct tune_params arm_cortex_a35_tune =
2117 &cortexa53_extra_costs,
2118 &generic_addr_mode_costs, /* Addressing mode costs. */
2119 NULL, /* Sched adj cost. */
2120 arm_default_branch_cost,
2121 &arm_default_vec_cost,
2122 1, /* Constant limit. */
2123 5, /* Max cond insns. */
2124 8, /* Memset max inline. */
2125 1, /* Issue rate. */
2126 ARM_PREFETCH_NOT_BENEFICIAL,
2127 tune_params::PREF_CONST_POOL_FALSE,
2128 tune_params::PREF_LDRD_FALSE,
2129 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2131 tune_params::DISPARAGE_FLAGS_NEITHER,
2132 tune_params::PREF_NEON_STRINGOPS_TRUE,
2133 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2134 tune_params::SCHED_AUTOPREF_OFF
2137 const struct tune_params arm_cortex_a53_tune =
2139 &cortexa53_extra_costs,
2140 &generic_addr_mode_costs, /* Addressing mode costs. */
2141 NULL, /* Sched adj cost. */
2142 arm_default_branch_cost,
2143 &arm_default_vec_cost,
2144 1, /* Constant limit. */
2145 5, /* Max cond insns. */
2146 8, /* Memset max inline. */
2147 2, /* Issue rate. */
2148 ARM_PREFETCH_NOT_BENEFICIAL,
2149 tune_params::PREF_CONST_POOL_FALSE,
2150 tune_params::PREF_LDRD_FALSE,
2151 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2153 tune_params::DISPARAGE_FLAGS_NEITHER,
2154 tune_params::PREF_NEON_STRINGOPS_TRUE,
2155 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2156 tune_params::SCHED_AUTOPREF_OFF
2159 const struct tune_params arm_cortex_a57_tune =
2161 &cortexa57_extra_costs,
2162 &generic_addr_mode_costs, /* Addressing mode costs. */
2163 NULL, /* Sched adj cost. */
2164 arm_default_branch_cost,
2165 &arm_default_vec_cost,
2166 1, /* Constant limit. */
2167 2, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 3, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL,
2171 tune_params::PREF_CONST_POOL_FALSE,
2172 tune_params::PREF_LDRD_TRUE,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_ALL,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE,
2177 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2178 tune_params::SCHED_AUTOPREF_FULL
2181 const struct tune_params arm_exynosm1_tune =
2183 &exynosm1_extra_costs,
2184 &generic_addr_mode_costs, /* Addressing mode costs. */
2185 NULL, /* Sched adj cost. */
2186 arm_default_branch_cost,
2187 &arm_default_vec_cost,
2188 1, /* Constant limit. */
2189 2, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 3, /* Issue rate. */
2192 ARM_PREFETCH_NOT_BENEFICIAL,
2193 tune_params::PREF_CONST_POOL_FALSE,
2194 tune_params::PREF_LDRD_TRUE,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_ALL,
2198 tune_params::PREF_NEON_STRINGOPS_TRUE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 const struct tune_params arm_xgene1_tune =
2205 &xgene1_extra_costs,
2206 &generic_addr_mode_costs, /* Addressing mode costs. */
2207 NULL, /* Sched adj cost. */
2208 arm_default_branch_cost,
2209 &arm_default_vec_cost,
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 32, /* Memset max inline. */
2213 4, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL,
2215 tune_params::PREF_CONST_POOL_FALSE,
2216 tune_params::PREF_LDRD_TRUE,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL,
2220 tune_params::PREF_NEON_STRINGOPS_FALSE,
2221 tune_params::FUSE_NOTHING,
2222 tune_params::SCHED_AUTOPREF_OFF
2225 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2226 less appealing. Set max_insns_skipped to a low value. */
2228 const struct tune_params arm_cortex_a5_tune =
2230 &cortexa5_extra_costs,
2231 &generic_addr_mode_costs, /* Addressing mode costs. */
2232 NULL, /* Sched adj cost. */
2233 arm_cortex_a5_branch_cost,
2234 &arm_default_vec_cost,
2235 1, /* Constant limit. */
2236 1, /* Max cond insns. */
2237 8, /* Memset max inline. */
2238 2, /* Issue rate. */
2239 ARM_PREFETCH_NOT_BENEFICIAL,
2240 tune_params::PREF_CONST_POOL_FALSE,
2241 tune_params::PREF_LDRD_FALSE,
2242 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2243 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2244 tune_params::DISPARAGE_FLAGS_NEITHER,
2245 tune_params::PREF_NEON_STRINGOPS_TRUE,
2246 tune_params::FUSE_NOTHING,
2247 tune_params::SCHED_AUTOPREF_OFF
2250 const struct tune_params arm_cortex_a9_tune =
2252 &cortexa9_extra_costs,
2253 &generic_addr_mode_costs, /* Addressing mode costs. */
2254 cortex_a9_sched_adjust_cost,
2255 arm_default_branch_cost,
2256 &arm_default_vec_cost,
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 2, /* Issue rate. */
2261 ARM_PREFETCH_BENEFICIAL(4,32,32),
2262 tune_params::PREF_CONST_POOL_FALSE,
2263 tune_params::PREF_LDRD_FALSE,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER,
2267 tune_params::PREF_NEON_STRINGOPS_FALSE,
2268 tune_params::FUSE_NOTHING,
2269 tune_params::SCHED_AUTOPREF_OFF
2272 const struct tune_params arm_cortex_a12_tune =
2274 &cortexa12_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_default_branch_cost,
2278 &arm_default_vec_cost, /* Vectorizer costs. */
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_FALSE,
2285 tune_params::PREF_LDRD_TRUE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_ALL,
2289 tune_params::PREF_NEON_STRINGOPS_TRUE,
2290 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2291 tune_params::SCHED_AUTOPREF_OFF
2294 const struct tune_params arm_cortex_a73_tune =
2296 &cortexa57_extra_costs,
2297 &generic_addr_mode_costs, /* Addressing mode costs. */
2298 NULL, /* Sched adj cost. */
2299 arm_default_branch_cost,
2300 &arm_default_vec_cost, /* Vectorizer costs. */
2301 1, /* Constant limit. */
2302 2, /* Max cond insns. */
2303 8, /* Memset max inline. */
2304 2, /* Issue rate. */
2305 ARM_PREFETCH_NOT_BENEFICIAL,
2306 tune_params::PREF_CONST_POOL_FALSE,
2307 tune_params::PREF_LDRD_TRUE,
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2309 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2310 tune_params::DISPARAGE_FLAGS_ALL,
2311 tune_params::PREF_NEON_STRINGOPS_TRUE,
2312 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2313 tune_params::SCHED_AUTOPREF_FULL
2316 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2317 cycle to execute each. An LDR from the constant pool also takes two cycles
2318 to execute, but mildly increases pipelining opportunity (consecutive
2319 loads/stores can be pipelined together, saving one cycle), and may also
2320 improve icache utilisation. Hence we prefer the constant pool for such
2321 processors. */
2323 const struct tune_params arm_v7m_tune =
2325 &v7m_extra_costs,
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_cortex_m_branch_cost,
2329 &arm_default_vec_cost,
2330 1, /* Constant limit. */
2331 2, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_TRUE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_STRINGOPS_FALSE,
2341 tune_params::FUSE_NOTHING,
2342 tune_params::SCHED_AUTOPREF_OFF
2345 /* Cortex-M7 tuning. */
2347 const struct tune_params arm_cortex_m7_tune =
2349 &v7m_extra_costs,
2350 &generic_addr_mode_costs, /* Addressing mode costs. */
2351 NULL, /* Sched adj cost. */
2352 arm_cortex_m7_branch_cost,
2353 &arm_default_vec_cost,
2354 0, /* Constant limit. */
2355 1, /* Max cond insns. */
2356 8, /* Memset max inline. */
2357 2, /* Issue rate. */
2358 ARM_PREFETCH_NOT_BENEFICIAL,
2359 tune_params::PREF_CONST_POOL_TRUE,
2360 tune_params::PREF_LDRD_FALSE,
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2362 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2363 tune_params::DISPARAGE_FLAGS_NEITHER,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2369 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2370 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2371 cortex-m23. */
2372 const struct tune_params arm_v6m_tune =
2374 &generic_extra_costs, /* Insn extra costs. */
2375 &generic_addr_mode_costs, /* Addressing mode costs. */
2376 NULL, /* Sched adj cost. */
2377 arm_default_branch_cost,
2378 &arm_default_vec_cost, /* Vectorizer costs. */
2379 1, /* Constant limit. */
2380 5, /* Max cond insns. */
2381 8, /* Memset max inline. */
2382 1, /* Issue rate. */
2383 ARM_PREFETCH_NOT_BENEFICIAL,
2384 tune_params::PREF_CONST_POOL_FALSE,
2385 tune_params::PREF_LDRD_FALSE,
2386 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2387 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2388 tune_params::DISPARAGE_FLAGS_NEITHER,
2389 tune_params::PREF_NEON_STRINGOPS_FALSE,
2390 tune_params::FUSE_NOTHING,
2391 tune_params::SCHED_AUTOPREF_OFF
2394 const struct tune_params arm_fa726te_tune =
2396 &generic_extra_costs, /* Insn extra costs. */
2397 &generic_addr_mode_costs, /* Addressing mode costs. */
2398 fa726te_sched_adjust_cost,
2399 arm_default_branch_cost,
2400 &arm_default_vec_cost,
2401 1, /* Constant limit. */
2402 5, /* Max cond insns. */
2403 8, /* Memset max inline. */
2404 2, /* Issue rate. */
2405 ARM_PREFETCH_NOT_BENEFICIAL,
2406 tune_params::PREF_CONST_POOL_TRUE,
2407 tune_params::PREF_LDRD_FALSE,
2408 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2409 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2410 tune_params::DISPARAGE_FLAGS_NEITHER,
2411 tune_params::PREF_NEON_STRINGOPS_FALSE,
2412 tune_params::FUSE_NOTHING,
2413 tune_params::SCHED_AUTOPREF_OFF
2416 /* Auto-generated CPU, FPU and architecture tables. */
2417 #include "arm-cpu-data.h"
2419 /* The name of the preprocessor macro to define for this architecture. PROFILE
2420 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2421 is thus chosen to be big enough to hold the longest architecture name. */
2423 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2425 /* Supported TLS relocations. */
2427 enum tls_reloc {
2428 TLS_GD32,
2429 TLS_GD32_FDPIC,
2430 TLS_LDM32,
2431 TLS_LDM32_FDPIC,
2432 TLS_LDO32,
2433 TLS_IE32,
2434 TLS_IE32_FDPIC,
2435 TLS_LE32,
2436 TLS_DESCSEQ /* GNU scheme */
2439 /* The maximum number of insns to be used when loading a constant. */
2440 inline static int
2441 arm_constant_limit (bool size_p)
2443 return size_p ? 1 : current_tune->constant_limit;
2446 /* Emit an insn that's a simple single-set. Both the operands must be known
2447 to be valid. */
2448 inline static rtx_insn *
2449 emit_set_insn (rtx x, rtx y)
2451 return emit_insn (gen_rtx_SET (x, y));
2454 /* Return the number of bits set in VALUE. */
2455 static unsigned
2456 bit_count (unsigned long value)
2458 unsigned long count = 0;
2460 while (value)
2462 count++;
2463 value &= value - 1; /* Clear the least-significant set bit. */
2466 return count;
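/* Worked example of the loop above (Kernighan's trick): for
   value = 0b101100 the successive "value &= value - 1" steps give
   0b101000, then 0b100000, then 0, so the loop body runs three times
   and bit_count returns 3 -- one iteration per set bit rather than one
   per bit position.  */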
2469 /* Return the number of bits set in BMAP. */
2470 static unsigned
2471 bitmap_popcount (const sbitmap bmap)
2473 unsigned int count = 0;
2474 unsigned int n = 0;
2475 sbitmap_iterator sbi;
2477 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2478 count++;
2479 return count;
2482 typedef struct
2484 machine_mode mode;
2485 const char *name;
2486 } arm_fixed_mode_set;
2488 /* A small helper for setting fixed-point libfuncs. */
2490 static void
2491 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2492 const char *funcname, const char *modename,
2493 int num_suffix)
2495 char buffer[50];
2497 if (num_suffix == 0)
2498 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2499 else
2500 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2502 set_optab_libfunc (optable, mode, buffer);
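/* For example, the call

     arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd", "sq", 3);

   made from arm_init_libfuncs below formats the name "__gnu_ssaddsq3"
   and registers it as the saturating-add libcall for SQmode.  */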
2505 static void
2506 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2507 machine_mode from, const char *funcname,
2508 const char *toname, const char *fromname)
2510 char buffer[50];
2511 const char *maybe_suffix_2 = "";
2513 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2514 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2515 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2516 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2517 maybe_suffix_2 = "2";
2519 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2520 maybe_suffix_2);
2522 set_conv_libfunc (optable, to, from, buffer);
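/* For example, a conversion from SAmode to DAmode (both signed accum
   modes, so the fixed-bit.h rule above applies) is registered as
   "__gnu_fractsada2", while a conversion from SQmode to SImode (the
   destination is not a fixed-point mode) gets no suffix and becomes
   "__gnu_fractsqsi".  */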
2525 static GTY(()) rtx speculation_barrier_libfunc;
2527 /* Record that we have no arithmetic or comparison libfuncs for
2528 machine mode MODE. */
2530 static void
2531 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2533 /* Arithmetic. */
2534 set_optab_libfunc (add_optab, mode, NULL);
2535 set_optab_libfunc (sdiv_optab, mode, NULL);
2536 set_optab_libfunc (smul_optab, mode, NULL);
2537 set_optab_libfunc (neg_optab, mode, NULL);
2538 set_optab_libfunc (sub_optab, mode, NULL);
2540 /* Comparisons. */
2541 set_optab_libfunc (eq_optab, mode, NULL);
2542 set_optab_libfunc (ne_optab, mode, NULL);
2543 set_optab_libfunc (lt_optab, mode, NULL);
2544 set_optab_libfunc (le_optab, mode, NULL);
2545 set_optab_libfunc (ge_optab, mode, NULL);
2546 set_optab_libfunc (gt_optab, mode, NULL);
2547 set_optab_libfunc (unord_optab, mode, NULL);
2550 /* Set up library functions unique to ARM. */
2551 static void
2552 arm_init_libfuncs (void)
2554 machine_mode mode_iter;
2556 /* For Linux, we have access to kernel support for atomic operations. */
2557 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2558 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2560 /* There are no special library functions unless we are using the
2561 ARM BPABI. */
2562 if (!TARGET_BPABI)
2563 return;
2565 /* The functions below are described in Section 4 of the "Run-Time
2566 ABI for the ARM architecture", Version 1.0. */
2568 /* Double-precision floating-point arithmetic. Table 2. */
2569 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2570 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2571 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2572 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2573 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2575 /* Double-precision comparisons. Table 3. */
2576 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2577 set_optab_libfunc (ne_optab, DFmode, NULL);
2578 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2579 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2580 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2581 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2582 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2584 /* Single-precision floating-point arithmetic. Table 4. */
2585 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2586 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2587 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2588 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2589 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2591 /* Single-precision comparisons. Table 5. */
2592 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2593 set_optab_libfunc (ne_optab, SFmode, NULL);
2594 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2595 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2596 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2597 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2598 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2600 /* Floating-point to integer conversions. Table 6. */
2601 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2602 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2603 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2604 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2605 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2606 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2607 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2608 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2610 /* Conversions between floating types. Table 7. */
2611 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2612 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2614 /* Integer to floating-point conversions. Table 8. */
2615 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2616 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2617 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2618 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2619 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2620 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2621 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2622 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2624 /* Long long. Table 9. */
2625 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2626 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2627 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2628 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2629 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2630 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2631 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2632 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2634 /* Integer (32/32->32) division. \S 4.3.1. */
2635 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2636 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2638 /* The divmod functions are designed so that they can be used for
2639 plain division, even though they return both the quotient and the
2640 remainder. The quotient is returned in the usual location (i.e.,
2641 r0 for SImode, {r0, r1} for DImode), just as would be expected
2642 for an ordinary division routine. Because the AAPCS calling
2643 conventions specify that all of { r0, r1, r2, r3 } are
2644 call-clobbered registers, there is no need to tell the compiler
2645 explicitly that those registers are clobbered by these
2646 routines. */
2647 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2648 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
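/* So a plain C division such as

     long long quot = num / den;

   is typically compiled into a call to __aeabi_ldivmod: the quotient
   comes back in {r0, r1} as usual, and the remainder (also returned by
   the routine, in {r2, r3} per the run-time ABI) is simply ignored.  */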
2650 /* For SImode division the ABI provides div-without-mod routines,
2651 which are faster. */
2652 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2653 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2655 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2656 divmod libcalls instead. */
2657 set_optab_libfunc (smod_optab, DImode, NULL);
2658 set_optab_libfunc (umod_optab, DImode, NULL);
2659 set_optab_libfunc (smod_optab, SImode, NULL);
2660 set_optab_libfunc (umod_optab, SImode, NULL);
2662 /* Half-precision float operations. The compiler handles all operations
2663 with NULL libfuncs by converting to SFmode. */
2664 switch (arm_fp16_format)
2666 case ARM_FP16_FORMAT_IEEE:
2667 case ARM_FP16_FORMAT_ALTERNATIVE:
2669 /* Conversions. */
2670 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2671 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2672 ? "__gnu_f2h_ieee"
2673 : "__gnu_f2h_alternative"));
2674 set_conv_libfunc (sext_optab, SFmode, HFmode,
2675 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2676 ? "__gnu_h2f_ieee"
2677 : "__gnu_h2f_alternative"));
2679 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2680 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2681 ? "__gnu_d2h_ieee"
2682 : "__gnu_d2h_alternative"));
2684 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2685 break;
2687 default:
2688 break;
2691 /* For all possible libcalls in BFmode, record NULL. */
2692 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2694 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2695 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2696 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2697 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2699 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2701 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2703 const arm_fixed_mode_set fixed_arith_modes[] =
2705 { E_QQmode, "qq" },
2706 { E_UQQmode, "uqq" },
2707 { E_HQmode, "hq" },
2708 { E_UHQmode, "uhq" },
2709 { E_SQmode, "sq" },
2710 { E_USQmode, "usq" },
2711 { E_DQmode, "dq" },
2712 { E_UDQmode, "udq" },
2713 { E_TQmode, "tq" },
2714 { E_UTQmode, "utq" },
2715 { E_HAmode, "ha" },
2716 { E_UHAmode, "uha" },
2717 { E_SAmode, "sa" },
2718 { E_USAmode, "usa" },
2719 { E_DAmode, "da" },
2720 { E_UDAmode, "uda" },
2721 { E_TAmode, "ta" },
2722 { E_UTAmode, "uta" }
2724 const arm_fixed_mode_set fixed_conv_modes[] =
2726 { E_QQmode, "qq" },
2727 { E_UQQmode, "uqq" },
2728 { E_HQmode, "hq" },
2729 { E_UHQmode, "uhq" },
2730 { E_SQmode, "sq" },
2731 { E_USQmode, "usq" },
2732 { E_DQmode, "dq" },
2733 { E_UDQmode, "udq" },
2734 { E_TQmode, "tq" },
2735 { E_UTQmode, "utq" },
2736 { E_HAmode, "ha" },
2737 { E_UHAmode, "uha" },
2738 { E_SAmode, "sa" },
2739 { E_USAmode, "usa" },
2740 { E_DAmode, "da" },
2741 { E_UDAmode, "uda" },
2742 { E_TAmode, "ta" },
2743 { E_UTAmode, "uta" },
2744 { E_QImode, "qi" },
2745 { E_HImode, "hi" },
2746 { E_SImode, "si" },
2747 { E_DImode, "di" },
2748 { E_TImode, "ti" },
2749 { E_SFmode, "sf" },
2750 { E_DFmode, "df" }
2752 unsigned int i, j;
2754 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2756 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2757 "add", fixed_arith_modes[i].name, 3);
2758 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2759 "ssadd", fixed_arith_modes[i].name, 3);
2760 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2761 "usadd", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2763 "sub", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2765 "sssub", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2767 "ussub", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2769 "mul", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2771 "ssmul", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2773 "usmul", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2775 "div", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2777 "udiv", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2779 "ssdiv", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2781 "usdiv", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2783 "neg", fixed_arith_modes[i].name, 2);
2784 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2785 "ssneg", fixed_arith_modes[i].name, 2);
2786 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2787 "usneg", fixed_arith_modes[i].name, 2);
2788 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2789 "ashl", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2791 "ashr", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2793 "lshr", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2795 "ssashl", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2797 "usashl", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2799 "cmp", fixed_arith_modes[i].name, 2);
2802 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2803 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2805 if (i == j
2806 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2807 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2808 continue;
2810 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2811 fixed_conv_modes[j].mode, "fract",
2812 fixed_conv_modes[i].name,
2813 fixed_conv_modes[j].name);
2814 arm_set_fixed_conv_libfunc (satfract_optab,
2815 fixed_conv_modes[i].mode,
2816 fixed_conv_modes[j].mode, "satfract",
2817 fixed_conv_modes[i].name,
2818 fixed_conv_modes[j].name);
2819 arm_set_fixed_conv_libfunc (fractuns_optab,
2820 fixed_conv_modes[i].mode,
2821 fixed_conv_modes[j].mode, "fractuns",
2822 fixed_conv_modes[i].name,
2823 fixed_conv_modes[j].name);
2824 arm_set_fixed_conv_libfunc (satfractuns_optab,
2825 fixed_conv_modes[i].mode,
2826 fixed_conv_modes[j].mode, "satfractuns",
2827 fixed_conv_modes[i].name,
2828 fixed_conv_modes[j].name);
2832 if (TARGET_AAPCS_BASED)
2833 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2835 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2838 /* On AAPCS systems, this is the "struct __va_list". */
2839 static GTY(()) tree va_list_type;
2841 /* Return the type to use as __builtin_va_list. */
2842 static tree
2843 arm_build_builtin_va_list (void)
2845 tree va_list_name;
2846 tree ap_field;
2848 if (!TARGET_AAPCS_BASED)
2849 return std_build_builtin_va_list ();
2851 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2852 defined as:
2854 struct __va_list
2855 {
2856 void *__ap;
2857 };
2859 The C Library ABI further reinforces this definition in \S
2860 4.1.
2862 We must follow this definition exactly. The structure tag
2863 name is visible in C++ mangled names, and thus forms a part
2864 of the ABI. The field name may be used by people who
2865 #include <stdarg.h>. */
2866 /* Create the type. */
2867 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2868 /* Give it the required name. */
2869 va_list_name = build_decl (BUILTINS_LOCATION,
2870 TYPE_DECL,
2871 get_identifier ("__va_list"),
2872 va_list_type);
2873 DECL_ARTIFICIAL (va_list_name) = 1;
2874 TYPE_NAME (va_list_type) = va_list_name;
2875 TYPE_STUB_DECL (va_list_type) = va_list_name;
2876 /* Create the __ap field. */
2877 ap_field = build_decl (BUILTINS_LOCATION,
2878 FIELD_DECL,
2879 get_identifier ("__ap"),
2880 ptr_type_node);
2881 DECL_ARTIFICIAL (ap_field) = 1;
2882 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2883 TYPE_FIELDS (va_list_type) = ap_field;
2884 /* Compute its layout. */
2885 layout_type (va_list_type);
2887 return va_list_type;
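/* On AAPCS targets the type built here is therefore equivalent to the
   user-visible

     typedef struct __va_list { void *__ap; } va_list;

   while non-AAPCS targets fall back to the generic va_list type.  */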
2890 /* Return an expression of type "void *" pointing to the next
2891 available argument in a variable-argument list. VALIST is the
2892 user-level va_list object, of type __builtin_va_list. */
2893 static tree
2894 arm_extract_valist_ptr (tree valist)
2896 if (TREE_TYPE (valist) == error_mark_node)
2897 return error_mark_node;
2899 /* On an AAPCS target, the pointer is stored within "struct
2900 __va_list". */
2901 if (TARGET_AAPCS_BASED)
2903 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2904 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2905 valist, ap_field, NULL_TREE);
2908 return valist;
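/* In C terms: given "va_list ap", the tree returned on an AAPCS target
   denotes the member access "ap.__ap", so the later va_arg expansion
   works on the raw pointer rather than on the wrapper struct.  */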
2911 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2912 static void
2913 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2915 valist = arm_extract_valist_ptr (valist);
2916 std_expand_builtin_va_start (valist, nextarg);
2919 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2920 static tree
2921 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2922 gimple_seq *post_p)
2924 valist = arm_extract_valist_ptr (valist);
2925 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2928 /* Check any incompatible options that the user has specified. */
2929 static void
2930 arm_option_check_internal (struct gcc_options *opts)
2932 int flags = opts->x_target_flags;
2934 /* iWMMXt and NEON are incompatible. */
2935 if (TARGET_IWMMXT
2936 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2937 error ("iWMMXt and NEON are incompatible");
2939 /* Make sure that the processor choice does not conflict with any of the
2940 other command line choices. */
2941 if (TARGET_ARM_P (flags)
2942 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2943 error ("target CPU does not support ARM mode");
2945 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2946 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2947 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2949 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2950 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2952 /* If this target is normally configured to use APCS frames, warn if they
2953 are turned off and debugging is turned on. */
2954 if (TARGET_ARM_P (flags)
2955 && write_symbols != NO_DEBUG
2956 && !TARGET_APCS_FRAME
2957 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2958 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2959 "debugging");
2961 /* iWMMXt unsupported under Thumb mode. */
2962 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2963 error ("iWMMXt unsupported under Thumb mode");
2965 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2966 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2968 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2970 error ("RTP PIC is incompatible with Thumb");
2971 flag_pic = 0;
2974 if (target_pure_code || target_slow_flash_data)
2976 const char *flag = (target_pure_code ? "-mpure-code" :
2977 "-mslow-flash-data");
2978 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2980 /* We only support -mslow-flash-data on M-profile targets with
2981 MOVT. */
2982 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2983 error ("%s only supports non-pic code on M-profile targets with the "
2984 "MOVT instruction", flag);
2986 /* We only support -mpure-code on M-profile targets. */
2987 if (target_pure_code && common_unsupported_modes)
2988 error ("%s only supports non-pic code on M-profile targets", flag);
2990 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2991 -mword-relocations forbids relocation of MOVT/MOVW. */
2992 if (target_word_relocations)
2993 error ("%s incompatible with %<-mword-relocations%>", flag);
2997 /* Recompute the global settings depending on target attribute options. */
2999 static void
3000 arm_option_params_internal (void)
3002 /* If we are not using the default (ARM mode) section anchor offset
3003 ranges, then set the correct ranges now. */
3004 if (TARGET_THUMB1)
3006 /* Thumb-1 LDR instructions cannot have negative offsets.
3007 Permissible positive offset ranges are 5-bit (for byte loads),
3008 6-bit (for halfword loads), or 7-bit (for word loads).
3009 Empirical results suggest a 7-bit anchor range gives the best
3010 overall code size. */
3011 targetm.min_anchor_offset = 0;
3012 targetm.max_anchor_offset = 127;
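/* (The "7-bit" figure above is the byte-offset range: the Thumb-1 LDR
   immediate field is 5 bits scaled by the access size, so word loads
   reach byte offsets 0..124, hence the 0..127 anchor range chosen
   here.)  */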
3014 else if (TARGET_THUMB2)
3016 /* The minimum is set such that the total size of the block
3017 for a particular anchor is 248 + 1 + 4095 bytes, which is
3018 divisible by eight, ensuring natural spacing of anchors. */
3019 targetm.min_anchor_offset = -248;
3020 targetm.max_anchor_offset = 4095;
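/* Check: 248 + 1 + 4095 = 4344 = 543 * 8, so each anchor block is
   indeed a whole number of eight-byte units, as the comment above
   requires.  */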
3022 else
3024 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3025 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3028 /* Increase the number of conditional instructions with -Os. */
3029 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3031 /* For THUMB2, we limit the conditional sequence to one IT block. */
3032 if (TARGET_THUMB2)
3033 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
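/* Example: with the Cortex-A15 tuning (max cond insns = 2 above), a
   Thumb-2 compile without -Os ends up with
   MIN (2, MAX_INSN_PER_IT_BLOCK) = 2, while -Os raises the starting
   value to 4 and the IT-block limit then caps the final figure at
   MAX_INSN_PER_IT_BLOCK.  */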
3035 if (TARGET_THUMB1)
3036 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3037 else
3038 targetm.md_asm_adjust = arm_md_asm_adjust;
3041 /* True if -mflip-thumb should next add an attribute for the default
3042 mode, false if it should next add an attribute for the opposite mode. */
3043 static GTY(()) bool thumb_flipper;
3045 /* Options after initial target override. */
3046 static GTY(()) tree init_optimize;
3048 static void
3049 arm_override_options_after_change_1 (struct gcc_options *opts,
3050 struct gcc_options *opts_set)
3052 /* -falign-functions without argument: supply one. */
3053 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3054 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3055 && opts->x_optimize_size ? "2" : "4";
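/* E.g. compiling with "-Os -mthumb -falign-functions" (no explicit
   value) behaves like -falign-functions=2, whereas an ARM-state or
   non-size-optimised compile defaults to -falign-functions=4.  */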
3058 /* Implement targetm.override_options_after_change. */
3060 static void
3061 arm_override_options_after_change (void)
3063 arm_override_options_after_change_1 (&global_options, &global_options_set);
3066 /* Implement TARGET_OPTION_RESTORE. */
3067 static void
3068 arm_option_restore (struct gcc_options */* opts */,
3069 struct gcc_options */* opts_set */,
3070 struct cl_target_option *ptr)
3072 arm_configure_build_target (&arm_active_target, ptr, false);
3073 arm_option_reconfigure_globals ();
3076 /* Reset options between modes that the user has specified. */
3077 static void
3078 arm_option_override_internal (struct gcc_options *opts,
3079 struct gcc_options *opts_set)
3081 arm_override_options_after_change_1 (opts, opts_set);
3083 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3085 /* The default is to enable interworking, so this warning message would
3086 be confusing to users who have just compiled with
3087 e.g., -march=armv4. */
3088 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3089 opts->x_target_flags &= ~MASK_INTERWORK;
3092 if (TARGET_THUMB_P (opts->x_target_flags)
3093 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3095 warning (0, "target CPU does not support THUMB instructions");
3096 opts->x_target_flags &= ~MASK_THUMB;
3099 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3101 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3102 opts->x_target_flags &= ~MASK_APCS_FRAME;
3105 /* Callee super interworking implies thumb interworking. Adding
3106 this to the flags here simplifies the logic elsewhere. */
3107 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3108 opts->x_target_flags |= MASK_INTERWORK;
3110 /* Need to remember initial values so combinations of options like
3111 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3112 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3114 if (! opts_set->x_arm_restrict_it)
3115 opts->x_arm_restrict_it = arm_arch8;
3117 /* ARM execution state and M profile don't have [restrict] IT. */
3118 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3119 opts->x_arm_restrict_it = 0;
3121 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3122 if (!opts_set->x_arm_restrict_it
3123 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3124 opts->x_arm_restrict_it = 0;
3126 /* Enable -munaligned-access by default for
3127 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3128 i.e. Thumb2 and ARM state only.
3129 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3130 - ARMv8 architecture-based processors.
3132 Disable -munaligned-access by default for
3133 - all pre-ARMv6 architecture-based processors
3134 - ARMv6-M architecture-based processors
3135 - ARMv8-M Baseline processors. */
3137 if (! opts_set->x_unaligned_access)
3139 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3140 && arm_arch6 && (arm_arch_notm || arm_arch7));
3142 else if (opts->x_unaligned_access == 1
3143 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3145 warning (0, "target CPU does not support unaligned accesses");
3146 opts->x_unaligned_access = 0;
3149 /* Don't warn since it's on by default in -O2. */
3150 if (TARGET_THUMB1_P (opts->x_target_flags))
3151 opts->x_flag_schedule_insns = 0;
3152 else
3153 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3155 /* Disable shrink-wrap when optimizing function for size, since it tends to
3156 generate additional returns. */
3157 if (optimize_function_for_size_p (cfun)
3158 && TARGET_THUMB2_P (opts->x_target_flags))
3159 opts->x_flag_shrink_wrap = false;
3160 else
3161 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3163 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3164 - epilogue_insns - does not accurately model the corresponding insns
3165 emitted in the asm file. In particular, see the comment in thumb_exit
3166 'Find out how many of the (return) argument registers we can corrupt'.
3167 As a consequence, the epilogue may clobber registers without fipa-ra
3168 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3169 TODO: Accurately model clobbers for epilogue_insns and reenable
3170 fipa-ra. */
3171 if (TARGET_THUMB1_P (opts->x_target_flags))
3172 opts->x_flag_ipa_ra = 0;
3173 else
3174 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3176 /* Thumb2 inline assembly code should always use unified syntax.
3177 This will apply to ARM and Thumb1 eventually. */
3178 if (TARGET_THUMB2_P (opts->x_target_flags))
3179 opts->x_inline_asm_unified = true;
3181 if (arm_stack_protector_guard == SSP_GLOBAL
3182 && opts->x_arm_stack_protector_guard_offset_str)
3184 error ("incompatible options %<-mstack-protector-guard=global%> and "
3185 "%<-mstack-protector-guard-offset=%s%>",
3186 arm_stack_protector_guard_offset_str);
3189 if (opts->x_arm_stack_protector_guard_offset_str)
3191 char *end;
3192 const char *str = arm_stack_protector_guard_offset_str;
3193 errno = 0;
3194 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3195 if (!*str || *end || errno)
3196 error ("%qs is not a valid offset in %qs", str,
3197 "-mstack-protector-guard-offset=");
3198 arm_stack_protector_guard_offset = offs;
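/* For instance, -mstack-protector-guard-offset=1296 stores 1296 here,
   while a malformed value such as "12z" fails the *end check above and
   is reported as an invalid offset.  */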
3201 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3202 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3203 #endif
3206 static sbitmap isa_all_fpubits_internal;
3207 static sbitmap isa_all_fpbits;
3208 static sbitmap isa_quirkbits;
3210 /* Configure a build target TARGET from the user-specified options OPTS and
3211 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3212 architecture have been specified, but the two are not identical. */
3213 void
3214 arm_configure_build_target (struct arm_build_target *target,
3215 struct cl_target_option *opts,
3216 bool warn_compatible)
3218 const cpu_option *arm_selected_tune = NULL;
3219 const arch_option *arm_selected_arch = NULL;
3220 const cpu_option *arm_selected_cpu = NULL;
3221 const arm_fpu_desc *arm_selected_fpu = NULL;
3222 const char *tune_opts = NULL;
3223 const char *arch_opts = NULL;
3224 const char *cpu_opts = NULL;
3226 bitmap_clear (target->isa);
3227 target->core_name = NULL;
3228 target->arch_name = NULL;
3230 if (opts->x_arm_arch_string)
3232 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3233 "-march",
3234 opts->x_arm_arch_string);
3235 arch_opts = strchr (opts->x_arm_arch_string, '+');
3238 if (opts->x_arm_cpu_string)
3240 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3241 opts->x_arm_cpu_string);
3242 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3243 arm_selected_tune = arm_selected_cpu;
3244 /* If taking the tuning from -mcpu, we don't need to rescan the
3245 options for tuning. */
3248 if (opts->x_arm_tune_string)
3250 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3251 opts->x_arm_tune_string);
3252 tune_opts = strchr (opts->x_arm_tune_string, '+');
3255 if (arm_selected_arch)
3257 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3258 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3259 arch_opts);
3261 if (arm_selected_cpu)
3263 auto_sbitmap cpu_isa (isa_num_bits);
3264 auto_sbitmap isa_delta (isa_num_bits);
3266 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 bitmap_xor (isa_delta, cpu_isa, target->isa);
3270 /* Ignore any bits that are quirk bits. */
3271 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3272 /* If the user (or the default configuration) has specified a
3273 specific FPU, then ignore any bits that depend on the FPU
3274 configuration. Do similarly if using the soft-float
3275 ABI. */
3276 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3277 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3278 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3280 if (!bitmap_empty_p (isa_delta))
3282 if (warn_compatible)
3283 warning (0, "switch %<-mcpu=%s%> conflicts "
3284 "with switch %<-march=%s%>",
3285 opts->x_arm_cpu_string,
3286 opts->x_arm_arch_string);
3288 /* -march wins for code generation.
3289 -mcpu wins for default tuning. */
3290 if (!arm_selected_tune)
3291 arm_selected_tune = arm_selected_cpu;
3293 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3294 target->arch_name = arm_selected_arch->common.name;
3296 else
3298 /* Architecture and CPU are essentially the same.
3299 Prefer the CPU setting. */
3300 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3301 target->core_name = arm_selected_cpu->common.name;
3302 /* Copy the CPU's capabilities, so that we inherit the
3303 appropriate extensions and quirks. */
3304 bitmap_copy (target->isa, cpu_isa);
3307 else
3309 /* Pick a CPU based on the architecture. */
3310 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3311 target->arch_name = arm_selected_arch->common.name;
3312 /* Note: target->core_name is left unset in this path. */
3315 else if (arm_selected_cpu)
3317 target->core_name = arm_selected_cpu->common.name;
3318 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3319 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3320 cpu_opts);
3321 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3323 /* If the user did not specify a processor or architecture, choose
3324 one for them. */
3325 else
3327 const cpu_option *sel;
3328 auto_sbitmap sought_isa (isa_num_bits);
3329 bitmap_clear (sought_isa);
3330 auto_sbitmap default_isa (isa_num_bits);
3332 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3333 TARGET_CPU_DEFAULT);
3334 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3335 gcc_assert (arm_selected_cpu->common.name);
3337 /* RWE: All of the selection logic below (to the end of this
3338 'if' clause) looks somewhat suspect. It appears to be mostly
3339 there to support forcing thumb support when the default CPU
3340 does not have thumb (somewhat dubious in terms of what the
3341 user might be expecting). I think it should be removed once
3342 support for the pre-thumb era cores is removed. */
3343 sel = arm_selected_cpu;
3344 arm_initialize_isa (default_isa, sel->common.isa_bits);
3345 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3346 cpu_opts);
3348 /* Now check to see if the user has specified any command line
3349 switches that require certain abilities from the cpu. */
3351 if (TARGET_INTERWORK || TARGET_THUMB)
3352 bitmap_set_bit (sought_isa, isa_bit_thumb);
3354 /* If there are such requirements and the default CPU does not
3355 satisfy them, we need to run over the complete list of
3356 cores looking for one that is satisfactory. */
3357 if (!bitmap_empty_p (sought_isa)
3358 && !bitmap_subset_p (sought_isa, default_isa))
3360 auto_sbitmap candidate_isa (isa_num_bits);
3361 /* We're only interested in a CPU with at least the
3362 capabilities of the default CPU and the required
3363 additional features. */
3364 bitmap_ior (default_isa, default_isa, sought_isa);
3366 /* Try to locate a CPU type that supports all of the abilities
3367 of the default CPU, plus the extra abilities requested by
3368 the user. */
3369 for (sel = all_cores; sel->common.name != NULL; sel++)
3371 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3372 /* An exact match? */
3373 if (bitmap_equal_p (default_isa, candidate_isa))
3374 break;
3377 if (sel->common.name == NULL)
3379 unsigned current_bit_count = isa_num_bits;
3380 const cpu_option *best_fit = NULL;
3382 /* Ideally we would like to issue an error message here
3383 saying that it was not possible to find a CPU compatible
3384 with the default CPU, but which also supports the command
3385 line options specified by the programmer, and so they
3386 ought to use the -mcpu=<name> command line option to
3387 override the default CPU type.
3389 If we cannot find a CPU that has exactly the
3390 characteristics of the default CPU and the given
3391 command line options we scan the array again looking
3392 for a best match. The best match must have at least
3393 the capabilities of the perfect match. */
3394 for (sel = all_cores; sel->common.name != NULL; sel++)
3396 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3398 if (bitmap_subset_p (default_isa, candidate_isa))
3400 unsigned count;
3402 bitmap_and_compl (candidate_isa, candidate_isa,
3403 default_isa);
3404 count = bitmap_popcount (candidate_isa);
3406 if (count < current_bit_count)
3408 best_fit = sel;
3409 current_bit_count = count;
3413 gcc_assert (best_fit);
3414 sel = best_fit;
3417 arm_selected_cpu = sel;
3420 /* Now we know the CPU, we can finally initialize the target
3421 structure. */
3422 target->core_name = arm_selected_cpu->common.name;
3423 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3424 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3425 cpu_opts);
3426 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3429 gcc_assert (arm_selected_cpu);
3430 gcc_assert (arm_selected_arch);
3432 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3434 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3435 auto_sbitmap fpu_bits (isa_num_bits);
3437 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3438 /* This should clear out ALL bits relating to the FPU/simd
3439 extensions, to avoid potentially invalid combinations later on
3440 that we can't match. At present we only clear out those bits
3441 that can be set by -mfpu. This should be fixed in GCC-12. */
3442 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3443 bitmap_ior (target->isa, target->isa, fpu_bits);
3446 /* If we have the soft-float ABI, clear any feature bits relating to use of
3447 floating-point operations. They'll just confuse things later on. */
3448 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3449 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3451 /* There may be implied bits which we still need to enable. These are
3452 non-named features which are needed to complete other sets of features,
3453 but cannot be enabled from arm-cpus.in due to being shared between
3454 multiple fgroups. Each entry in all_implied_fbits is of the form
3455 ante -> cons, meaning that if the feature "ante" is enabled, we should
3456 implicitly enable "cons". */
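   /* A hedged illustration (the concrete pairs live in the generated
      all_implied_fbits table, not in this file): when an "ante" bit such as
      a floating-point SIMD feature is enabled, its unnamed "cons" base bit
      is switched on here as well, so the later FPU checks see a complete
      feature set.  */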
3457 const struct fbit_implication *impl = all_implied_fbits;
3458 while (impl->ante)
3460 if (bitmap_bit_p (target->isa, impl->ante))
3461 bitmap_set_bit (target->isa, impl->cons);
3462 impl++;
3465 if (!arm_selected_tune)
3466 arm_selected_tune = arm_selected_cpu;
3467 else /* Validate the features passed to -mtune. */
3468 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3470 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3472 /* Finish initializing the target structure. */
3473 if (!target->arch_name)
3474 target->arch_name = arm_selected_arch->common.name;
3475 target->arch_pp_name = arm_selected_arch->arch;
3476 target->base_arch = arm_selected_arch->base_arch;
3477 target->profile = arm_selected_arch->profile;
3479 target->tune_flags = tune_data->tune_flags;
3480 target->tune = tune_data->tune;
3481 target->tune_core = tune_data->scheduler;
3484 /* Fix up any incompatible options that the user has specified. */
3485 static void
3486 arm_option_override (void)
3488 static const enum isa_feature fpu_bitlist_internal[]
3489 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3490 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3491 static const enum isa_feature fp_bitlist[]
3492 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3493 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3494 cl_target_option opts;
3496 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3497 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3499 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3500 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3501 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3502 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3504 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3506 if (!OPTION_SET_P (arm_fpu_index))
3508 bool ok;
3509 int fpu_index;
3511 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3512 CL_TARGET);
3513 gcc_assert (ok);
3514 arm_fpu_index = (enum fpu_type) fpu_index;
3517 cl_target_option_save (&opts, &global_options, &global_options_set);
3518 arm_configure_build_target (&arm_active_target, &opts, true);
3520 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3521 SUBTARGET_OVERRIDE_OPTIONS;
3522 #endif
3524 /* Initialize boolean versions of the architectural flags, for use
3525 in the arm.md file and for enabling feature flags. */
3526 arm_option_reconfigure_globals ();
3528 arm_tune = arm_active_target.tune_core;
3529 tune_flags = arm_active_target.tune_flags;
3530 current_tune = arm_active_target.tune;
3532 /* TBD: Dwarf info for apcs frame is not handled yet. */
3533 if (TARGET_APCS_FRAME)
3534 flag_shrink_wrap = false;
3536 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3538 warning (0, "%<-mapcs-stack-check%> incompatible with "
3539 "%<-mno-apcs-frame%>");
3540 target_flags |= MASK_APCS_FRAME;
3543 if (TARGET_POKE_FUNCTION_NAME)
3544 target_flags |= MASK_APCS_FRAME;
3546 if (TARGET_APCS_REENT && flag_pic)
3547 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3549 if (TARGET_APCS_REENT)
3550 warning (0, "APCS reentrant code not supported. Ignored");
3552 /* Set up some tuning parameters. */
3553 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3554 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3555 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3556 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3557 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3558 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3560 /* For arm2/3 there is no need to do any scheduling if we are doing
3561 software floating-point. */
3562 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3563 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3565 /* Override the default structure alignment for AAPCS ABI. */
3566 if (!OPTION_SET_P (arm_structure_size_boundary))
3568 if (TARGET_AAPCS_BASED)
3569 arm_structure_size_boundary = 8;
3571 else
3573 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3575 if (arm_structure_size_boundary != 8
3576 && arm_structure_size_boundary != 32
3577 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3579 if (ARM_DOUBLEWORD_ALIGN)
3580 warning (0,
3581 "structure size boundary can only be set to 8, 32 or 64");
3582 else
3583 warning (0, "structure size boundary can only be set to 8 or 32");
3584 arm_structure_size_boundary
3585 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3589 if (TARGET_VXWORKS_RTP)
3591 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3592 arm_pic_data_is_text_relative = 0;
3594 else if (flag_pic
3595 && !arm_pic_data_is_text_relative
3596 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3597 /* When text & data segments don't have a fixed displacement, the
3598 intended use is with a single, read only, pic base register.
3599 Unless the user explicitly requested not to do that, set
3600 it. */
3601 target_flags |= MASK_SINGLE_PIC_BASE;
3603 /* If stack checking is disabled, we can use r10 as the PIC register,
3604 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3605 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3607 if (TARGET_VXWORKS_RTP)
3608 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3609 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3612 if (flag_pic && TARGET_VXWORKS_RTP)
3613 arm_pic_register = 9;
3615 /* If in FDPIC mode then force arm_pic_register to be r9. */
3616 if (TARGET_FDPIC)
3618 arm_pic_register = FDPIC_REGNUM;
3619 if (TARGET_THUMB1)
3620 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3623 if (arm_pic_register_string != NULL)
3625 int pic_register = decode_reg_name (arm_pic_register_string);
3627 if (!flag_pic)
3628 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3630 /* Prevent the user from choosing an obviously stupid PIC register. */
3631 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3632 || pic_register == HARD_FRAME_POINTER_REGNUM
3633 || pic_register == STACK_POINTER_REGNUM
3634 || pic_register >= PC_REGNUM
3635 || (TARGET_VXWORKS_RTP
3636 && (unsigned int) pic_register != arm_pic_register))
3637 error ("unable to use %qs for PIC register", arm_pic_register_string);
3638 else
3639 arm_pic_register = pic_register;
3642 if (flag_pic)
3643 target_word_relocations = 1;
3645 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
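   /* A value of 2 is the option's initial value, i.e. the user passed
      neither -mfix-cortex-m3-ldrd nor its negative form (assumption based on
      the arm.opt defaults); the same convention applies to fix_vlldm and
      fix_aes_erratum_1742098 below.  */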
3646 if (fix_cm3_ldrd == 2)
3648 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3649 fix_cm3_ldrd = 1;
3650 else
3651 fix_cm3_ldrd = 0;
3654 /* Enable fix_vlldm by default if required. */
3655 if (fix_vlldm == 2)
3657 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3658 fix_vlldm = 1;
3659 else
3660 fix_vlldm = 0;
3663 /* Enable fix_aes by default if required. */
3664 if (fix_aes_erratum_1742098 == 2)
3666 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3667 fix_aes_erratum_1742098 = 1;
3668 else
3669 fix_aes_erratum_1742098 = 0;
3672 /* Hot/Cold partitioning is not currently supported, since we can't
3673 handle literal pool placement in that case. */
3674 if (flag_reorder_blocks_and_partition)
3676 inform (input_location,
3677 "%<-freorder-blocks-and-partition%> not supported "
3678 "on this architecture");
3679 flag_reorder_blocks_and_partition = 0;
3680 flag_reorder_blocks = 1;
3683 if (flag_pic)
3684 /* Hoisting PIC address calculations more aggressively provides a small,
3685 but measurable, size reduction for PIC code. Therefore, we decrease
3686 the bar for unrestricted expression hoisting to the cost of PIC address
3687 calculation, which is 2 instructions. */
3688 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3689 param_gcse_unrestricted_cost, 2);
3691 /* ARM EABI defaults to strict volatile bitfields. */
3692 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3693 && abi_version_at_least(2))
3694 flag_strict_volatile_bitfields = 1;
3696 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3697 have deemed it beneficial (signified by setting
3698 prefetch.num_slots to 1 or more). */
3699 if (flag_prefetch_loop_arrays < 0
3700 && HAVE_prefetch
3701 && optimize >= 3
3702 && current_tune->prefetch.num_slots > 0)
3703 flag_prefetch_loop_arrays = 1;
3705 /* Set up parameters to be used in prefetching algorithm. Do not
3706 override the defaults unless we are tuning for a core we have
3707 researched values for. */
3708 if (current_tune->prefetch.num_slots > 0)
3709 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3710 param_simultaneous_prefetches,
3711 current_tune->prefetch.num_slots);
3712 if (current_tune->prefetch.l1_cache_line_size >= 0)
3713 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3714 param_l1_cache_line_size,
3715 current_tune->prefetch.l1_cache_line_size);
3716 if (current_tune->prefetch.l1_cache_line_size >= 0)
3718 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3719 param_destruct_interfere_size,
3720 current_tune->prefetch.l1_cache_line_size);
3721 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3722 param_construct_interfere_size,
3723 current_tune->prefetch.l1_cache_line_size);
3725 else
3727 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3728 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3729 constructive? */
3730 /* More recent Cortex chips have a 64-byte cache line, but are marked
3731 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3732 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3733 param_destruct_interfere_size, 64);
3734 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3735 param_construct_interfere_size, 64);
3738 if (current_tune->prefetch.l1_cache_size >= 0)
3739 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3740 param_l1_cache_size,
3741 current_tune->prefetch.l1_cache_size);
3743 /* Look through ready list and all of queue for instructions
3744 relevant for L2 auto-prefetcher. */
3745 int sched_autopref_queue_depth;
3747 switch (current_tune->sched_autopref)
3749 case tune_params::SCHED_AUTOPREF_OFF:
3750 sched_autopref_queue_depth = -1;
3751 break;
3753 case tune_params::SCHED_AUTOPREF_RANK:
3754 sched_autopref_queue_depth = 0;
3755 break;
3757 case tune_params::SCHED_AUTOPREF_FULL:
3758 sched_autopref_queue_depth = max_insn_queue_index + 1;
3759 break;
3761 default:
3762 gcc_unreachable ();
3765 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3766 param_sched_autopref_queue_depth,
3767 sched_autopref_queue_depth);
3769 /* Currently, for slow flash data, we just disable literal pools. We also
3770 disable it for pure-code. */
3771 if (target_slow_flash_data || target_pure_code)
3772 arm_disable_literal_pool = true;
3774 /* Disable scheduling fusion by default if it's not armv7 processor
3775 or doesn't prefer ldrd/strd. */
3776 if (flag_schedule_fusion == 2
3777 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3778 flag_schedule_fusion = 0;
3780 /* Need to remember initial options before they are overriden. */
3781 init_optimize = build_optimization_node (&global_options,
3782 &global_options_set);
3784 arm_options_perform_arch_sanity_checks ();
3785 arm_option_override_internal (&global_options, &global_options_set);
3786 arm_option_check_internal (&global_options);
3787 arm_option_params_internal ();
3789 /* Create the default target_options structure. */
3790 target_option_default_node = target_option_current_node
3791 = build_target_option_node (&global_options, &global_options_set);
3793 /* Register global variables with the garbage collector. */
3794 arm_add_gc_roots ();
3796 /* Init initial mode for testing. */
3797 thumb_flipper = TARGET_THUMB;
3801 /* Reconfigure global status flags from the active_target.isa. */
3802 void
3803 arm_option_reconfigure_globals (void)
3805 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
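  /* For example (illustrative), -march=armv7-a has the printable name "7A",
     so arm_arch_name becomes "__ARM_ARCH_7A__", the macro later defined for
     the preprocessor.  */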
3806 arm_base_arch = arm_active_target.base_arch;
3808 /* Initialize boolean versions of the architectural flags, for use
3809 in the arm.md file. */
3810 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3811 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3812 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3813 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3814 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3815 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3816 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3817 arm_arch6m = arm_arch6 && !arm_arch_notm;
3818 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3819 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3820 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3821 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3822 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3823 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3824 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3825 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3826 isa_bit_armv8_1m_main);
3827 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3828 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3829 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3830 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3831 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3832 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3833 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3834 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3835 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3836 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3837 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3838 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3840 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3841 if (arm_fp16_inst)
3843 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3844 error ("selected fp16 options are incompatible");
3845 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3848 arm_arch_cde = 0;
3849 arm_arch_cde_coproc = 0;
3850 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3851 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3852 isa_bit_cdecp6, isa_bit_cdecp7};
3853 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3855 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3856 if (cde_bit)
3858 arm_arch_cde |= cde_bit;
3859 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3863 /* And finally, set up some quirks. */
3864 arm_arch_no_volatile_ce
3865 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3866 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3867 isa_bit_quirk_armv6kz);
3869 /* Use the cp15 method if it is available. */
3870 if (target_thread_pointer == TP_AUTO)
3872 if (arm_arch6k && !TARGET_THUMB1)
3873 target_thread_pointer = TP_CP15;
3874 else
3875 target_thread_pointer = TP_SOFT;
3878 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3879 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3882 /* Perform some validation between the desired architecture and the rest of the
3883 options. */
3884 void
3885 arm_options_perform_arch_sanity_checks (void)
3887 /* V5T code we generate is completely interworking capable, so we turn off
3888 TARGET_INTERWORK here to avoid many tests later on. */
3890 /* XXX However, we must pass the right pre-processor defines to CPP
3891 or GLD can get confused. This is a hack. */
3892 if (TARGET_INTERWORK)
3893 arm_cpp_interwork = 1;
3895 if (arm_arch5t)
3896 target_flags &= ~MASK_INTERWORK;
3898 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3899 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3901 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3902 error ("iwmmxt abi requires an iwmmxt capable cpu");
3904 /* BPABI targets use linker tricks to allow interworking on cores
3905 without thumb support. */
3906 if (TARGET_INTERWORK
3907 && !TARGET_BPABI
3908 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3910 warning (0, "target CPU does not support interworking");
3911 target_flags &= ~MASK_INTERWORK;
3914 /* If soft-float is specified then don't use FPU. */
3915 if (TARGET_SOFT_FLOAT)
3916 arm_fpu_attr = FPU_NONE;
3917 else
3918 arm_fpu_attr = FPU_VFP;
3920 if (TARGET_AAPCS_BASED)
3922 if (TARGET_CALLER_INTERWORKING)
3923 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3924 else
3925 if (TARGET_CALLEE_INTERWORKING)
3926 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3929 /* __fp16 support currently assumes the core has ldrh. */
3930 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3931 sorry ("%<__fp16%> and no ldrh");
3933 if (use_cmse && !arm_arch_cmse)
3934 error ("target CPU does not support ARMv8-M Security Extensions");
3936 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3937 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3938 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3939 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3942 if (TARGET_AAPCS_BASED)
3944 if (arm_abi == ARM_ABI_IWMMXT)
3945 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3946 else if (TARGET_HARD_FLOAT_ABI)
3948 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3949 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3950 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3951 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3953 else
3954 arm_pcs_default = ARM_PCS_AAPCS;
3956 else
3958 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3959 sorry ("%<-mfloat-abi=hard%> and VFP");
3961 if (arm_abi == ARM_ABI_APCS)
3962 arm_pcs_default = ARM_PCS_APCS;
3963 else
3964 arm_pcs_default = ARM_PCS_ATPCS;
3968 /* Test whether a local function descriptor is canonical, i.e.,
3969 whether we can use GOTOFFFUNCDESC to compute the address of the
3970 function. */
3971 static bool
3972 arm_fdpic_local_funcdesc_p (rtx fnx)
3974 tree fn;
3975 enum symbol_visibility vis;
3976 bool ret;
3978 if (!TARGET_FDPIC)
3979 return true;
3981 if (! SYMBOL_REF_LOCAL_P (fnx))
3982 return false;
3984 fn = SYMBOL_REF_DECL (fnx);
3986 if (! fn)
3987 return false;
3989 vis = DECL_VISIBILITY (fn);
3991 if (vis == VISIBILITY_PROTECTED)
3992 /* Private function descriptors for protected functions are not
3993 canonical. Temporarily change the visibility to global so that
3994 we can ensure uniqueness of funcdesc pointers. */
3995 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3997 ret = default_binds_local_p_1 (fn, flag_pic);
3999 DECL_VISIBILITY (fn) = vis;
4001 return ret;
4004 static void
4005 arm_add_gc_roots (void)
4007 gcc_obstack_init(&minipool_obstack);
4008 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4011 /* A table of known ARM exception types.
4012 For use with the interrupt function attribute. */
4014 typedef struct
4016 const char *const arg;
4017 const unsigned long return_value;
4019 isr_attribute_arg;
4021 static const isr_attribute_arg isr_attribute_args [] =
4023 { "IRQ", ARM_FT_ISR },
4024 { "irq", ARM_FT_ISR },
4025 { "FIQ", ARM_FT_FIQ },
4026 { "fiq", ARM_FT_FIQ },
4027 { "ABORT", ARM_FT_ISR },
4028 { "abort", ARM_FT_ISR },
4029 { "UNDEF", ARM_FT_EXCEPTION },
4030 { "undef", ARM_FT_EXCEPTION },
4031 { "SWI", ARM_FT_EXCEPTION },
4032 { "swi", ARM_FT_EXCEPTION },
4033 { NULL, ARM_FT_NORMAL }
4036 /* Returns the (interrupt) function type of the current
4037 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4039 static unsigned long
4040 arm_isr_value (tree argument)
4042 const isr_attribute_arg * ptr;
4043 const char * arg;
4045 if (!arm_arch_notm)
4046 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4048 /* No argument - default to IRQ. */
4049 if (argument == NULL_TREE)
4050 return ARM_FT_ISR;
4052 /* Get the value of the argument. */
4053 if (TREE_VALUE (argument) == NULL_TREE
4054 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4055 return ARM_FT_UNKNOWN;
4057 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4059 /* Check it against the list of known arguments. */
4060 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4061 if (streq (arg, ptr->arg))
4062 return ptr->return_value;
4064 /* An unrecognized interrupt type. */
4065 return ARM_FT_UNKNOWN;
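/* Illustrative use (not from the original source): a handler declared as
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   reaches arm_isr_value with ARGUMENT holding the string "FIQ" and is
   classified as ARM_FT_FIQ by the table above.  */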
4068 /* Computes the type of the current function. */
4070 static unsigned long
4071 arm_compute_func_type (void)
4073 unsigned long type = ARM_FT_UNKNOWN;
4074 tree a;
4075 tree attr;
4077 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4079 /* Decide if the current function is volatile. Such functions
4080 never return, and many memory cycles can be saved by not storing
4081 register values that will never be needed again. This optimization
4082 was added to speed up context switching in a kernel application. */
4083 if (optimize > 0
4084 && (TREE_NOTHROW (current_function_decl)
4085 || !(flag_unwind_tables
4086 || (flag_exceptions
4087 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4088 && TREE_THIS_VOLATILE (current_function_decl))
4089 type |= ARM_FT_VOLATILE;
4091 if (cfun->static_chain_decl != NULL)
4092 type |= ARM_FT_NESTED;
4094 attr = DECL_ATTRIBUTES (current_function_decl);
4096 a = lookup_attribute ("naked", attr);
4097 if (a != NULL_TREE)
4098 type |= ARM_FT_NAKED;
4100 a = lookup_attribute ("isr", attr);
4101 if (a == NULL_TREE)
4102 a = lookup_attribute ("interrupt", attr);
4104 if (a == NULL_TREE)
4105 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4106 else
4107 type |= arm_isr_value (TREE_VALUE (a));
4109 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4110 type |= ARM_FT_CMSE_ENTRY;
4112 return type;
4115 /* Returns the type of the current function. */
4117 unsigned long
4118 arm_current_func_type (void)
4120 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4121 cfun->machine->func_type = arm_compute_func_type ();
4123 return cfun->machine->func_type;
4126 bool
4127 arm_allocate_stack_slots_for_args (void)
4129 /* Naked functions should not allocate stack slots for arguments. */
4130 return !IS_NAKED (arm_current_func_type ());
4133 static bool
4134 arm_warn_func_return (tree decl)
4136 /* Naked functions are implemented entirely in assembly, including the
4137 return sequence, so suppress warnings about this. */
4138 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4142 /* Output assembler code for a block containing the constant parts
4143 of a trampoline, leaving space for the variable parts.
4145 On the ARM, (if r8 is the static chain regnum, and remembering that
4146 referencing pc adds an offset of 8) the trampoline looks like:
4147 ldr r8, [pc, #0]
4148 ldr pc, [pc]
4149 .word static chain value
4150 .word function's address
4151 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4153 In FDPIC mode, the trampoline looks like:
4154 .word trampoline address
4155 .word trampoline GOT address
4156 ldr r12, [pc, #8] ; #4 for Arm mode
4157 ldr r9, [pc, #8] ; #4 for Arm mode
4158 ldr pc, [pc, #8] ; #4 for Arm mode
4159 .word static chain value
4160 .word GOT address
4161 .word function's address
4164 static void
4165 arm_asm_trampoline_template (FILE *f)
4167 fprintf (f, "\t.syntax unified\n");
4169 if (TARGET_FDPIC)
4171 /* The first two words are a function descriptor pointing to the
4172 trampoline code just below. */
4173 if (TARGET_ARM)
4174 fprintf (f, "\t.arm\n");
4175 else if (TARGET_THUMB2)
4176 fprintf (f, "\t.thumb\n");
4177 else
4178 /* Only ARM and Thumb-2 are supported. */
4179 gcc_unreachable ();
4181 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4182 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4183 /* Trampoline code which sets the static chain register but also
4184 PIC register before jumping into real code. */
4185 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4186 STATIC_CHAIN_REGNUM, PC_REGNUM,
4187 TARGET_THUMB2 ? 8 : 4);
4188 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4189 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4190 TARGET_THUMB2 ? 8 : 4);
4191 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4192 PC_REGNUM, PC_REGNUM,
4193 TARGET_THUMB2 ? 8 : 4);
4194 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4196 else if (TARGET_ARM)
4198 fprintf (f, "\t.arm\n");
4199 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4200 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4202 else if (TARGET_THUMB2)
4204 fprintf (f, "\t.thumb\n");
4205 /* The Thumb-2 trampoline is similar to the arm implementation.
4206 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4207 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4208 STATIC_CHAIN_REGNUM, PC_REGNUM);
4209 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4211 else
4213 ASM_OUTPUT_ALIGN (f, 2);
4214 fprintf (f, "\t.code\t16\n");
4215 fprintf (f, ".Ltrampoline_start:\n");
4216 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4217 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4218 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4219 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4220 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4221 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4223 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4224 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4227 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4229 static void
4230 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4232 rtx fnaddr, mem, a_tramp;
4234 emit_block_move (m_tramp, assemble_trampoline_template (),
4235 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4237 if (TARGET_FDPIC)
4239 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4240 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4241 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4242 /* The function start address is at offset 8, but in Thumb mode
4243 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4244 below. */
4245 rtx trampoline_code_start
4246 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4248 /* Write initial funcdesc which points to the trampoline. */
4249 mem = adjust_address (m_tramp, SImode, 0);
4250 emit_move_insn (mem, trampoline_code_start);
4251 mem = adjust_address (m_tramp, SImode, 4);
4252 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4253 /* Setup static chain. */
4254 mem = adjust_address (m_tramp, SImode, 20);
4255 emit_move_insn (mem, chain_value);
4256 /* GOT + real function entry point. */
4257 mem = adjust_address (m_tramp, SImode, 24);
4258 emit_move_insn (mem, gotaddr);
4259 mem = adjust_address (m_tramp, SImode, 28);
4260 emit_move_insn (mem, fnaddr);
4262 else
4264 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4265 emit_move_insn (mem, chain_value);
4267 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4268 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4269 emit_move_insn (mem, fnaddr);
4272 a_tramp = XEXP (m_tramp, 0);
4273 maybe_emit_call_builtin___clear_cache (a_tramp,
4274 plus_constant (ptr_mode,
4275 a_tramp,
4276 TRAMPOLINE_SIZE));
4279 /* Thumb trampolines should be entered in thumb mode, so set
4280 the bottom bit of the address. */
4282 static rtx
4283 arm_trampoline_adjust_address (rtx addr)
4285 /* For FDPIC don't fix trampoline address since it's a function
4286 descriptor and not a function address. */
4287 if (TARGET_THUMB && !TARGET_FDPIC)
4288 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4289 NULL, 0, OPTAB_LIB_WIDEN);
4290 return addr;
4293 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4294 includes call-clobbered registers too. If this is a leaf function
4295 we can just examine the registers used by the RTL, but otherwise we
4296 have to assume that whatever function is called might clobber
4297 anything, and so we have to save all the call-clobbered registers
4298 as well. */
4299 static inline bool reg_needs_saving_p (unsigned reg)
4301 unsigned long func_type = arm_current_func_type ();
4303 if (IS_INTERRUPT (func_type))
4304 if (df_regs_ever_live_p (reg)
4305 /* Save call-clobbered core registers. */
4306 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4307 return true;
4308 else
4309 return false;
4310 else
4311 if (!df_regs_ever_live_p (reg)
4312 || call_used_or_fixed_reg_p (reg))
4313 return false;
4314 else
4315 return true;
4318 /* Return 1 if it is possible to return using a single instruction.
4319 If SIBLING is non-null, this is a test for a return before a sibling
4320 call. SIBLING is the call insn, so we can examine its register usage. */
4323 use_return_insn (int iscond, rtx sibling)
4325 int regno;
4326 unsigned int func_type;
4327 unsigned long saved_int_regs;
4328 unsigned HOST_WIDE_INT stack_adjust;
4329 arm_stack_offsets *offsets;
4331 /* Never use a return instruction before reload has run. */
4332 if (!reload_completed)
4333 return 0;
4335 func_type = arm_current_func_type ();
4337 /* Naked, volatile and stack alignment functions need special
4338 consideration. */
4339 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4340 return 0;
4342 /* So do interrupt functions that use the frame pointer and Thumb
4343 interrupt functions. */
4344 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4345 return 0;
4347 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4348 && !optimize_function_for_size_p (cfun))
4349 return 0;
4351 offsets = arm_get_frame_offsets ();
4352 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4354 /* As do variadic functions. */
4355 if (crtl->args.pretend_args_size
4356 || cfun->machine->uses_anonymous_args
4357 /* Or if the function calls __builtin_eh_return () */
4358 || crtl->calls_eh_return
4359 /* Or if the function calls alloca */
4360 || cfun->calls_alloca
4361 /* Or if there is a stack adjustment. However, if the stack pointer
4362 is saved on the stack, we can use a pre-incrementing stack load. */
4363 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4364 && stack_adjust == 4))
4365 /* Or if the static chain register was saved above the frame, under the
4366 assumption that the stack pointer isn't saved on the stack. */
4367 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4368 && arm_compute_static_chain_stack_bytes() != 0))
4369 return 0;
4371 saved_int_regs = offsets->saved_regs_mask;
4373 /* Unfortunately, the insn
4375 ldmib sp, {..., sp, ...}
4377 triggers a bug on most SA-110 based devices, such that the stack
4378 pointer won't be correctly restored if the instruction takes a
4379 page fault. We work around this problem by popping r3 along with
4380 the other registers, since that is never slower than executing
4381 another instruction.
4383 We test for !arm_arch5t here, because code for any architecture
4384 less than this could potentially be run on one of the buggy
4385 chips. */
4386 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4388 /* Validate that r3 is a call-clobbered register (always true in
4389 the default abi) ... */
4390 if (!call_used_or_fixed_reg_p (3))
4391 return 0;
4393 /* ... that it isn't being used for a return value ... */
4394 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4395 return 0;
4397 /* ... or for a tail-call argument ... */
4398 if (sibling)
4400 gcc_assert (CALL_P (sibling));
4402 if (find_regno_fusage (sibling, USE, 3))
4403 return 0;
4406 /* ... and that there are no call-saved registers in r0-r2
4407 (always true in the default ABI). */
4408 if (saved_int_regs & 0x7)
4409 return 0;
4412 /* Can't be done if interworking with Thumb, and any registers have been
4413 stacked. */
4414 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4415 return 0;
4417 /* On StrongARM, conditional returns are expensive if they aren't
4418 taken and multiple registers have been stacked. */
4419 if (iscond && arm_tune_strongarm)
4421 /* Conditional return when just the LR is stored is a simple
4422 conditional-load instruction, that's not expensive. */
4423 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4424 return 0;
4426 if (flag_pic
4427 && arm_pic_register != INVALID_REGNUM
4428 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4429 return 0;
4432 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4433 several instructions if anything needs to be popped. Armv8.1-M Mainline
4434 also needs several instructions to save and restore FP context. */
4435 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4436 return 0;
4438 /* If there are saved registers but the LR isn't saved, then we need
4439 two instructions for the return. */
4440 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4441 return 0;
4443 /* Can't be done if any of the VFP regs are pushed,
4444 since this also requires an insn. */
4445 if (TARGET_VFP_BASE)
4446 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4447 if (reg_needs_saving_p (regno))
4448 return 0;
4450 if (TARGET_REALLY_IWMMXT)
4451 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4452 if (reg_needs_saving_p (regno))
4453 return 0;
4455 return 1;
4458 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4459 shrink-wrapping if possible. This is the case if we need to emit a
4460 prologue, which we can test by looking at the offsets. */
4461 bool
4462 use_simple_return_p (void)
4464 arm_stack_offsets *offsets;
4466 /* Note this function can be called before or after reload. */
4467 if (!reload_completed)
4468 arm_compute_frame_layout ();
4470 offsets = arm_get_frame_offsets ();
4471 return offsets->outgoing_args != 0;
4474 /* Return TRUE if int I is a valid immediate ARM constant. */
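/* Worked examples (illustrative): 0xFF000000 is valid in ARM state (0xFF
   rotated right by 8), while 0x1FE (0xFF << 1) is valid only in Thumb-2
   state, because ARM data-processing immediates are an 8-bit value rotated
   right by an even amount, whereas Thumb-2 also allows arbitrary shifts and
   the replicated byte patterns handled below.  */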
4477 const_ok_for_arm (HOST_WIDE_INT i)
4479 int lowbit;
4481 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4482 be all zero, or all one. */
4483 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4484 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4485 != ((~(unsigned HOST_WIDE_INT) 0)
4486 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4487 return FALSE;
4489 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4491 /* Fast return for 0 and small values. We must do this for zero, since
4492 the code below can't handle that one case. */
4493 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4494 return TRUE;
4496 /* Get the number of trailing zeros. */
4497 lowbit = ffs((int) i) - 1;
4499 /* Only even shifts are allowed in ARM mode so round down to the
4500 nearest even number. */
4501 if (TARGET_ARM)
4502 lowbit &= ~1;
4504 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4505 return TRUE;
4507 if (TARGET_ARM)
4509 /* Allow rotated constants in ARM mode. */
4510 if (lowbit <= 4
4511 && ((i & ~0xc000003f) == 0
4512 || (i & ~0xf000000f) == 0
4513 || (i & ~0xfc000003) == 0))
4514 return TRUE;
4516 else if (TARGET_THUMB2)
4518 HOST_WIDE_INT v;
4520 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4521 v = i & 0xff;
4522 v |= v << 16;
4523 if (i == v || i == (v | (v << 8)))
4524 return TRUE;
4526 /* Allow repeated pattern 0xXY00XY00. */
4527 v = i & 0xff00;
4528 v |= v << 16;
4529 if (i == v)
4530 return TRUE;
4532 else if (TARGET_HAVE_MOVT)
4534 /* Thumb-1 Targets with MOVT. */
4535 if (i > 0xffff)
4536 return FALSE;
4537 else
4538 return TRUE;
4541 return FALSE;
4544 /* Return true if I is a valid constant for the operation CODE. */
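/* For instance (illustrative): an AND with 0xFFFFFF00 is accepted because
   its complement 0xFF is a valid immediate (the operation can be emitted as
   a BIC), and on Thumb-2 a PLUS of any value up to 4095 in magnitude is
   accepted because addw/subw take a 12-bit immediate.  */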
4546 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4548 if (const_ok_for_arm (i))
4549 return 1;
4551 switch (code)
4553 case SET:
4554 /* See if we can use movw. */
4555 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4556 return 1;
4557 else
4558 /* Otherwise, try mvn. */
4559 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4561 case PLUS:
4562 /* See if we can use addw or subw. */
4563 if (TARGET_THUMB2
4564 && ((i & 0xfffff000) == 0
4565 || ((-i) & 0xfffff000) == 0))
4566 return 1;
4567 /* Fall through. */
4568 case COMPARE:
4569 case EQ:
4570 case NE:
4571 case GT:
4572 case LE:
4573 case LT:
4574 case GE:
4575 case GEU:
4576 case LTU:
4577 case GTU:
4578 case LEU:
4579 case UNORDERED:
4580 case ORDERED:
4581 case UNEQ:
4582 case UNGE:
4583 case UNLT:
4584 case UNGT:
4585 case UNLE:
4586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4589 case XOR:
4590 return 0;
4592 case IOR:
4593 if (TARGET_THUMB2)
4594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4595 return 0;
4597 case AND:
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4600 default:
4601 gcc_unreachable ();
4605 /* Return true if I is a valid DImode constant for the operation CODE. */
4607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4611 rtx hi = GEN_INT (hi_val);
4612 rtx lo = GEN_INT (lo_val);
4614 if (TARGET_THUMB1)
4615 return 0;
4617 switch (code)
4619 case AND:
4620 case IOR:
4621 case XOR:
4622 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4623 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4624 case PLUS:
4625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4627 default:
4628 return 0;
4632 /* Emit a sequence of insns to handle a large constant.
4633 CODE is the code of the operation required, it can be any of SET, PLUS,
4634 IOR, AND, XOR, MINUS;
4635 MODE is the mode in which the operation is being performed;
4636 VAL is the integer to operate on;
4637 SOURCE is the other operand (a register, or a null-pointer for SET);
4638 SUBTARGETS means it is safe to create scratch registers if that will
4639 either produce a simpler sequence, or we will want to cse the values.
4640 Return value is the number of insns emitted. */
4642 /* ??? Tweak this for thumb2. */
4644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4647 rtx cond;
4649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4650 cond = COND_EXEC_TEST (PATTERN (insn));
4651 else
4652 cond = NULL_RTX;
4654 if (subtargets || code == SET
4655 || (REG_P (target) && REG_P (source)
4656 && REGNO (target) != REGNO (source)))
4658 /* After arm_reorg has been called, we can't fix up expensive
4659 constants by pushing them into memory so we must synthesize
4660 them in-line, regardless of the cost. This is only likely to
4661 be more costly on chips that have load delay slots and we are
4662 compiling without running the scheduler (so no splitting
4663 occurred before the final instruction emission).
4665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4667 if (!cfun->machine->after_arm_reorg
4668 && !cond
4669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4670 1, 0)
4671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4672 + (code != SET))))
4674 if (code == SET)
4676 /* Currently SET is the only monadic value for CODE, all
4677 the rest are dyadic. */
4678 if (TARGET_USE_MOVT)
4679 arm_emit_movpair (target, GEN_INT (val));
4680 else
4681 emit_set_insn (target, GEN_INT (val));
4683 return 1;
4685 else
4687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4689 if (TARGET_USE_MOVT)
4690 arm_emit_movpair (temp, GEN_INT (val));
4691 else
4692 emit_set_insn (temp, GEN_INT (val));
4694 /* For MINUS, the value is subtracted from, since we never
4695 have subtraction of a constant. */
4696 if (code == MINUS)
4697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4698 else
4699 emit_set_insn (target,
4700 gen_rtx_fmt_ee (code, mode, source, temp));
4701 return 2;
4706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4711 ARM/THUMB2 immediates and add up to VAL.
4712 The function's return value gives the number of insns required. */
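/* Illustrative example: in ARM state VAL = 0x00FF00FF is not itself a valid
   immediate, but it can be covered by two valid immediates such as
   0x00FF0000 and 0x000000FF, so a two-element sequence is returned; in
   Thumb-2 state the same value is a single replicated constant and is
   normally handled before any splitting is attempted.  */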
4713 static int
4714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4715 struct four_ints *return_sequence)
4717 int best_consecutive_zeros = 0;
4718 int i;
4719 int best_start = 0;
4720 int insns1, insns2;
4721 struct four_ints tmp_sequence;
4723 /* If we aren't targeting ARM, the best place to start is always at
4724 the bottom, otherwise look more closely. */
4725 if (TARGET_ARM)
4727 for (i = 0; i < 32; i += 2)
4729 int consecutive_zeros = 0;
4731 if (!(val & (3 << i)))
4733 while ((i < 32) && !(val & (3 << i)))
4735 consecutive_zeros += 2;
4736 i += 2;
4738 if (consecutive_zeros > best_consecutive_zeros)
4740 best_consecutive_zeros = consecutive_zeros;
4741 best_start = i - consecutive_zeros;
4743 i -= 2;
4748 /* So long as it won't require any more insns to do so, it's
4749 desirable to emit a small constant (in bits 0...9) in the last
4750 insn. This way there is more chance that it can be combined with
4751 a later addressing insn to form a pre-indexed load or store
4752 operation. Consider:
4754 *((volatile int *)0xe0000100) = 1;
4755 *((volatile int *)0xe0000110) = 2;
4757 We want this to wind up as:
4759 mov rA, #0xe0000000
4760 mov rB, #1
4761 str rB, [rA, #0x100]
4762 mov rB, #2
4763 str rB, [rA, #0x110]
4765 rather than having to synthesize both large constants from scratch.
4767 Therefore, we calculate how many insns would be required to emit
4768 the constant starting from `best_start', and also starting from
4769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4770 yield a shorter sequence, we may as well use zero. */
4771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4772 if (best_start != 0
4773 && ((HOST_WIDE_INT_1U << best_start) < val))
4775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4776 if (insns2 <= insns1)
4778 *return_sequence = tmp_sequence;
4779 insns1 = insns2;
4783 return insns1;
4786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4787 static int
4788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4789 struct four_ints *return_sequence, int i)
4791 int remainder = val & 0xffffffff;
4792 int insns = 0;
4794 /* Try and find a way of doing the job in either two or three
4795 instructions.
4797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4798 location. We start at position I. This may be the MSB, or
4799 optimal_immediate_sequence may have positioned it at the largest block
4800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4801 wrapping around to the top of the word when we drop off the bottom.
4802 In the worst case this code should produce no more than four insns.
4804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4805 constants, shifted to any arbitrary location. We should always start
4806 at the MSB. */
4809 int end;
4810 unsigned int b1, b2, b3, b4;
4811 unsigned HOST_WIDE_INT result;
4812 int loc;
4814 gcc_assert (insns < 4);
4816 if (i <= 0)
4817 i += 32;
4819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4822 loc = i;
4823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4824 /* We can use addw/subw for the last 12 bits. */
4825 result = remainder;
4826 else
4828 /* Use an 8-bit shifted/rotated immediate. */
4829 end = i - 8;
4830 if (end < 0)
4831 end += 32;
4832 result = remainder & ((0x0ff << end)
4833 | ((i < end) ? (0xff >> (32 - end))
4834 : 0));
4835 i -= 8;
4838 else
4840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4841 arbitrary shifts. */
4842 i -= TARGET_ARM ? 2 : 1;
4843 continue;
4846 /* Next, see if we can do a better job with a thumb2 replicated
4847 constant.
4849 We do it this way around to catch the cases like 0x01F001E0 where
4850 two 8-bit immediates would work, but a replicated constant would
4851 make it worse.
4853 TODO: 16-bit constants that don't clear all the bits, but still win.
4854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4855 if (TARGET_THUMB2)
4857 b1 = (remainder & 0xff000000) >> 24;
4858 b2 = (remainder & 0x00ff0000) >> 16;
4859 b3 = (remainder & 0x0000ff00) >> 8;
4860 b4 = remainder & 0xff;
4862 if (loc > 24)
4864 /* The 8-bit immediate already found clears b1 (and maybe b2),
4865 but must leave b3 and b4 alone. */
4867 /* First try to find a 32-bit replicated constant that clears
4868 almost everything. We can assume that we can't do it in one,
4869 or else we wouldn't be here. */
4870 unsigned int tmp = b1 & b2 & b3 & b4;
4871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4872 + (tmp << 24);
4873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4874 + (tmp == b3) + (tmp == b4);
4875 if (tmp
4876 && (matching_bytes >= 3
4877 || (matching_bytes == 2
4878 && const_ok_for_op (remainder & ~tmp2, code))))
4880 /* At least 3 of the bytes match, and the fourth has at
4881 least as many bits set, or two of the bytes match
4882 and it will only require one more insn to finish. */
4883 result = tmp2;
4884 i = tmp != b1 ? 32
4885 : tmp != b2 ? 24
4886 : tmp != b3 ? 16
4887 : 8;
4890 /* Second, try to find a 16-bit replicated constant that can
4891 leave three of the bytes clear. If b2 or b4 is already
4892 zero, then we can. If the 8-bit from above would not
4893 clear b2 anyway, then we still win. */
4894 else if (b1 == b3 && (!b2 || !b4
4895 || (remainder & 0x00ff0000 & ~result)))
4897 result = remainder & 0xff00ff00;
4898 i = 24;
4901 else if (loc > 16)
4903 /* The 8-bit immediate already found clears b2 (and maybe b3)
4904 and we don't get here unless b1 is already clear, but it will
4905 leave b4 unchanged. */
4907 /* If we can clear b2 and b4 at once, then we win, since the
4908 8-bits couldn't possibly reach that far. */
4909 if (b2 == b4)
4911 result = remainder & 0x00ff00ff;
4912 i = 16;
4917 return_sequence->i[insns++] = result;
4918 remainder &= ~result;
4920 if (code == SET || code == MINUS)
4921 code = PLUS;
4923 while (remainder);
4925 return insns;
4928 /* Emit an instruction with the indicated PATTERN. If COND is
4929 non-NULL, conditionalize the execution of the instruction on COND
4930 being true. */
4932 static void
4933 emit_constant_insn (rtx cond, rtx pattern)
4935 if (cond)
4936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4937 emit_insn (pattern);
4940 /* As above, but extra parameter GENERATE which, if clear, suppresses
4941 RTL generation. */
4943 static int
4944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4945 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4946 int subtargets, int generate)
4948 int can_invert = 0;
4949 int can_negate = 0;
4950 int final_invert = 0;
4951 int i;
4952 int set_sign_bit_copies = 0;
4953 int clear_sign_bit_copies = 0;
4954 int clear_zero_bit_copies = 0;
4955 int set_zero_bit_copies = 0;
4956 int insns = 0, neg_insns, inv_insns;
4957 unsigned HOST_WIDE_INT temp1, temp2;
4958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4959 struct four_ints *immediates;
4960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4962 /* Find out which operations are safe for a given CODE. Also do a quick
4963 check for degenerate cases; these can occur when DImode operations
4964 are split. */
4965 switch (code)
4967 case SET:
4968 can_invert = 1;
4969 break;
4971 case PLUS:
4972 can_negate = 1;
4973 break;
4975 case IOR:
4976 if (remainder == 0xffffffff)
4978 if (generate)
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 GEN_INT (ARM_SIGN_EXTEND (val))));
4982 return 1;
4985 if (remainder == 0)
4987 if (reload_completed && rtx_equal_p (target, source))
4988 return 0;
4990 if (generate)
4991 emit_constant_insn (cond, gen_rtx_SET (target, source));
4992 return 1;
4994 break;
4996 case AND:
4997 if (remainder == 0)
4999 if (generate)
5000 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5001 return 1;
5003 if (remainder == 0xffffffff)
5005 if (reload_completed && rtx_equal_p (target, source))
5006 return 0;
5007 if (generate)
5008 emit_constant_insn (cond, gen_rtx_SET (target, source));
5009 return 1;
5011 can_invert = 1;
5012 break;
5014 case XOR:
5015 if (remainder == 0)
5017 if (reload_completed && rtx_equal_p (target, source))
5018 return 0;
5019 if (generate)
5020 emit_constant_insn (cond, gen_rtx_SET (target, source));
5021 return 1;
5024 if (remainder == 0xffffffff)
5026 if (generate)
5027 emit_constant_insn (cond,
5028 gen_rtx_SET (target,
5029 gen_rtx_NOT (mode, source)));
5030 return 1;
5032 final_invert = 1;
5033 break;
5035 case MINUS:
5036 /* We treat MINUS as (val - source), since (source - val) is always
5037 passed as (source + (-val)). */
5038 if (remainder == 0)
5040 if (generate)
5041 emit_constant_insn (cond,
5042 gen_rtx_SET (target,
5043 gen_rtx_NEG (mode, source)));
5044 return 1;
5046 if (const_ok_for_arm (val))
5048 if (generate)
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 gen_rtx_MINUS (mode, GEN_INT (val),
5052 source)));
5053 return 1;
5056 break;
5058 default:
5059 gcc_unreachable ();
5062 /* If we can do it in one insn get out quickly. */
5063 if (const_ok_for_op (val, code))
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 (source
5069 ? gen_rtx_fmt_ee (code, mode, source,
5070 GEN_INT (val))
5071 : GEN_INT (val))));
5072 return 1;
5075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5076 insn. */
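  /* E.g. (illustrative) an AND with 0xFFFF becomes a single uxth, and on
     Thumb-2 an AND with 0x1FFFFF (2^21 - 1) becomes a single ubfx extracting
     the low 21 bits.  */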
5077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5080 if (generate)
5082 if (mode == SImode && i == 16)
5083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5084 smaller insn. */
5085 emit_constant_insn (cond,
5086 gen_zero_extendhisi2
5087 (target, gen_lowpart (HImode, source)));
5088 else
5089 /* Extz only supports SImode, but we can coerce the operands
5090 into that mode. */
5091 emit_constant_insn (cond,
5092 gen_extzv_t2 (gen_lowpart (SImode, target),
5093 gen_lowpart (SImode, source),
5094 GEN_INT (i), const0_rtx));
5097 return 1;
5100 /* Calculate a few attributes that may be useful for specific
5101 optimizations. */
5102 /* Count number of leading zeros. */
5103 for (i = 31; i >= 0; i--)
5105 if ((remainder & (1 << i)) == 0)
5106 clear_sign_bit_copies++;
5107 else
5108 break;
5111 /* Count number of leading 1's. */
5112 for (i = 31; i >= 0; i--)
5114 if ((remainder & (1 << i)) != 0)
5115 set_sign_bit_copies++;
5116 else
5117 break;
5120 /* Count number of trailing zeros. */
5121 for (i = 0; i <= 31; i++)
5123 if ((remainder & (1 << i)) == 0)
5124 clear_zero_bit_copies++;
5125 else
5126 break;
5129 /* Count number of trailing 1's. */
5130 for (i = 0; i <= 31; i++)
5132 if ((remainder & (1 << i)) != 0)
5133 set_zero_bit_copies++;
5134 else
5135 break;
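/* Worked example (illustrative, not in the original source): for
   remainder == 0xfff00000 the four loops above yield
   clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
   clear_zero_bit_copies == 20 and set_zero_bit_copies == 0.  */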
5138 switch (code)
5140 case SET:
5141 /* See if we can do this by sign_extending a constant that is known
5142 to be negative. This is a good way of doing it, since the shift
5143 may well merge into a subsequent insn. */
5144 if (set_sign_bit_copies > 1)
5146 if (const_ok_for_arm
5147 (temp1 = ARM_SIGN_EXTEND (remainder
5148 << (set_sign_bit_copies - 1))))
5150 if (generate)
5152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5153 emit_constant_insn (cond,
5154 gen_rtx_SET (new_src, GEN_INT (temp1)));
5155 emit_constant_insn (cond,
5156 gen_ashrsi3 (target, new_src,
5157 GEN_INT (set_sign_bit_copies - 1)));
5159 return 2;
5161 /* For an inverted constant, we will need to set the low bits;
5162 these will be shifted out of harm's way. */
5163 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5164 if (const_ok_for_arm (~temp1))
5166 if (generate)
5168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5169 emit_constant_insn (cond,
5170 gen_rtx_SET (new_src, GEN_INT (temp1)));
5171 emit_constant_insn (cond,
5172 gen_ashrsi3 (target, new_src,
5173 GEN_INT (set_sign_bit_copies - 1)));
5175 return 2;
5179 /* See if we can calculate the value as the difference between two
5180 valid immediates. */
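/* Worked example (illustrative, not part of the original comment):
   0x00fffff0 is not a valid ARM immediate, but it equals
   0x01000000 - 0x10, so the code below would load 0x01000000 and
   then subtract 16 -- two instructions in total.  */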
5181 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5183 int topshift = clear_sign_bit_copies & ~1;
5185 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5186 & (0xff000000 >> topshift));
5188 /* If temp1 is zero, then that means the 9 most significant
5189 bits of remainder were 1 and we've caused it to overflow.
5190 When topshift is 0 we don't need to do anything since we
5191 can borrow from 'bit 32'. */
5192 if (temp1 == 0 && topshift != 0)
5193 temp1 = 0x80000000 >> (topshift - 1);
5195 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5197 if (const_ok_for_arm (temp2))
5199 if (generate)
5201 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5202 emit_constant_insn (cond,
5203 gen_rtx_SET (new_src, GEN_INT (temp1)));
5204 emit_constant_insn (cond,
5205 gen_addsi3 (target, new_src,
5206 GEN_INT (-temp2)));
5209 return 2;
5213 /* See if we can generate this by setting the bottom (or the top)
5214 16 bits, and then shifting these into the other half of the
5215 word. We only look for the simplest cases, to do more would cost
5216 too much. Be careful, however, not to generate this when the
5217 alternative would take fewer insns. */
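/* Illustrative example (assumption, not in the original comment): on a
   core without MOVW, 0x01010101 is built this way in three
   instructions -- two to load 0x0101, then an ORR of that value with
   itself shifted left by 16 -- instead of the four a plain immediate
   sequence would need.  */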
5218 if (val & 0xffff0000)
5220 temp1 = remainder & 0xffff0000;
5221 temp2 = remainder & 0x0000ffff;
5223 /* Overlaps outside this range are best done using other methods. */
5224 for (i = 9; i < 24; i++)
5226 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5227 && !const_ok_for_arm (temp2))
5229 rtx new_src = (subtargets
5230 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5231 : target);
5232 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5233 source, subtargets, generate);
5234 source = new_src;
5235 if (generate)
5236 emit_constant_insn
5237 (cond,
5238 gen_rtx_SET
5239 (target,
5240 gen_rtx_IOR (mode,
5241 gen_rtx_ASHIFT (mode, source,
5242 GEN_INT (i)),
5243 source)));
5244 return insns + 1;
5248 /* Don't duplicate cases already considered. */
5249 for (i = 17; i < 24; i++)
5251 if (((temp1 | (temp1 >> i)) == remainder)
5252 && !const_ok_for_arm (temp1))
5254 rtx new_src = (subtargets
5255 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5256 : target);
5257 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5258 source, subtargets, generate);
5259 source = new_src;
5260 if (generate)
5261 emit_constant_insn
5262 (cond,
5263 gen_rtx_SET (target,
5264 gen_rtx_IOR
5265 (mode,
5266 gen_rtx_LSHIFTRT (mode, source,
5267 GEN_INT (i)),
5268 source)));
5269 return insns + 1;
5273 break;
5275 case IOR:
5276 case XOR:
5277 /* If we have IOR or XOR, and the constant can be loaded in a
5278 single instruction, and we can find a temporary to put it in,
5279 then this can be done in two instructions instead of 3-4. */
5280 if (subtargets
5281 /* TARGET can't be NULL if SUBTARGETS is 0. */
5282 || (reload_completed && !reg_mentioned_p (target, source)))
5284 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5286 if (generate)
5288 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5290 emit_constant_insn (cond,
5291 gen_rtx_SET (sub, GEN_INT (val)));
5292 emit_constant_insn (cond,
5293 gen_rtx_SET (target,
5294 gen_rtx_fmt_ee (code, mode,
5295 source, sub)));
5297 return 2;
5301 if (code == XOR)
5302 break;
5304 /* Convert
5305 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5306 and the remainder 0s, e.g. 0xfff00000) into
5307 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5309 This can be done in 2 instructions by using shifts with mov or mvn.
5310 e.g. for
5311 x = x | 0xfff00000;
5312 we generate:
5313 mvn r0, r0, asl #12
5314 mvn r0, r0, lsr #12 */
5315 if (set_sign_bit_copies > 8
5316 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5318 if (generate)
5320 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5321 rtx shift = GEN_INT (set_sign_bit_copies);
5323 emit_constant_insn
5324 (cond,
5325 gen_rtx_SET (sub,
5326 gen_rtx_NOT (mode,
5327 gen_rtx_ASHIFT (mode,
5328 source,
5329 shift))));
5330 emit_constant_insn
5331 (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_NOT (mode,
5334 gen_rtx_LSHIFTRT (mode, sub,
5335 shift))));
5337 return 2;
5340 /* Convert
5341 x = y | constant (which has set_zero_bit_copies number of trailing ones) into
5343 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5345 E.g. for r0 = r0 | 0xfff we generate
5346 mvn r0, r0, lsr #12
5347 mvn r0, r0, asl #12 */
5350 if (set_zero_bit_copies > 8
5351 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5353 if (generate)
5355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5356 rtx shift = GEN_INT (set_zero_bit_copies);
5358 emit_constant_insn
5359 (cond,
5360 gen_rtx_SET (sub,
5361 gen_rtx_NOT (mode,
5362 gen_rtx_LSHIFTRT (mode,
5363 source,
5364 shift))));
5365 emit_constant_insn
5366 (cond,
5367 gen_rtx_SET (target,
5368 gen_rtx_NOT (mode,
5369 gen_rtx_ASHIFT (mode, sub,
5370 shift))));
5372 return 2;
5375 /* This will never be reached for Thumb2 because orn is a valid
5376 instruction. This is for Thumb1 and the ARM 32 bit cases.
5378 x = y | constant (such that ~constant is a valid constant)
5379 Transform this to
5380 x = ~(~y & ~constant). */
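/* Illustrative example (assumption, not in the original comment):
   with no spare register, x = y | 0xffff00ff uses the fact that
   ~0xffff00ff == 0x0000ff00 is a valid immediate, giving
   mvn t, y;  and t, t, #0xff00;  mvn x, t  -- the three
   instructions counted below.  */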
5382 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5384 if (generate)
5386 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5387 emit_constant_insn (cond,
5388 gen_rtx_SET (sub,
5389 gen_rtx_NOT (mode, source)));
5390 source = sub;
5391 if (subtargets)
5392 sub = gen_reg_rtx (mode);
5393 emit_constant_insn (cond,
5394 gen_rtx_SET (sub,
5395 gen_rtx_AND (mode, source,
5396 GEN_INT (temp1))));
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (target,
5399 gen_rtx_NOT (mode, sub)));
5401 return 3;
5403 break;
5405 case AND:
5406 /* See if two shifts will do 2 or more insn's worth of work. */
5407 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5409 HOST_WIDE_INT shift_mask = ((0xffffffff
5410 << (32 - clear_sign_bit_copies))
5411 & 0xffffffff);
5413 if ((remainder | shift_mask) != 0xffffffff)
5415 HOST_WIDE_INT new_val
5416 = ARM_SIGN_EXTEND (remainder | shift_mask);
5418 if (generate)
5420 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5421 insns = arm_gen_constant (AND, SImode, cond, new_val,
5422 new_src, source, subtargets, 1);
5423 source = new_src;
5425 else
5427 rtx targ = subtargets ? NULL_RTX : target;
5428 insns = arm_gen_constant (AND, mode, cond, new_val,
5429 targ, source, subtargets, 0);
5433 if (generate)
5435 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5436 rtx shift = GEN_INT (clear_sign_bit_copies);
5438 emit_insn (gen_ashlsi3 (new_src, source, shift));
5439 emit_insn (gen_lshrsi3 (target, new_src, shift));
5442 return insns + 2;
5445 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5447 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5449 if ((remainder | shift_mask) != 0xffffffff)
5451 HOST_WIDE_INT new_val
5452 = ARM_SIGN_EXTEND (remainder | shift_mask);
5453 if (generate)
5455 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5457 insns = arm_gen_constant (AND, mode, cond, new_val,
5458 new_src, source, subtargets, 1);
5459 source = new_src;
5461 else
5463 rtx targ = subtargets ? NULL_RTX : target;
5465 insns = arm_gen_constant (AND, mode, cond, new_val,
5466 targ, source, subtargets, 0);
5470 if (generate)
5472 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5473 rtx shift = GEN_INT (clear_zero_bit_copies);
5475 emit_insn (gen_lshrsi3 (new_src, source, shift));
5476 emit_insn (gen_ashlsi3 (target, new_src, shift));
5479 return insns + 2;
5482 break;
5484 default:
5485 break;
5488 /* Calculate what the instruction sequences would be if we generated it
5489 normally, negated, or inverted. */
5490 if (code == AND)
5491 /* AND cannot be split into multiple insns, so invert and use BIC. */
5492 insns = 99;
5493 else
5494 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5496 if (can_negate)
5497 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5498 &neg_immediates);
5499 else
5500 neg_insns = 99;
5502 if (can_invert || final_invert)
5503 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5504 &inv_immediates);
5505 else
5506 inv_insns = 99;
5508 immediates = &pos_immediates;
5510 /* Is the negated immediate sequence more efficient? */
5511 if (neg_insns < insns && neg_insns <= inv_insns)
5513 insns = neg_insns;
5514 immediates = &neg_immediates;
5516 else
5517 can_negate = 0;
5519 /* Is the inverted immediate sequence more efficient?
5520 We must allow for an extra NOT instruction for XOR operations, although
5521 there is some chance that the final 'mvn' will get optimized later. */
5522 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5524 insns = inv_insns;
5525 immediates = &inv_immediates;
5527 else
5529 can_invert = 0;
5530 final_invert = 0;
5533 /* Now output the chosen sequence as instructions. */
5534 if (generate)
5536 for (i = 0; i < insns; i++)
5538 rtx new_src, temp1_rtx;
5540 temp1 = immediates->i[i];
5542 if (code == SET || code == MINUS)
5543 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5544 else if ((final_invert || i < (insns - 1)) && subtargets)
5545 new_src = gen_reg_rtx (mode);
5546 else
5547 new_src = target;
5549 if (can_invert)
5550 temp1 = ~temp1;
5551 else if (can_negate)
5552 temp1 = -temp1;
5554 temp1 = trunc_int_for_mode (temp1, mode);
5555 temp1_rtx = GEN_INT (temp1);
5557 if (code == SET)
5559 else if (code == MINUS)
5560 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5561 else
5562 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5564 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5565 source = new_src;
5567 if (code == SET)
5569 can_negate = can_invert;
5570 can_invert = 0;
5571 code = PLUS;
5573 else if (code == MINUS)
5574 code = PLUS;
5578 if (final_invert)
5580 if (generate)
5581 emit_constant_insn (cond, gen_rtx_SET (target,
5582 gen_rtx_NOT (mode, source)));
5583 insns++;
5586 return insns;
5589 /* Return TRUE if op is a constant where both the low and high words are
5590 suitable for RSB/RSC instructions. This is never true for Thumb, since
5591 we do not have RSC in that case. */
5592 static bool
5593 arm_const_double_prefer_rsbs_rsc (rtx op)
5595 /* Thumb lacks RSC, so we never prefer that sequence. */
5596 if (TARGET_THUMB || !CONST_INT_P (op))
5597 return false;
5598 HOST_WIDE_INT hi, lo;
5599 lo = UINTVAL (op) & 0xffffffffULL;
5600 hi = UINTVAL (op) >> 32;
5601 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5604 /* Canonicalize a comparison so that we are more likely to recognize it.
5605 This can be done for a few constant compares, where we can make the
5606 immediate value easier to load. */
5608 static void
5609 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5610 bool op0_preserve_value)
5612 machine_mode mode;
5613 unsigned HOST_WIDE_INT i, maxval;
5615 mode = GET_MODE (*op0);
5616 if (mode == VOIDmode)
5617 mode = GET_MODE (*op1);
5619 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5621 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5622 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5623 either reversed or (for constant OP1) adjusted to GE/LT.
5624 Similarly for GTU/LEU in Thumb mode. */
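/* Illustrative example (not from the original source): a DImode
   x > 5  has no direct GT sequence here, so it is rewritten below as
   x >= 6  (GE), which the cmp/sbc sequence can handle.  */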
5625 if (mode == DImode)
5628 if (*code == GT || *code == LE
5629 || *code == GTU || *code == LEU)
5631 /* Missing comparison. First try to use an available
5632 comparison. */
5633 if (CONST_INT_P (*op1))
5635 i = INTVAL (*op1);
5636 switch (*code)
5638 case GT:
5639 case LE:
5640 if (i != maxval)
5642 /* Try to convert to GE/LT, unless that would be more
5643 expensive. */
5644 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5645 && arm_const_double_prefer_rsbs_rsc (*op1))
5646 return;
5647 *op1 = GEN_INT (i + 1);
5648 *code = *code == GT ? GE : LT;
5650 else
5652 /* GT maxval is always false, LE maxval is always true.
5653 We can't fold that away here as we must make a
5654 comparison, but we can fold them to comparisons
5655 with the same result that can be handled:
5656 op0 GT maxval -> op0 LT minval
5657 op0 LE maxval -> op0 GE minval
5658 where minval = (-maxval - 1). */
5659 *op1 = GEN_INT (-maxval - 1);
5660 *code = *code == GT ? LT : GE;
5662 return;
5664 case GTU:
5665 case LEU:
5666 if (i != ~((unsigned HOST_WIDE_INT) 0))
5668 /* Try to convert to GEU/LTU, unless that would
5669 be more expensive. */
5670 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5671 && arm_const_double_prefer_rsbs_rsc (*op1))
5672 return;
5673 *op1 = GEN_INT (i + 1);
5674 *code = *code == GTU ? GEU : LTU;
5676 else
5678 /* GTU ~0 is always false, LEU ~0 is always true.
5679 We can't fold that away here as we must make a
5680 comparison, but we can fold them to comparisons
5681 with the same result that can be handled:
5682 op0 GTU ~0 -> op0 LTU 0
5683 op0 LEU ~0 -> op0 GEU 0. */
5684 *op1 = const0_rtx;
5685 *code = *code == GTU ? LTU : GEU;
5687 return;
5689 default:
5690 gcc_unreachable ();
5694 if (!op0_preserve_value)
5696 std::swap (*op0, *op1);
5697 *code = (int)swap_condition ((enum rtx_code)*code);
5700 return;
5703 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5704 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5705 to facilitate possible combining with a cmp into 'ands'. */
5706 if (mode == SImode
5707 && GET_CODE (*op0) == ZERO_EXTEND
5708 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5709 && GET_MODE (XEXP (*op0, 0)) == QImode
5710 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5711 && subreg_lowpart_p (XEXP (*op0, 0))
5712 && *op1 == const0_rtx)
5713 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5714 GEN_INT (255));
5716 /* Comparisons smaller than DImode. Only adjust comparisons against
5717 an out-of-range constant. */
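/* Illustrative example (not from the original source): for SImode,
   x <= 0x3ff  cannot encode #0x3ff in a CMP, but the rewrite below
   turns it into  x < 0x400, whose constant is encodable.  */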
5718 if (!CONST_INT_P (*op1)
5719 || const_ok_for_arm (INTVAL (*op1))
5720 || const_ok_for_arm (- INTVAL (*op1)))
5721 return;
5723 i = INTVAL (*op1);
5725 switch (*code)
5727 case EQ:
5728 case NE:
5729 return;
5731 case GT:
5732 case LE:
5733 if (i != maxval
5734 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5736 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5737 *code = *code == GT ? GE : LT;
5738 return;
5740 break;
5742 case GE:
5743 case LT:
5744 if (i != ~maxval
5745 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5747 *op1 = GEN_INT (i - 1);
5748 *code = *code == GE ? GT : LE;
5749 return;
5751 break;
5753 case GTU:
5754 case LEU:
5755 if (i != ~((unsigned HOST_WIDE_INT) 0)
5756 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5758 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5759 *code = *code == GTU ? GEU : LTU;
5760 return;
5762 break;
5764 case GEU:
5765 case LTU:
5766 if (i != 0
5767 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5769 *op1 = GEN_INT (i - 1);
5770 *code = *code == GEU ? GTU : LEU;
5771 return;
5773 break;
5775 default:
5776 gcc_unreachable ();
5781 /* Define how to find the value returned by a function. */
5783 static rtx
5784 arm_function_value(const_tree type, const_tree func,
5785 bool outgoing ATTRIBUTE_UNUSED)
5787 machine_mode mode;
5788 int unsignedp ATTRIBUTE_UNUSED;
5789 rtx r ATTRIBUTE_UNUSED;
5791 mode = TYPE_MODE (type);
5793 if (TARGET_AAPCS_BASED)
5794 return aapcs_allocate_return_reg (mode, type, func);
5796 /* Promote integer types. */
5797 if (INTEGRAL_TYPE_P (type))
5798 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5800 /* Promotes small structs returned in a register to full-word size
5801 for big-endian AAPCS. */
5802 if (arm_return_in_msb (type))
5804 HOST_WIDE_INT size = int_size_in_bytes (type);
5805 if (size % UNITS_PER_WORD != 0)
5807 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5808 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5812 return arm_libcall_value_1 (mode);
5815 /* libcall hashtable helpers. */
5817 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5819 static inline hashval_t hash (const rtx_def *);
5820 static inline bool equal (const rtx_def *, const rtx_def *);
5821 static inline void remove (rtx_def *);
5824 inline bool
5825 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5827 return rtx_equal_p (p1, p2);
5830 inline hashval_t
5831 libcall_hasher::hash (const rtx_def *p1)
5833 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5836 typedef hash_table<libcall_hasher> libcall_table_type;
5838 static void
5839 add_libcall (libcall_table_type *htab, rtx libcall)
5841 *htab->find_slot (libcall, INSERT) = libcall;
5844 static bool
5845 arm_libcall_uses_aapcs_base (const_rtx libcall)
5847 static bool init_done = false;
5848 static libcall_table_type *libcall_htab = NULL;
5850 if (!init_done)
5852 init_done = true;
5854 libcall_htab = new libcall_table_type (31);
5855 add_libcall (libcall_htab,
5856 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5857 add_libcall (libcall_htab,
5858 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5859 add_libcall (libcall_htab,
5860 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5861 add_libcall (libcall_htab,
5862 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5864 add_libcall (libcall_htab,
5865 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5866 add_libcall (libcall_htab,
5867 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5868 add_libcall (libcall_htab,
5869 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5870 add_libcall (libcall_htab,
5871 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5873 add_libcall (libcall_htab,
5874 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5875 add_libcall (libcall_htab,
5876 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5877 add_libcall (libcall_htab,
5878 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5879 add_libcall (libcall_htab,
5880 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5881 add_libcall (libcall_htab,
5882 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5883 add_libcall (libcall_htab,
5884 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5885 add_libcall (libcall_htab,
5886 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5887 add_libcall (libcall_htab,
5888 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5889 add_libcall (libcall_htab,
5890 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5894 /* Values from double-precision helper functions are returned in core
5895 registers if the selected core only supports single-precision
5896 arithmetic, even if we are using the hard-float ABI. The same is
5897 true for single-precision helpers except in case of MVE, because in
5898 MVE we will be using the hard-float ABI on a CPU which doesn't support
5899 single-precision operations in hardware. In MVE the following check
5900 enables use of emulation for the single-precision arithmetic
5901 operations. */
5902 if (TARGET_HAVE_MVE)
5904 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5905 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5906 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5907 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5908 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5909 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5910 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5911 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5912 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5913 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5914 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5928 SFmode));
5929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5930 DFmode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5935 return libcall && libcall_htab->find (libcall) != NULL;
5938 static rtx
5939 arm_libcall_value_1 (machine_mode mode)
5941 if (TARGET_AAPCS_BASED)
5942 return aapcs_libcall_value (mode);
5943 else if (TARGET_IWMMXT_ABI
5944 && arm_vector_mode_supported_p (mode))
5945 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5946 else
5947 return gen_rtx_REG (mode, ARG_REGISTER (1));
5950 /* Define how to find the value returned by a library function
5951 assuming the value has mode MODE. */
5953 static rtx
5954 arm_libcall_value (machine_mode mode, const_rtx libcall)
5956 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5957 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5959 /* The following libcalls return their result in integer registers,
5960 even though they return a floating point value. */
5961 if (arm_libcall_uses_aapcs_base (libcall))
5962 return gen_rtx_REG (mode, ARG_REGISTER(1));
5966 return arm_libcall_value_1 (mode);
5969 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5971 static bool
5972 arm_function_value_regno_p (const unsigned int regno)
5974 if (regno == ARG_REGISTER (1)
5975 || (TARGET_32BIT
5976 && TARGET_AAPCS_BASED
5977 && TARGET_HARD_FLOAT
5978 && regno == FIRST_VFP_REGNUM)
5979 || (TARGET_IWMMXT_ABI
5980 && regno == FIRST_IWMMXT_REGNUM))
5981 return true;
5983 return false;
5986 /* Determine the amount of memory needed to store the possible return
5987 registers of an untyped call. */
5989 arm_apply_result_size (void)
5991 int size = 16;
5993 if (TARGET_32BIT)
5995 if (TARGET_HARD_FLOAT_ABI)
5996 size += 32;
5997 if (TARGET_IWMMXT_ABI)
5998 size += 8;
6001 return size;
6004 /* Decide whether TYPE should be returned in memory (true)
6005 or in a register (false). FNTYPE is the type of the function making
6006 the call. */
6007 static bool
6008 arm_return_in_memory (const_tree type, const_tree fntype)
6010 HOST_WIDE_INT size;
6012 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6014 if (TARGET_AAPCS_BASED)
6016 /* Simple, non-aggregate types (i.e. not including vectors and
6017 complex) are always returned in a register (or registers).
6018 We don't care about which register here, so we can short-cut
6019 some of the detail. */
6020 if (!AGGREGATE_TYPE_P (type)
6021 && TREE_CODE (type) != VECTOR_TYPE
6022 && TREE_CODE (type) != COMPLEX_TYPE)
6023 return false;
6025 /* Any return value that is no larger than one word can be
6026 returned in r0. */
6027 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6028 return false;
6030 /* Check any available co-processors to see if they accept the
6031 type as a register candidate (VFP, for example, can return
6032 some aggregates in consecutive registers). These aren't
6033 available if the call is variadic. */
6034 if (aapcs_select_return_coproc (type, fntype) >= 0)
6035 return false;
6037 /* Vector values should be returned using ARM registers, not
6038 memory (unless they're over 16 bytes, which will break since
6039 we only have four call-clobbered registers to play with). */
6040 if (TREE_CODE (type) == VECTOR_TYPE)
6041 return (size < 0 || size > (4 * UNITS_PER_WORD));
6043 /* The rest go in memory. */
6044 return true;
6047 if (TREE_CODE (type) == VECTOR_TYPE)
6048 return (size < 0 || size > (4 * UNITS_PER_WORD));
6050 if (!AGGREGATE_TYPE_P (type) &&
6051 (TREE_CODE (type) != VECTOR_TYPE))
6052 /* All simple types are returned in registers. */
6053 return false;
6055 if (arm_abi != ARM_ABI_APCS)
6057 /* ATPCS and later return aggregate types in memory only if they are
6058 larger than a word (or are variable size). */
6059 return (size < 0 || size > UNITS_PER_WORD);
6062 /* For the arm-wince targets we choose to be compatible with Microsoft's
6063 ARM and Thumb compilers, which always return aggregates in memory. */
6064 #ifndef ARM_WINCE
6065 /* All structures/unions bigger than one word are returned in memory.
6066 Also catch the case where int_size_in_bytes returns -1. In this case
6067 the aggregate is either huge or of variable size, and in either case
6068 we will want to return it via memory and not in a register. */
6069 if (size < 0 || size > UNITS_PER_WORD)
6070 return true;
6072 if (TREE_CODE (type) == RECORD_TYPE)
6074 tree field;
6076 /* For a struct the APCS says that we only return in a register
6077 if the type is 'integer like' and every addressable element
6078 has an offset of zero. For practical purposes this means
6079 that the structure can have at most one non bit-field element
6080 and that this element must be the first one in the structure. */
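/* Illustration (assumption, not part of the original comment): under
   these rules  struct { char c; int pad : 24; }  can be returned in
   r0, while  struct { char a; char b; }  must go in memory because
   its second field is an addressable non-bitfield.  */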
6082 /* Find the first field, ignoring non FIELD_DECL things which will
6083 have been created by C++. */
6084 /* NOTE: This code is deprecated and has not been updated to handle
6085 DECL_FIELD_ABI_IGNORED. */
6086 for (field = TYPE_FIELDS (type);
6087 field && TREE_CODE (field) != FIELD_DECL;
6088 field = DECL_CHAIN (field))
6089 continue;
6091 if (field == NULL)
6092 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6094 /* Check that the first field is valid for returning in a register. */
6096 /* ... Floats are not allowed */
6097 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6098 return true;
6100 /* ... Aggregates that are not themselves valid for returning in
6101 a register are not allowed. */
6102 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6103 return true;
6105 /* Now check the remaining fields, if any. Only bitfields are allowed,
6106 since they are not addressable. */
6107 for (field = DECL_CHAIN (field);
6108 field;
6109 field = DECL_CHAIN (field))
6111 if (TREE_CODE (field) != FIELD_DECL)
6112 continue;
6114 if (!DECL_BIT_FIELD_TYPE (field))
6115 return true;
6118 return false;
6121 if (TREE_CODE (type) == UNION_TYPE)
6123 tree field;
6125 /* Unions can be returned in registers if every element is
6126 integral, or can be returned in an integer register. */
6127 for (field = TYPE_FIELDS (type);
6128 field;
6129 field = DECL_CHAIN (field))
6131 if (TREE_CODE (field) != FIELD_DECL)
6132 continue;
6134 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6135 return true;
6137 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6138 return true;
6141 return false;
6143 #endif /* not ARM_WINCE */
6145 /* Return all other types in memory. */
6146 return true;
6149 const struct pcs_attribute_arg
6151 const char *arg;
6152 enum arm_pcs value;
6153 } pcs_attribute_args[] =
6155 {"aapcs", ARM_PCS_AAPCS},
6156 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6157 #if 0
6158 /* We could recognize these, but changes would be needed elsewhere
6159 * to implement them. */
6160 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6161 {"atpcs", ARM_PCS_ATPCS},
6162 {"apcs", ARM_PCS_APCS},
6163 #endif
6164 {NULL, ARM_PCS_UNKNOWN}
6167 static enum arm_pcs
6168 arm_pcs_from_attribute (tree attr)
6170 const struct pcs_attribute_arg *ptr;
6171 const char *arg;
6173 /* Get the value of the argument. */
6174 if (TREE_VALUE (attr) == NULL_TREE
6175 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6176 return ARM_PCS_UNKNOWN;
6178 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6180 /* Check it against the list of known arguments. */
6181 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6182 if (streq (arg, ptr->arg))
6183 return ptr->value;
6185 /* An unrecognized PCS variant. */
6186 return ARM_PCS_UNKNOWN;
6189 /* Get the PCS variant to use for this call. TYPE is the function's type
6190 specification, DECL is the specific declaration. DECL may be null if
6191 the call could be indirect or if this is a library call. */
6192 static enum arm_pcs
6193 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6195 bool user_convention = false;
6196 enum arm_pcs user_pcs = arm_pcs_default;
6197 tree attr;
6199 gcc_assert (type);
6201 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6202 if (attr)
6204 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6205 user_convention = true;
6208 if (TARGET_AAPCS_BASED)
6210 /* Detect varargs functions. These always use the base rules
6211 (no argument is ever a candidate for a co-processor
6212 register). */
6213 bool base_rules = stdarg_p (type);
6215 if (user_convention)
6217 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6218 sorry ("non-AAPCS derived PCS variant");
6219 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6220 error ("variadic functions must use the base AAPCS variant");
6223 if (base_rules)
6224 return ARM_PCS_AAPCS;
6225 else if (user_convention)
6226 return user_pcs;
6227 #if 0
6228 /* Unfortunately, this is not safe and can lead to wrong code
6229 being generated (PR96882). Not all calls into the back-end
6230 pass the DECL, so it is unsafe to make any PCS-changing
6231 decisions based on it. In particular the RETURN_IN_MEMORY
6232 hook is only ever passed a TYPE. This needs revisiting to
6233 see if there are any partial improvements that can be
6234 re-enabled. */
6235 else if (decl && flag_unit_at_a_time)
6237 /* Local functions never leak outside this compilation unit,
6238 so we are free to use whatever conventions are
6239 appropriate. */
6240 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6241 cgraph_node *local_info_node
6242 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6243 if (local_info_node && local_info_node->local)
6244 return ARM_PCS_AAPCS_LOCAL;
6246 #endif
6248 else if (user_convention && user_pcs != arm_pcs_default)
6249 sorry ("PCS variant");
6251 /* For everything else we use the target's default. */
6252 return arm_pcs_default;
6256 static void
6257 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6258 const_tree fntype ATTRIBUTE_UNUSED,
6259 rtx libcall ATTRIBUTE_UNUSED,
6260 const_tree fndecl ATTRIBUTE_UNUSED)
6262 /* Record the unallocated VFP registers. */
6263 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6264 pcum->aapcs_vfp_reg_alloc = 0;
6267 /* Bitmasks that indicate whether earlier versions of GCC would have
6268 taken a different path through the ABI logic. This should result in
6269 a -Wpsabi warning if the earlier path led to a different ABI decision.
6271 WARN_PSABI_EMPTY_CXX17_BASE
6272 Indicates that the type includes an artificial empty C++17 base field
6273 that, prior to GCC 10.1, would prevent the type from being treated as
6274 a HFA or HVA. See PR94711 for details.
6276 WARN_PSABI_NO_UNIQUE_ADDRESS
6277 Indicates that the type includes an empty [[no_unique_address]] field
6278 that, prior to GCC 10.1, would prevent the type from being treated as
6279 a HFA or HVA. */
6280 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6281 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6282 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6284 /* Walk down the type tree of TYPE counting consecutive base elements.
6285 If *MODEP is VOIDmode, then set it to the first valid floating point
6286 type. If a non-floating point type is found, or if a floating point
6287 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6288 otherwise return the count in the sub-tree.
6290 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6291 function has changed its behavior relative to earlier versions of GCC.
6292 Normally the argument should be nonnull and point to a zero-initialized
6293 variable. The function then records whether the ABI decision might
6294 be affected by a known fix to the ABI logic, setting the associated
6295 WARN_PSABI_* bits if so.
6297 When the argument is instead a null pointer, the function tries to
6298 simulate the behavior of GCC before all such ABI fixes were made.
6299 This is useful to check whether the function returns something
6300 different after the ABI fixes. */
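/* Worked example (illustrative, not in the original source): for
   struct { float x; float y[2]; }  the recursion below leaves *MODEP
   == SFmode and returns 3, i.e. a candidate homogeneous aggregate of
   three single-precision floats.  */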
6301 static int
6302 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6303 unsigned int *warn_psabi_flags)
6305 machine_mode mode;
6306 HOST_WIDE_INT size;
6308 switch (TREE_CODE (type))
6310 case REAL_TYPE:
6311 mode = TYPE_MODE (type);
6312 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6313 return -1;
6315 if (*modep == VOIDmode)
6316 *modep = mode;
6318 if (*modep == mode)
6319 return 1;
6321 break;
6323 case COMPLEX_TYPE:
6324 mode = TYPE_MODE (TREE_TYPE (type));
6325 if (mode != DFmode && mode != SFmode)
6326 return -1;
6328 if (*modep == VOIDmode)
6329 *modep = mode;
6331 if (*modep == mode)
6332 return 2;
6334 break;
6336 case VECTOR_TYPE:
6337 /* Use V2SImode and V4SImode as representatives of all 64-bit
6338 and 128-bit vector types, whether or not those modes are
6339 supported with the present options. */
6340 size = int_size_in_bytes (type);
6341 switch (size)
6343 case 8:
6344 mode = V2SImode;
6345 break;
6346 case 16:
6347 mode = V4SImode;
6348 break;
6349 default:
6350 return -1;
6353 if (*modep == VOIDmode)
6354 *modep = mode;
6356 /* Vector modes are considered to be opaque: two vectors are
6357 equivalent for the purposes of being homogeneous aggregates
6358 if they are the same size. */
6359 if (*modep == mode)
6360 return 1;
6362 break;
6364 case ARRAY_TYPE:
6366 int count;
6367 tree index = TYPE_DOMAIN (type);
6369 /* Can't handle incomplete types nor sizes that are not
6370 fixed. */
6371 if (!COMPLETE_TYPE_P (type)
6372 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6373 return -1;
6375 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6376 warn_psabi_flags);
6377 if (count == -1
6378 || !index
6379 || !TYPE_MAX_VALUE (index)
6380 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6381 || !TYPE_MIN_VALUE (index)
6382 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6383 || count < 0)
6384 return -1;
6386 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6387 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6389 /* There must be no padding. */
6390 if (wi::to_wide (TYPE_SIZE (type))
6391 != count * GET_MODE_BITSIZE (*modep))
6392 return -1;
6394 return count;
6397 case RECORD_TYPE:
6399 int count = 0;
6400 int sub_count;
6401 tree field;
6403 /* Can't handle incomplete types nor sizes that are not
6404 fixed. */
6405 if (!COMPLETE_TYPE_P (type)
6406 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6407 return -1;
6409 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6411 if (TREE_CODE (field) != FIELD_DECL)
6412 continue;
6414 if (DECL_FIELD_ABI_IGNORED (field))
6416 /* See whether this is something that earlier versions of
6417 GCC failed to ignore. */
6418 unsigned int flag;
6419 if (lookup_attribute ("no_unique_address",
6420 DECL_ATTRIBUTES (field)))
6421 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6422 else if (cxx17_empty_base_field_p (field))
6423 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6424 else
6425 /* No compatibility problem. */
6426 continue;
6428 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6429 if (warn_psabi_flags)
6431 *warn_psabi_flags |= flag;
6432 continue;
6435 /* A zero-width bitfield may affect layout in some
6436 circumstances, but adds no members. The determination
6437 of whether or not a type is an HFA is performed after
6438 layout is complete, so if the type still looks like an
6439 HFA afterwards, it is still classed as one. This is
6440 potentially an ABI break for the hard-float ABI. */
6441 else if (DECL_BIT_FIELD (field)
6442 && integer_zerop (DECL_SIZE (field)))
6444 /* Prior to GCC-12 these fields were stripped early,
6445 hiding them from the back-end entirely and
6446 resulting in the correct behaviour for argument
6447 passing. Simulate that old behaviour without
6448 generating a warning. */
6449 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6450 continue;
6451 if (warn_psabi_flags)
6453 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6454 continue;
6458 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6459 warn_psabi_flags);
6460 if (sub_count < 0)
6461 return -1;
6462 count += sub_count;
6465 /* There must be no padding. */
6466 if (wi::to_wide (TYPE_SIZE (type))
6467 != count * GET_MODE_BITSIZE (*modep))
6468 return -1;
6470 return count;
6473 case UNION_TYPE:
6474 case QUAL_UNION_TYPE:
6476 /* These aren't very interesting except in a degenerate case. */
6477 int count = 0;
6478 int sub_count;
6479 tree field;
6481 /* Can't handle incomplete types nor sizes that are not
6482 fixed. */
6483 if (!COMPLETE_TYPE_P (type)
6484 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6485 return -1;
6487 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6489 if (TREE_CODE (field) != FIELD_DECL)
6490 continue;
6492 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6493 warn_psabi_flags);
6494 if (sub_count < 0)
6495 return -1;
6496 count = count > sub_count ? count : sub_count;
6499 /* There must be no padding. */
6500 if (wi::to_wide (TYPE_SIZE (type))
6501 != count * GET_MODE_BITSIZE (*modep))
6502 return -1;
6504 return count;
6507 default:
6508 break;
6511 return -1;
6514 /* Return true if PCS_VARIANT should use VFP registers. */
6515 static bool
6516 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6518 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6520 static bool seen_thumb1_vfp = false;
6522 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6524 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6525 /* sorry() is not immediately fatal, so only display this once. */
6526 seen_thumb1_vfp = true;
6529 return true;
6532 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6533 return false;
6535 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6536 (TARGET_VFP_DOUBLE || !is_double));
6539 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6540 suitable for passing or returning in VFP registers for the PCS
6541 variant selected. If it is, then *BASE_MODE is updated to contain
6542 a machine mode describing each element of the argument's type and
6543 *COUNT to hold the number of such elements. */
6544 static bool
6545 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6546 machine_mode mode, const_tree type,
6547 machine_mode *base_mode, int *count)
6549 machine_mode new_mode = VOIDmode;
6551 /* If we have the type information, prefer that to working things
6552 out from the mode. */
6553 if (type)
6555 unsigned int warn_psabi_flags = 0;
6556 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6557 &warn_psabi_flags);
6558 if (ag_count > 0 && ag_count <= 4)
6560 static unsigned last_reported_type_uid;
6561 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6562 int alt;
6563 if (warn_psabi
6564 && warn_psabi_flags
6565 && uid != last_reported_type_uid
6566 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6567 != ag_count))
6569 const char *url10
6570 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6571 const char *url12
6572 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6573 gcc_assert (alt == -1);
6574 last_reported_type_uid = uid;
6575 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6576 qualification. */
6577 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6578 inform (input_location, "parameter passing for argument of "
6579 "type %qT with %<[[no_unique_address]]%> members "
6580 "changed %{in GCC 10.1%}",
6581 TYPE_MAIN_VARIANT (type), url10);
6582 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6583 inform (input_location, "parameter passing for argument of "
6584 "type %qT when C++17 is enabled changed to match "
6585 "C++14 %{in GCC 10.1%}",
6586 TYPE_MAIN_VARIANT (type), url10);
6587 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6588 inform (input_location, "parameter passing for argument of "
6589 "type %qT changed %{in GCC 12.1%}",
6590 TYPE_MAIN_VARIANT (type), url12);
6592 *count = ag_count;
6594 else
6595 return false;
6597 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6598 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6599 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6601 *count = 1;
6602 new_mode = mode;
6604 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6606 *count = 2;
6607 new_mode = (mode == DCmode ? DFmode : SFmode);
6609 else
6610 return false;
6613 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6614 return false;
6616 *base_mode = new_mode;
6618 if (TARGET_GENERAL_REGS_ONLY)
6619 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6620 type);
6622 return true;
6625 static bool
6626 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6627 machine_mode mode, const_tree type)
6629 int count ATTRIBUTE_UNUSED;
6630 machine_mode ag_mode ATTRIBUTE_UNUSED;
6632 if (!use_vfp_abi (pcs_variant, false))
6633 return false;
6634 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6635 &ag_mode, &count);
6638 static bool
6639 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6640 const_tree type)
6642 if (!use_vfp_abi (pcum->pcs_variant, false))
6643 return false;
6645 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6646 &pcum->aapcs_vfp_rmode,
6647 &pcum->aapcs_vfp_rcount);
6650 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6651 for the behaviour of this function. */
6653 static bool
6654 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6655 const_tree type ATTRIBUTE_UNUSED)
6657 int rmode_size
6658 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6659 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6660 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6661 int regno;
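/* Worked example (illustrative): for an argument that is an HFA of
   two doubles, aapcs_vfp_rmode is DFmode and aapcs_vfp_rcount is 2,
   so rmode_size == 8, shift == 2 and mask == 0xf: the loop below
   looks for four consecutive free single-precision registers
   starting at an even register number.  */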
6663 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6664 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6666 pcum->aapcs_vfp_reg_alloc = mask << regno;
6667 if (mode == BLKmode
6668 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6669 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6671 int i;
6672 int rcount = pcum->aapcs_vfp_rcount;
6673 int rshift = shift;
6674 machine_mode rmode = pcum->aapcs_vfp_rmode;
6675 rtx par;
6676 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6678 /* Avoid using unsupported vector modes. */
6679 if (rmode == V2SImode)
6680 rmode = DImode;
6681 else if (rmode == V4SImode)
6683 rmode = DImode;
6684 rcount *= 2;
6685 rshift /= 2;
6688 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6689 for (i = 0; i < rcount; i++)
6691 rtx tmp = gen_rtx_REG (rmode,
6692 FIRST_VFP_REGNUM + regno + i * rshift);
6693 tmp = gen_rtx_EXPR_LIST
6694 (VOIDmode, tmp,
6695 GEN_INT (i * GET_MODE_SIZE (rmode)));
6696 XVECEXP (par, 0, i) = tmp;
6699 pcum->aapcs_reg = par;
6701 else
6702 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6703 return true;
6705 return false;
6708 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6709 comment there for the behaviour of this function. */
6711 static rtx
6712 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6713 machine_mode mode,
6714 const_tree type ATTRIBUTE_UNUSED)
6716 if (!use_vfp_abi (pcs_variant, false))
6717 return NULL;
6719 if (mode == BLKmode
6720 || (GET_MODE_CLASS (mode) == MODE_INT
6721 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6722 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6724 int count;
6725 machine_mode ag_mode;
6726 int i;
6727 rtx par;
6728 int shift;
6730 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6731 &ag_mode, &count);
6733 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6735 if (ag_mode == V2SImode)
6736 ag_mode = DImode;
6737 else if (ag_mode == V4SImode)
6739 ag_mode = DImode;
6740 count *= 2;
6743 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6744 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6745 for (i = 0; i < count; i++)
6747 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6748 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6749 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6750 XVECEXP (par, 0, i) = tmp;
6753 return par;
6756 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6759 static void
6760 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6761 machine_mode mode ATTRIBUTE_UNUSED,
6762 const_tree type ATTRIBUTE_UNUSED)
6764 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6765 pcum->aapcs_vfp_reg_alloc = 0;
6766 return;
6769 #define AAPCS_CP(X) \
6771 aapcs_ ## X ## _cum_init, \
6772 aapcs_ ## X ## _is_call_candidate, \
6773 aapcs_ ## X ## _allocate, \
6774 aapcs_ ## X ## _is_return_candidate, \
6775 aapcs_ ## X ## _allocate_return_reg, \
6776 aapcs_ ## X ## _advance \
6779 /* Table of co-processors that can be used to pass arguments in
6780 registers. Ideally no argument should be a candidate for more than
6781 one co-processor table entry, but the table is processed in order
6782 and stops after the first match. If that entry then fails to put
6783 the argument into a co-processor register, the argument will go on
6784 the stack. */
6785 static struct
6787 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6788 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6790 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6791 BLKmode) is a candidate for this co-processor's registers; this
6792 function should ignore any position-dependent state in
6793 CUMULATIVE_ARGS and only use call-type dependent information. */
6794 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6796 /* Return true if the argument does get a co-processor register; it
6797 should set aapcs_reg to an RTX of the register allocated as is
6798 required for a return from FUNCTION_ARG. */
6799 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6801 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6802 be returned in this co-processor's registers. */
6803 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6805 /* Allocate and return an RTX element to hold the return type of a call. This
6806 routine must not fail and will only be called if is_return_candidate
6807 returned true with the same parameters. */
6808 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6810 /* Finish processing this argument and prepare to start processing
6811 the next one. */
6812 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6813 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6815 AAPCS_CP(vfp)
6818 #undef AAPCS_CP
6820 static int
6821 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6822 const_tree type)
6824 int i;
6826 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6827 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6828 return i;
6830 return -1;
6833 static int
6834 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6836 /* We aren't passed a decl, so we can't check that a call is local.
6837 However, it isn't clear that that would be a win anyway, since it
6838 might limit some tail-calling opportunities. */
6839 enum arm_pcs pcs_variant;
6841 if (fntype)
6843 const_tree fndecl = NULL_TREE;
6845 if (TREE_CODE (fntype) == FUNCTION_DECL)
6847 fndecl = fntype;
6848 fntype = TREE_TYPE (fntype);
6851 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6853 else
6854 pcs_variant = arm_pcs_default;
6856 if (pcs_variant != ARM_PCS_AAPCS)
6858 int i;
6860 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6861 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6862 TYPE_MODE (type),
6863 type))
6864 return i;
6866 return -1;
6869 static rtx
6870 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6871 const_tree fntype)
6873 /* We aren't passed a decl, so we can't check that a call is local.
6874 However, it isn't clear that that would be a win anyway, since it
6875 might limit some tail-calling opportunities. */
6876 enum arm_pcs pcs_variant;
6877 int unsignedp ATTRIBUTE_UNUSED;
6879 if (fntype)
6881 const_tree fndecl = NULL_TREE;
6883 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 fndecl = fntype;
6886 fntype = TREE_TYPE (fntype);
6889 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 else
6892 pcs_variant = arm_pcs_default;
6894 /* Promote integer types. */
6895 if (type && INTEGRAL_TYPE_P (type))
6896 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6898 if (pcs_variant != ARM_PCS_AAPCS)
6900 int i;
6902 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6903 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6904 type))
6905 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6906 mode, type);
6909 /* Promotes small structs returned in a register to full-word size
6910 for big-endian AAPCS. */
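/* E.g. (illustration, not in the original comment): on a big-endian
   AAPCS target a 6-byte struct has SIZE rounded up to 8 here and is
   returned in DImode, i.e. in r0 and r1.  */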
6911 if (type && arm_return_in_msb (type))
6913 HOST_WIDE_INT size = int_size_in_bytes (type);
6914 if (size % UNITS_PER_WORD != 0)
6916 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6917 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6921 return gen_rtx_REG (mode, R0_REGNUM);
6924 static rtx
6925 aapcs_libcall_value (machine_mode mode)
6927 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6928 && GET_MODE_SIZE (mode) <= 4)
6929 mode = SImode;
6931 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6934 /* Lay out a function argument using the AAPCS rules. The rule
6935 numbers referred to here are those in the AAPCS. */
6936 static void
6937 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6938 const_tree type, bool named)
6940 int nregs, nregs2;
6941 int ncrn;
6943 /* We only need to do this once per argument. */
6944 if (pcum->aapcs_arg_processed)
6945 return;
6947 pcum->aapcs_arg_processed = true;
6949 /* Special case: if named is false then we are handling an incoming
6950 anonymous argument which is on the stack. */
6951 if (!named)
6952 return;
6954 /* Is this a potential co-processor register candidate? */
6955 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6957 int slot = aapcs_select_call_coproc (pcum, mode, type);
6958 pcum->aapcs_cprc_slot = slot;
6960 /* We don't have to apply any of the rules from part B of the
6961 preparation phase, these are handled elsewhere in the
6962 compiler. */
6964 if (slot >= 0)
6966 /* A Co-processor register candidate goes either in its own
6967 class of registers or on the stack. */
6968 if (!pcum->aapcs_cprc_failed[slot])
6970 /* C1.cp - Try to allocate the argument to co-processor
6971 registers. */
6972 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6973 return;
6975 /* C2.cp - Put the argument on the stack and note that we
6976 can't assign any more candidates in this slot. We also
6977 need to note that we have allocated stack space, so that
6978 we won't later try to split a non-cprc candidate between
6979 core registers and the stack. */
6980 pcum->aapcs_cprc_failed[slot] = true;
6981 pcum->can_split = false;
6984 /* We didn't get a register, so this argument goes on the
6985 stack. */
6986 gcc_assert (pcum->can_split == false);
6987 return;
6991 /* C3 - For double-word aligned arguments, round the NCRN up to the
6992 next even number. */
6993 ncrn = pcum->aapcs_ncrn;
6994 if (ncrn & 1)
6996 int res = arm_needs_doubleword_align (mode, type);
6997 /* Only warn during RTL expansion of call stmts, otherwise we would
6998 warn e.g. during gimplification even on functions that will be
6999 always inlined, and we'd warn multiple times. Don't warn when
7000 called in expand_function_start either, as we warn instead in
7001 arm_function_arg_boundary in that case. */
7002 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7003 inform (input_location, "parameter passing for argument of type "
7004 "%qT changed in GCC 7.1", type);
7005 else if (res > 0)
7006 ncrn++;
7009 nregs = ARM_NUM_REGS2(mode, type);
7011 /* Sigh, this test should really assert that nregs > 0, but a GCC
7012 extension allows empty structs and then gives them empty size; it
7013 then allows such a structure to be passed by value. For some of
7014 the code below we have to pretend that such an argument has
7015 non-zero size so that we 'locate' it correctly either in
7016 registers or on the stack. */
7017 gcc_assert (nregs >= 0);
7019 nregs2 = nregs ? nregs : 1;
7021 /* C4 - Argument fits entirely in core registers. */
7022 if (ncrn + nregs2 <= NUM_ARG_REGS)
7024 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7025 pcum->aapcs_next_ncrn = ncrn + nregs;
7026 return;
7029 /* C5 - Some core registers left and there are no arguments already
7030 on the stack: split this argument between the remaining core
7031 registers and the stack. */
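/* Illustrative case (assumption, not in the original comment): a
   12-byte struct arriving with NCRN == 2 gets r2 as its register
   here, aapcs_partial becomes 8 (the bytes that travel in r2/r3),
   and the final word is placed on the stack.  */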
7032 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7034 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7035 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7036 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7037 return;
7040 /* C6 - NCRN is set to 4. */
7041 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7043 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7044 return;
7047 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7048 for a call to a function whose data type is FNTYPE.
7049 For a library call, FNTYPE is NULL. */
7050 void
7051 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7052 rtx libname,
7053 tree fndecl ATTRIBUTE_UNUSED)
7055 /* Long call handling. */
7056 if (fntype)
7057 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7058 else
7059 pcum->pcs_variant = arm_pcs_default;
7061 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7063 if (arm_libcall_uses_aapcs_base (libname))
7064 pcum->pcs_variant = ARM_PCS_AAPCS;
7066 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7067 pcum->aapcs_reg = NULL_RTX;
7068 pcum->aapcs_partial = 0;
7069 pcum->aapcs_arg_processed = false;
7070 pcum->aapcs_cprc_slot = -1;
7071 pcum->can_split = true;
7073 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7075 int i;
7077 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7079 pcum->aapcs_cprc_failed[i] = false;
7080 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7083 return;
7086 /* Legacy ABIs */
7088 /* On the ARM, the offset starts at 0. */
7089 pcum->nregs = 0;
7090 pcum->iwmmxt_nregs = 0;
7091 pcum->can_split = true;
7093 /* Varargs vectors are treated the same as long long.
7094 named_count avoids having to change the way arm handles 'named' */
7095 pcum->named_count = 0;
7096 pcum->nargs = 0;
7098 if (TARGET_REALLY_IWMMXT && fntype)
7100 tree fn_arg;
7102 for (fn_arg = TYPE_ARG_TYPES (fntype);
7103 fn_arg;
7104 fn_arg = TREE_CHAIN (fn_arg))
7105 pcum->named_count += 1;
7107 if (! pcum->named_count)
7108 pcum->named_count = INT_MAX;
7112 /* Return 2 if double word alignment is required for argument passing,
7113 but wasn't required before the fix for PR88469.
7114 Return 1 if double word alignment is required for argument passing.
7115 Return -1 if double word alignment used to be required for argument
7116 passing before PR77728 ABI fix, but is not required anymore.
7117 Return 0 if double word alignment is not required and wasn't required
7118 before either. */
7119 static int
7120 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7122 if (!type)
7123 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7125 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7126 if (!AGGREGATE_TYPE_P (type))
7127 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7129 /* Array types: Use member alignment of element type. */
7130 if (TREE_CODE (type) == ARRAY_TYPE)
7131 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7133 int ret = 0;
7134 int ret2 = 0;
7135 /* Record/aggregate types: Use greatest member alignment of any member.
7137 Note that we explicitly consider zero-sized fields here, even though
7138 they don't map to AAPCS machine types. For example, in:
7140 struct __attribute__((aligned(8))) empty {};
7142 struct s {
7143 [[no_unique_address]] empty e;
7144 int x;
7147 "s" contains only one Fundamental Data Type (the int field)
7148 but gains 8-byte alignment and size thanks to "e". */
7149 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7150 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7152 if (TREE_CODE (field) == FIELD_DECL)
7153 return 1;
7154 else
7155 /* Before PR77728 fix, we were incorrectly considering also
7156 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7157 Make sure we can warn about that with -Wpsabi. */
7158 ret = -1;
7160 else if (TREE_CODE (field) == FIELD_DECL
7161 && DECL_BIT_FIELD_TYPE (field)
7162 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7163 ret2 = 1;
7165 if (ret2)
7166 return 2;
7168 return ret;
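/* Editorial illustration (not part of the original source) of the return
   values described above, assuming a 32-bit PARM_BOUNDARY:

     long long                          -> 1  (natural alignment of 64 bits)
     struct s1 { long long x; };        -> 1  (over-aligned FIELD_DECL)
     struct s2 { long long x : 8; };    -> 2  (over-aligned bit-field type,
                                               the PR88469 / GCC 9.1 change)
     a C++ aggregate whose only over-aligned member is not a FIELD_DECL
     (e.g. a static data member)        -> -1 (required only before the
                                               PR77728 / GCC 7.1 fix)
     int, float, struct s3 { int x; };  -> 0  */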
7172 /* Determine where to put an argument to a function.
7173 Value is zero to push the argument on the stack,
7174 or a hard register in which to store the argument.
7176 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7177 the preceding args and about the function being called.
7178 ARG is a description of the argument.
7180 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7181 other arguments are passed on the stack. If (NAMED == 0) (which happens
7182 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7183 defined), say it is passed in the stack (function_prologue will
7184 indeed make it pass in the stack if necessary). */
7186 static rtx
7187 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7189 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7190 int nregs;
7192 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7193 a call insn (op3 of a call_value insn). */
7194 if (arg.end_marker_p ())
7195 return const0_rtx;
7197 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7199 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7200 return pcum->aapcs_reg;
7203 /* Varargs vectors are treated the same as long long.
7204 named_count avoids having to change the way arm handles 'named' */
7205 if (TARGET_IWMMXT_ABI
7206 && arm_vector_mode_supported_p (arg.mode)
7207 && pcum->named_count > pcum->nargs + 1)
7209 if (pcum->iwmmxt_nregs <= 9)
7210 return gen_rtx_REG (arg.mode,
7211 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7212 else
7214 pcum->can_split = false;
7215 return NULL_RTX;
7219 /* Put doubleword aligned quantities in even register pairs. */
7220 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7222 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7223 if (res < 0 && warn_psabi)
7224 inform (input_location, "parameter passing for argument of type "
7225 "%qT changed in GCC 7.1", arg.type);
7226 else if (res > 0)
7228 pcum->nregs++;
7229 if (res > 1 && warn_psabi)
7230 inform (input_location, "parameter passing for argument of type "
7231 "%qT changed in GCC 9.1", arg.type);
7235 /* Only allow splitting an arg between regs and memory if all preceding
7236 args were allocated to regs. For args passed by reference we only count
7237 the reference pointer. */
7238 if (pcum->can_split)
7239 nregs = 1;
7240 else
7241 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7243 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7244 return NULL_RTX;
7246 return gen_rtx_REG (arg.mode, pcum->nregs);
7249 static unsigned int
7250 arm_function_arg_boundary (machine_mode mode, const_tree type)
7252 if (!ARM_DOUBLEWORD_ALIGN)
7253 return PARM_BOUNDARY;
7255 int res = arm_needs_doubleword_align (mode, type);
7256 if (res < 0 && warn_psabi)
7257 inform (input_location, "parameter passing for argument of type %qT "
7258 "changed in GCC 7.1", type);
7259 if (res > 1 && warn_psabi)
7260 inform (input_location, "parameter passing for argument of type "
7261 "%qT changed in GCC 9.1", type);
7263 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7266 static int
7267 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7269 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7270 int nregs = pcum->nregs;
7272 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7274 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7275 return pcum->aapcs_partial;
7278 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7279 return 0;
7281 if (NUM_ARG_REGS > nregs
7282 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7283 && pcum->can_split)
7284 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7286 return 0;
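/* Editorial illustration (not part of the original source): for the
   legacy-ABI path above, a call such as

     struct two_words { int x[2]; };
     void f (int a, int b, int c, struct two_words s);

   reaches this hook with pcum->nregs == 3 when laying out S, so NUM_ARG_REGS
   (4) is greater than nregs but smaller than nregs + ARM_NUM_REGS2 (2): the
   hook reports (4 - 3) * UNITS_PER_WORD = 4 bytes passed in r3 and the rest
   on the stack.  The AAPCS path delegates the same decision to
   aapcs_layout_arg via pcum->aapcs_partial.  */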
7289 /* Update the data in PCUM to advance over argument ARG. */
7291 static void
7292 arm_function_arg_advance (cumulative_args_t pcum_v,
7293 const function_arg_info &arg)
7295 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7297 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7299 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7301 if (pcum->aapcs_cprc_slot >= 0)
7303 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7304 arg.type);
7305 pcum->aapcs_cprc_slot = -1;
7308 /* Generic stuff. */
7309 pcum->aapcs_arg_processed = false;
7310 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7311 pcum->aapcs_reg = NULL_RTX;
7312 pcum->aapcs_partial = 0;
7314 else
7316 pcum->nargs += 1;
7317 if (arm_vector_mode_supported_p (arg.mode)
7318 && pcum->named_count > pcum->nargs
7319 && TARGET_IWMMXT_ABI)
7320 pcum->iwmmxt_nregs += 1;
7321 else
7322 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7326 /* Variable sized types are passed by reference. This is a GCC
7327 extension to the ARM ABI. */
7329 static bool
7330 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7332 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7335 /* Encode the current state of the #pragma [no_]long_calls. */
7336 typedef enum
7338 OFF, /* No #pragma [no_]long_calls is in effect. */
7339 LONG, /* #pragma long_calls is in effect. */
7340 SHORT /* #pragma no_long_calls is in effect. */
7341 } arm_pragma_enum;
7343 static arm_pragma_enum arm_pragma_long_calls = OFF;
7345 void
7346 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7348 arm_pragma_long_calls = LONG;
7351 void
7352 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7354 arm_pragma_long_calls = SHORT;
7357 void
7358 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7360 arm_pragma_long_calls = OFF;
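/* Editorial sketch (not part of the original source) of how the pragma
   callbacks above are typically driven from user code:

     #pragma long_calls
     void far_helper (void);      /* gets an implicit long_call attribute  */
     #pragma no_long_calls
     void near_helper (void);     /* gets an implicit short_call attribute  */
     #pragma long_calls_off
     void default_helper (void);  /* back to the command-line behaviour  */

   The attribute itself is attached later, in
   arm_set_default_type_attributes below.  */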
7363 /* Handle an attribute requiring a FUNCTION_DECL;
7364 arguments as in struct attribute_spec.handler. */
7365 static tree
7366 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7367 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7369 if (TREE_CODE (*node) != FUNCTION_DECL)
7371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7372 name);
7373 *no_add_attrs = true;
7376 return NULL_TREE;
7379 /* Handle an "interrupt" or "isr" attribute;
7380 arguments as in struct attribute_spec.handler. */
7381 static tree
7382 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7383 bool *no_add_attrs)
7385 if (DECL_P (*node))
7387 if (TREE_CODE (*node) != FUNCTION_DECL)
7389 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7390 name);
7391 *no_add_attrs = true;
7393 else if (TARGET_VFP_BASE)
7395 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7396 name);
7398 /* FIXME: the argument if any is checked for type attributes;
7399 should it be checked for decl ones? */
7401 else
7403 if (TREE_CODE (*node) == FUNCTION_TYPE
7404 || TREE_CODE (*node) == METHOD_TYPE)
7406 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7408 warning (OPT_Wattributes, "%qE attribute ignored",
7409 name);
7410 *no_add_attrs = true;
7413 else if (TREE_CODE (*node) == POINTER_TYPE
7414 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7415 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7416 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7418 *node = build_variant_type_copy (*node);
7419 TREE_TYPE (*node) = build_type_attribute_variant
7420 (TREE_TYPE (*node),
7421 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7422 *no_add_attrs = true;
7424 else
7426 /* Possibly pass this attribute on from the type to a decl. */
7427 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7428 | (int) ATTR_FLAG_FUNCTION_NEXT
7429 | (int) ATTR_FLAG_ARRAY_NEXT))
7431 *no_add_attrs = true;
7432 return tree_cons (name, args, NULL_TREE);
7434 else
7436 warning (OPT_Wattributes, "%qE attribute ignored",
7437 name);
7442 return NULL_TREE;
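/* Editorial illustration (not part of the original source): declarations the
   handler above is meant to accept or reject, assuming an ARM target:

     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));   /* accepted  */
     void timer_isr (void) __attribute__ ((isr));                /* accepted  */
     int bad_data __attribute__ ((interrupt ("IRQ")));           /* warned:
                                        only applies to functions  */

   An unrecognised kind string makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is dropped with a warning.  */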
7445 /* Handle a "pcs" attribute; arguments as in struct
7446 attribute_spec.handler. */
7447 static tree
7448 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7449 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7451 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7453 warning (OPT_Wattributes, "%qE attribute ignored", name);
7454 *no_add_attrs = true;
7456 return NULL_TREE;
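/* Editorial illustration (not part of the original source): the "pcs"
   attribute accepts the variants understood by arm_pcs_from_attribute, e.g.

     double fma_vfp (double, double, double)
       __attribute__ ((pcs ("aapcs-vfp")));
     double fma_base (double, double, double)
       __attribute__ ((pcs ("aapcs")));

   Anything else (a misspelling, an unsupported variant, ...) is rejected
   here with a warning and the attribute is not added.  */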
7459 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7460 /* Handle the "notshared" attribute. This attribute is another way of
7461 requesting hidden visibility. ARM's compiler supports
7462 "__declspec(notshared)"; we support the same thing via an
7463 attribute. */
7465 static tree
7466 arm_handle_notshared_attribute (tree *node,
7467 tree name ATTRIBUTE_UNUSED,
7468 tree args ATTRIBUTE_UNUSED,
7469 int flags ATTRIBUTE_UNUSED,
7470 bool *no_add_attrs)
7472 tree decl = TYPE_NAME (*node);
7474 if (decl)
7476 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7477 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7478 *no_add_attrs = false;
7480 return NULL_TREE;
7482 #endif
7484 /* This function returns true if a function with declaration FNDECL and type
7485 FNTYPE uses the stack to pass arguments or return variables and false
7486 otherwise. This is used for functions with the attributes
7487 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7488 diagnostic messages if the stack is used. NAME is the name of the attribute
7489 used. */
7491 static bool
7492 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7494 function_args_iterator args_iter;
7495 CUMULATIVE_ARGS args_so_far_v;
7496 cumulative_args_t args_so_far;
7497 bool first_param = true;
7498 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7500 /* Error out if any argument is passed on the stack. */
7501 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7502 args_so_far = pack_cumulative_args (&args_so_far_v);
7503 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7505 rtx arg_rtx;
7507 prev_arg_type = arg_type;
7508 if (VOID_TYPE_P (arg_type))
7509 continue;
7511 function_arg_info arg (arg_type, /*named=*/true);
7512 if (!first_param)
7513 /* ??? We should advance after processing the argument and pass
7514 the argument we're advancing past. */
7515 arm_function_arg_advance (args_so_far, arg);
7516 arg_rtx = arm_function_arg (args_so_far, arg);
7517 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7519 error ("%qE attribute not available to functions with arguments "
7520 "passed on the stack", name);
7521 return true;
7523 first_param = false;
7526 /* Error out for variadic functions since we cannot control how many
7527 arguments will be passed and thus stack could be used. stdarg_p () is not
7528 used for the checking to avoid browsing arguments twice. */
7529 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7531 error ("%qE attribute not available to functions with variable number "
7532 "of arguments", name);
7533 return true;
7536 /* Error out if return value is passed on the stack. */
7537 ret_type = TREE_TYPE (fntype);
7538 if (arm_return_in_memory (ret_type, fntype))
7540 error ("%qE attribute not available to functions that return value on "
7541 "the stack", name);
7542 return true;
7544 return false;
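/* Editorial illustration (not part of the original source): signatures the
   check above rejects for cmse_nonsecure_entry / cmse_nonsecure_call,
   assuming the base AAPCS:

     void f (int, int, int, int, int);   /* fifth argument lands on the stack  */
     struct big { int x[6]; };
     struct big g (void);                /* 24-byte result returned in memory  */
     int h (int, ...);                   /* variadic: stack use can't be ruled out  */

   whereas e.g. int ok (int, int, int, int) passes all arguments and the
   return value in registers and is accepted.  */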
7547 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7548 function will check whether the attribute is allowed here and will add the
7549 attribute to the function declaration tree or otherwise issue a warning. */
7551 static tree
7552 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7553 tree /* args */,
7554 int /* flags */,
7555 bool *no_add_attrs)
7557 tree fndecl;
7559 if (!use_cmse)
7561 *no_add_attrs = true;
7562 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7563 "option", name);
7564 return NULL_TREE;
7567 /* Ignore attribute for function types. */
7568 if (TREE_CODE (*node) != FUNCTION_DECL)
7570 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7571 name);
7572 *no_add_attrs = true;
7573 return NULL_TREE;
7576 fndecl = *node;
7578 /* Warn for static linkage functions. */
7579 if (!TREE_PUBLIC (fndecl))
7581 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7582 "with static linkage", name);
7583 *no_add_attrs = true;
7584 return NULL_TREE;
7587 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7588 TREE_TYPE (fndecl));
7589 return NULL_TREE;
7593 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7594 function will check whether the attribute is allowed here and will add the
7595 attribute to the function type tree or otherwise issue a diagnostic. The
7596 reason we check this at declaration time is to only allow the use of the
7597 attribute with declarations of function pointers and not function
7598 declarations. This function checks NODE is of the expected type and issues
7599 diagnostics otherwise using NAME. If it is not of the expected type
7600 *NO_ADD_ATTRS will be set to true. */
7602 static tree
7603 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7604 tree /* args */,
7605 int /* flags */,
7606 bool *no_add_attrs)
7608 tree decl = NULL_TREE, fntype = NULL_TREE;
7609 tree type;
7611 if (!use_cmse)
7613 *no_add_attrs = true;
7614 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7615 "option", name);
7616 return NULL_TREE;
7619 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7621 decl = *node;
7622 fntype = TREE_TYPE (decl);
7625 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7626 fntype = TREE_TYPE (fntype);
7628 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7630 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7631 "function pointer", name);
7632 *no_add_attrs = true;
7633 return NULL_TREE;
7636 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7638 if (*no_add_attrs)
7639 return NULL_TREE;
7641 /* Prevent trees being shared among function types with and without
7642 cmse_nonsecure_call attribute. */
7643 type = TREE_TYPE (decl);
7645 type = build_distinct_type_copy (type);
7646 TREE_TYPE (decl) = type;
7647 fntype = type;
7649 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7651 type = fntype;
7652 fntype = TREE_TYPE (fntype);
7653 fntype = build_distinct_type_copy (fntype);
7654 TREE_TYPE (type) = fntype;
7657 /* Construct a type attribute and add it to the function type. */
7658 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7659 TYPE_ATTRIBUTES (fntype));
7660 TYPE_ATTRIBUTES (fntype) = attrs;
7661 return NULL_TREE;
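/* Editorial illustration (not part of the original source): the attribute
   handled above is meant for function-pointer declarations, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (void);
     ns_cb_t *callback;                                  /* accepted  */
     void entry (void) __attribute__ ((cmse_nonsecure_call));
                  /* warned: only applies to the base type of a function
                     pointer  */

   and it only has an effect when compiling with -mcmse.  */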
7664 /* Return 0 if the attributes for two types are incompatible, 1 if they
7665 are compatible, and 2 if they are nearly compatible (which causes a
7666 warning to be generated). */
7667 static int
7668 arm_comp_type_attributes (const_tree type1, const_tree type2)
7670 int l1, l2, s1, s2;
7672 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7673 TYPE_ATTRIBUTES (type1));
7674 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7675 TYPE_ATTRIBUTES (type2));
7676 if (bool (attrs1) != bool (attrs2))
7677 return 0;
7678 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7679 return 0;
7681 /* Check for mismatch of non-default calling convention. */
7682 if (TREE_CODE (type1) != FUNCTION_TYPE)
7683 return 1;
7685 /* Check for mismatched call attributes. */
7686 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7687 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7688 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7689 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7691 /* Only bother to check if an attribute is defined. */
7692 if (l1 | l2 | s1 | s2)
7694 /* If one type has an attribute, the other must have the same attribute. */
7695 if ((l1 != l2) || (s1 != s2))
7696 return 0;
7698 /* Disallow mixed attributes. */
7699 if ((l1 & s2) || (l2 & s1))
7700 return 0;
7703 /* Check for mismatched ISR attribute. */
7704 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7705 if (! l1)
7706 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7707 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7708 if (! l2)
7709 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7710 if (l1 != l2)
7711 return 0;
7713 l1 = lookup_attribute ("cmse_nonsecure_call",
7714 TYPE_ATTRIBUTES (type1)) != NULL;
7715 l2 = lookup_attribute ("cmse_nonsecure_call",
7716 TYPE_ATTRIBUTES (type2)) != NULL;
7718 if (l1 != l2)
7719 return 0;
7721 return 1;
7724 /* Assigns default attributes to newly defined type. This is used to
7725 set short_call/long_call attributes for function types of
7726 functions defined inside corresponding #pragma scopes. */
7727 static void
7728 arm_set_default_type_attributes (tree type)
7730 /* Add __attribute__ ((long_call)) to all functions, when
7731 inside #pragma long_calls or __attribute__ ((short_call)),
7732 when inside #pragma no_long_calls. */
7733 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7735 tree type_attr_list, attr_name;
7736 type_attr_list = TYPE_ATTRIBUTES (type);
7738 if (arm_pragma_long_calls == LONG)
7739 attr_name = get_identifier ("long_call");
7740 else if (arm_pragma_long_calls == SHORT)
7741 attr_name = get_identifier ("short_call");
7742 else
7743 return;
7745 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7746 TYPE_ATTRIBUTES (type) = type_attr_list;
7750 /* Return true if DECL is known to be linked into section SECTION. */
7752 static bool
7753 arm_function_in_section_p (tree decl, section *section)
7755 /* We can only be certain about the prevailing symbol definition. */
7756 if (!decl_binds_to_current_def_p (decl))
7757 return false;
7759 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7760 if (!DECL_SECTION_NAME (decl))
7762 /* Make sure that we will not create a unique section for DECL. */
7763 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7764 return false;
7767 return function_section (decl) == section;
7770 /* Return nonzero if a 32-bit "long_call" should be generated for
7771 a call from the current function to DECL. We generate a long_call
7772 if the function:
7774 a. has an __attribute__ ((long_call))
7775 or b. is within the scope of a #pragma long_calls
7776 or c. the -mlong-calls command line switch has been specified
7778 However we do not generate a long call if the function:
7780 d. has an __attribute__ ((short_call))
7781 or e. is inside the scope of a #pragma no_long_calls
7782 or f. is defined in the same section as the current function. */
7784 bool
7785 arm_is_long_call_p (tree decl)
7787 tree attrs;
7789 if (!decl)
7790 return TARGET_LONG_CALLS;
7792 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7793 if (lookup_attribute ("short_call", attrs))
7794 return false;
7796 /* For "f", be conservative, and only cater for cases in which the
7797 whole of the current function is placed in the same section. */
7798 if (!flag_reorder_blocks_and_partition
7799 && TREE_CODE (decl) == FUNCTION_DECL
7800 && arm_function_in_section_p (decl, current_function_section ()))
7801 return false;
7803 if (lookup_attribute ("long_call", attrs))
7804 return true;
7806 return TARGET_LONG_CALLS;
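/* Editorial illustration (not part of the original source): given the rules
   a-f above,

     extern void far_away (void) __attribute__ ((long_call));
     static void nearby (void) { }

   a call to far_away is always emitted as a long call (rule a), a call to
   nearby defined in the same section as the caller stays a plain BL even
   under -mlong-calls (rule f), and a short_call attribute or
   #pragma no_long_calls overrides -mlong-calls (rules d and e).  */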
7809 /* Return nonzero if it is ok to make a tail-call to DECL. */
7810 static bool
7811 arm_function_ok_for_sibcall (tree decl, tree exp)
7813 unsigned long func_type;
7815 if (cfun->machine->sibcall_blocked)
7816 return false;
7818 if (TARGET_FDPIC)
7820 /* In FDPIC, never tailcall something for which we have no decl:
7821 the target function could be in a different module, requiring
7822 a different FDPIC register value. */
7823 if (decl == NULL)
7824 return false;
7827 /* Never tailcall something if we are generating code for Thumb-1. */
7828 if (TARGET_THUMB1)
7829 return false;
7831 /* The PIC register is live on entry to VxWorks PLT entries, so we
7832 must make the call before restoring the PIC register. */
7833 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7834 return false;
7836 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7837 may be used both as target of the call and base register for restoring
7838 the VFP registers */
7839 if (TARGET_APCS_FRAME && TARGET_ARM
7840 && TARGET_HARD_FLOAT
7841 && decl && arm_is_long_call_p (decl))
7842 return false;
7844 /* If we are interworking and the function is not declared static
7845 then we can't tail-call it unless we know that it exists in this
7846 compilation unit (since it might be a Thumb routine). */
7847 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7848 && !TREE_ASM_WRITTEN (decl))
7849 return false;
7851 func_type = arm_current_func_type ();
7852 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7853 if (IS_INTERRUPT (func_type))
7854 return false;
7856 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7857 generated for entry functions themselves. */
7858 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7859 return false;
7861 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7862 this would complicate matters for later code generation. */
7863 if (TREE_CODE (exp) == CALL_EXPR)
7865 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7866 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7867 return false;
7870 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7872 /* Check that the return value locations are the same. For
7873 example that we aren't returning a value from the sibling in
7874 a VFP register but then need to transfer it to a core
7875 register. */
7876 rtx a, b;
7877 tree decl_or_type = decl;
7879 /* If it is an indirect function pointer, get the function type. */
7880 if (!decl)
7881 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7883 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7884 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7885 cfun->decl, false);
7886 if (!rtx_equal_p (a, b))
7887 return false;
7890 /* Never tailcall if function may be called with a misaligned SP. */
7891 if (IS_STACKALIGN (func_type))
7892 return false;
7894 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7895 references should become a NOP. Don't convert such calls into
7896 sibling calls. */
7897 if (TARGET_AAPCS_BASED
7898 && arm_abi == ARM_ABI_AAPCS
7899 && decl
7900 && DECL_WEAK (decl))
7901 return false;
7903 /* We cannot do a tailcall for an indirect call by descriptor if all the
7904 argument registers are used because the only register left to load the
7905 address is IP and it will already contain the static chain. */
7906 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7908 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7909 CUMULATIVE_ARGS cum;
7910 cumulative_args_t cum_v;
7912 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7913 cum_v = pack_cumulative_args (&cum);
7915 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7917 tree type = TREE_VALUE (t);
7918 if (!VOID_TYPE_P (type))
7920 function_arg_info arg (type, /*named=*/true);
7921 arm_function_arg_advance (cum_v, arg);
7925 function_arg_info arg (integer_type_node, /*named=*/true);
7926 if (!arm_function_arg (cum_v, arg))
7927 return false;
7930 /* Everything else is ok. */
7931 return true;
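/* Editorial illustration (not part of the original source): cases in which
   the predicate above refuses a sibcall include Thumb-1 code, ISR routines,
   calls through a cmse_nonsecure_call function pointer, and calls to weak
   functions on bare-metal AAPCS targets.  The return-value check can be
   seen with

     double ret_me (void);
     double caller (void) { return ret_me (); }

   if ret_me is declared with pcs ("aapcs-vfp") (result in d0) while caller
   itself follows the base AAPCS (result in r0/r1): the result locations
   differ, so the tail call is not taken.  */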
7935 /* Addressing mode support functions. */
7937 /* Return nonzero if X is a legitimate immediate operand when compiling
7938 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7940 legitimate_pic_operand_p (rtx x)
7942 if (SYMBOL_REF_P (x)
7943 || (GET_CODE (x) == CONST
7944 && GET_CODE (XEXP (x, 0)) == PLUS
7945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7946 return 0;
7948 return 1;
7951 /* Record that the current function needs a PIC register. If PIC_REG is null,
7952 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7953 both cases cfun->machine->pic_reg is initialized if we have not already done
7954 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7955 the PIC register is reloaded in the current position of the instruction stream
7956 regardless of whether it was loaded before. Otherwise, it is only loaded
7957 if not already done so (crtl->uses_pic_offset_table is null). Note that
7958 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7959 is only supported iff COMPUTE_NOW is false. */
7961 static void
7962 require_pic_register (rtx pic_reg, bool compute_now)
7964 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7966 /* A lot of the logic here is made obscure by the fact that this
7967 routine gets called as part of the rtx cost estimation process.
7968 We don't want those calls to affect any assumptions about the real
7969 function; and further, we can't call entry_of_function() until we
7970 start the real expansion process. */
7971 if (!crtl->uses_pic_offset_table || compute_now)
7973 gcc_assert (can_create_pseudo_p ()
7974 || (pic_reg != NULL_RTX
7975 && REG_P (pic_reg)
7976 && GET_MODE (pic_reg) == Pmode));
7977 if (arm_pic_register != INVALID_REGNUM
7978 && !compute_now
7979 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7981 if (!cfun->machine->pic_reg)
7982 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7984 /* Play games to avoid marking the function as needing pic
7985 if we are being called as part of the cost-estimation
7986 process. */
7987 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7988 crtl->uses_pic_offset_table = 1;
7990 else
7992 rtx_insn *seq, *insn;
7994 if (pic_reg == NULL_RTX)
7995 pic_reg = gen_reg_rtx (Pmode);
7996 if (!cfun->machine->pic_reg)
7997 cfun->machine->pic_reg = pic_reg;
7999 /* Play games to avoid marking the function as needing pic
8000 if we are being called as part of the cost-estimation
8001 process. */
8002 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8004 crtl->uses_pic_offset_table = 1;
8005 start_sequence ();
8007 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8008 && arm_pic_register > LAST_LO_REGNUM
8009 && !compute_now)
8010 emit_move_insn (cfun->machine->pic_reg,
8011 gen_rtx_REG (Pmode, arm_pic_register));
8012 else
8013 arm_load_pic_register (0UL, pic_reg);
8015 seq = get_insns ();
8016 end_sequence ();
8018 for (insn = seq; insn; insn = NEXT_INSN (insn))
8019 if (INSN_P (insn))
8020 INSN_LOCATION (insn) = prologue_location;
8022 /* We can be called during expansion of PHI nodes, where
8023 we can't yet emit instructions directly in the final
8024 insn stream. Queue the insns on the entry edge, they will
8025 be committed after everything else is expanded. */
8026 if (currently_expanding_to_rtl)
8027 insert_insn_on_edge (seq,
8028 single_succ_edge
8029 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8030 else
8031 emit_insn (seq);
8037 /* Generate insns to calculate the address of ORIG in pic mode. */
8038 static rtx_insn *
8039 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8041 rtx pat;
8042 rtx mem;
8044 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8046 /* Make the MEM as close to a constant as possible. */
8047 mem = SET_SRC (pat);
8048 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8049 MEM_READONLY_P (mem) = 1;
8050 MEM_NOTRAP_P (mem) = 1;
8052 return emit_insn (pat);
8055 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8056 created to hold the result of the load. If not NULL, PIC_REG indicates
8057 which register to use as PIC register, otherwise it is decided by register
8058 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8059 location in the instruction stream, regardless of whether it was loaded
8060 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8061 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8063 Returns the register REG into which the PIC load is performed. */
8066 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8067 bool compute_now)
8069 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8071 if (SYMBOL_REF_P (orig)
8072 || LABEL_REF_P (orig))
8074 if (reg == 0)
8076 gcc_assert (can_create_pseudo_p ());
8077 reg = gen_reg_rtx (Pmode);
8080 /* VxWorks does not impose a fixed gap between segments; the run-time
8081 gap can be different from the object-file gap. We therefore can't
8082 use GOTOFF unless we are absolutely sure that the symbol is in the
8083 same segment as the GOT. Unfortunately, the flexibility of linker
8084 scripts means that we can't be sure of that in general, so assume
8085 that GOTOFF is never valid on VxWorks. */
8086 /* References to weak symbols cannot be resolved locally: they
8087 may be overridden by a non-weak definition at link time. */
8088 rtx_insn *insn;
8089 if ((LABEL_REF_P (orig)
8090 || (SYMBOL_REF_P (orig)
8091 && SYMBOL_REF_LOCAL_P (orig)
8092 && (SYMBOL_REF_DECL (orig)
8093 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8094 && (!SYMBOL_REF_FUNCTION_P (orig)
8095 || arm_fdpic_local_funcdesc_p (orig))))
8096 && NEED_GOT_RELOC
8097 && arm_pic_data_is_text_relative)
8098 insn = arm_pic_static_addr (orig, reg);
8099 else
8101 /* If this function doesn't have a pic register, create one now. */
8102 require_pic_register (pic_reg, compute_now);
8104 if (pic_reg == NULL_RTX)
8105 pic_reg = cfun->machine->pic_reg;
8107 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8110 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8111 by loop. */
8112 set_unique_reg_note (insn, REG_EQUAL, orig);
8114 return reg;
8116 else if (GET_CODE (orig) == CONST)
8118 rtx base, offset;
8120 if (GET_CODE (XEXP (orig, 0)) == PLUS
8121 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8122 return orig;
8124 /* Handle the case where we have: const (UNSPEC_TLS). */
8125 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8126 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8127 return orig;
8129 /* Handle the case where we have:
8130 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8131 CONST_INT. */
8132 if (GET_CODE (XEXP (orig, 0)) == PLUS
8133 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8134 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8136 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8137 return orig;
8140 if (reg == 0)
8142 gcc_assert (can_create_pseudo_p ());
8143 reg = gen_reg_rtx (Pmode);
8146 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8148 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8149 pic_reg, compute_now);
8150 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8151 base == reg ? 0 : reg, pic_reg,
8152 compute_now);
8154 if (CONST_INT_P (offset))
8156 /* The base register doesn't really matter, we only want to
8157 test the index for the appropriate mode. */
8158 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8160 gcc_assert (can_create_pseudo_p ());
8161 offset = force_reg (Pmode, offset);
8164 if (CONST_INT_P (offset))
8165 return plus_constant (Pmode, base, INTVAL (offset));
8168 if (GET_MODE_SIZE (mode) > 4
8169 && (GET_MODE_CLASS (mode) == MODE_INT
8170 || TARGET_SOFT_FLOAT))
8172 emit_insn (gen_addsi3 (reg, base, offset));
8173 return reg;
8176 return gen_rtx_PLUS (Pmode, base, offset);
8179 return orig;
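/* Editorial illustration (not part of the original source): under -fPIC the
   address of a preemptible global, say

     extern int counter;
     int *p = &counter;

   is typically rewritten by the routine above into a load through the GOT,
   conceptually (set (reg Rn) (mem (plus (reg pic) <GOT offset of counter>))),
   whereas a local label, or a local symbol when
   arm_pic_data_is_text_relative holds, takes the cheaper PC-relative
   arm_pic_static_addr path.  */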
8183 /* Generate insns that produce the address of the stack canary. */
8185 arm_stack_protect_tls_canary_mem (bool reload)
8187 rtx tp = gen_reg_rtx (SImode);
8188 if (reload)
8189 emit_insn (gen_reload_tp_hard (tp));
8190 else
8191 emit_insn (gen_load_tp_hard (tp));
8193 rtx reg = gen_reg_rtx (SImode);
8194 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8195 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8196 return gen_rtx_MEM (SImode, reg);
8200 /* Whether a register is callee saved or not. This is necessary because high
8201 registers are marked as caller saved when optimizing for size on Thumb-1
8202 targets despite being callee saved in order to avoid using them. */
8203 #define callee_saved_reg_p(reg) \
8204 (!call_used_or_fixed_reg_p (reg) \
8205 || (TARGET_THUMB1 && optimize_size \
8206 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8208 /* Return a mask for the call-clobbered low registers that are unused
8209 at the end of the prologue. */
8210 static unsigned long
8211 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8213 unsigned long mask = 0;
8214 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8216 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8217 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8218 mask |= 1 << (reg - FIRST_LO_REGNUM);
8219 return mask;
8222 /* Similarly for the start of the epilogue. */
8223 static unsigned long
8224 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8226 unsigned long mask = 0;
8227 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8229 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8230 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8231 mask |= 1 << (reg - FIRST_LO_REGNUM);
8232 return mask;
8235 /* Find a spare register to use during the prolog of a function. */
8237 static int
8238 thumb_find_work_register (unsigned long pushed_regs_mask)
8240 int reg;
8242 unsigned long unused_regs
8243 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8245 /* Check the argument registers first as these are call-used. The
8246 register allocation order means that sometimes r3 might be used
8247 but earlier argument registers might not, so check them all. */
8248 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8249 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8250 return reg;
8252 /* Otherwise look for a call-saved register that is going to be pushed. */
8253 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8254 if (pushed_regs_mask & (1 << reg))
8255 return reg;
8257 if (TARGET_THUMB2)
8259 /* Thumb-2 can use high regs. */
8260 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8261 if (pushed_regs_mask & (1 << reg))
8262 return reg;
8264 /* Something went wrong - thumb_compute_save_reg_mask()
8265 should have arranged for a suitable register to be pushed. */
8266 gcc_unreachable ();
8269 static GTY(()) int pic_labelno;
8271 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8272 low register. */
8274 void
8275 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8277 rtx l1, labelno, pic_tmp, pic_rtx;
8279 if (crtl->uses_pic_offset_table == 0
8280 || TARGET_SINGLE_PIC_BASE
8281 || TARGET_FDPIC)
8282 return;
8284 gcc_assert (flag_pic);
8286 if (pic_reg == NULL_RTX)
8287 pic_reg = cfun->machine->pic_reg;
8288 if (TARGET_VXWORKS_RTP)
8290 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8291 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8292 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8294 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8296 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8297 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8299 else
8301 /* We use an UNSPEC rather than a LABEL_REF because this label
8302 never appears in the code stream. */
8304 labelno = GEN_INT (pic_labelno++);
8305 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8306 l1 = gen_rtx_CONST (VOIDmode, l1);
8308 /* On the ARM the PC register contains 'dot + 8' at the time of the
8309 addition, on the Thumb it is 'dot + 4'. */
8310 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8311 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8312 UNSPEC_GOTSYM_OFF);
8313 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8315 if (TARGET_32BIT)
8317 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8319 else /* TARGET_THUMB1 */
8321 if (arm_pic_register != INVALID_REGNUM
8322 && REGNO (pic_reg) > LAST_LO_REGNUM)
8324 /* We will have pushed the pic register, so we should always be
8325 able to find a work register. */
8326 pic_tmp = gen_rtx_REG (SImode,
8327 thumb_find_work_register (saved_regs));
8328 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8329 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8330 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8332 else if (arm_pic_register != INVALID_REGNUM
8333 && arm_pic_register > LAST_LO_REGNUM
8334 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8336 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8337 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8338 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8340 else
8341 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8345 /* Need to emit this whether or not we obey regdecls,
8346 since setjmp/longjmp can cause life info to screw up. */
8347 emit_use (pic_reg);
8350 /* Try to determine whether an object, referenced via ORIG, will be
8351 placed in the text or data segment. This is used in FDPIC mode, to
8352 decide which relocations to use when accessing ORIG. *IS_READONLY
8353 is set to true if ORIG is a read-only location, false otherwise.
8354 Return true if we could determine the location of ORIG, false
8355 otherwise. *IS_READONLY is valid only when we return true. */
8356 static bool
8357 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8359 *is_readonly = false;
8361 if (LABEL_REF_P (orig))
8363 *is_readonly = true;
8364 return true;
8367 if (SYMBOL_REF_P (orig))
8369 if (CONSTANT_POOL_ADDRESS_P (orig))
8371 *is_readonly = true;
8372 return true;
8374 if (SYMBOL_REF_LOCAL_P (orig)
8375 && !SYMBOL_REF_EXTERNAL_P (orig)
8376 && SYMBOL_REF_DECL (orig)
8377 && (!DECL_P (SYMBOL_REF_DECL (orig))
8378 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8380 tree decl = SYMBOL_REF_DECL (orig);
8381 tree init = (TREE_CODE (decl) == VAR_DECL)
8382 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8383 ? decl : 0;
8384 int reloc = 0;
8385 bool named_section, readonly;
8387 if (init && init != error_mark_node)
8388 reloc = compute_reloc_for_constant (init);
8390 named_section = TREE_CODE (decl) == VAR_DECL
8391 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8392 readonly = decl_readonly_section (decl, reloc);
8394 /* We don't know where the link script will put a named
8395 section, so return false in such a case. */
8396 if (named_section)
8397 return false;
8399 *is_readonly = readonly;
8400 return true;
8403 /* We don't know. */
8404 return false;
8407 gcc_unreachable ();
8410 /* Generate code to load the address of a static var when flag_pic is set. */
8411 static rtx_insn *
8412 arm_pic_static_addr (rtx orig, rtx reg)
8414 rtx l1, labelno, offset_rtx;
8415 rtx_insn *insn;
8417 gcc_assert (flag_pic);
8419 bool is_readonly = false;
8420 bool info_known = false;
8422 if (TARGET_FDPIC
8423 && SYMBOL_REF_P (orig)
8424 && !SYMBOL_REF_FUNCTION_P (orig))
8425 info_known = arm_is_segment_info_known (orig, &is_readonly);
8427 if (TARGET_FDPIC
8428 && SYMBOL_REF_P (orig)
8429 && !SYMBOL_REF_FUNCTION_P (orig)
8430 && !info_known)
8432 /* We don't know where orig is stored, so we have to be
8433 pessimistic and use a GOT relocation. */
8434 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8436 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8438 else if (TARGET_FDPIC
8439 && SYMBOL_REF_P (orig)
8440 && (SYMBOL_REF_FUNCTION_P (orig)
8441 || !is_readonly))
8443 /* We use the GOTOFF relocation. */
8444 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8446 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8447 emit_insn (gen_movsi (reg, l1));
8448 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8450 else
8452 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8453 PC-relative access. */
8454 /* We use an UNSPEC rather than a LABEL_REF because this label
8455 never appears in the code stream. */
8456 labelno = GEN_INT (pic_labelno++);
8457 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8458 l1 = gen_rtx_CONST (VOIDmode, l1);
8460 /* On the ARM the PC register contains 'dot + 8' at the time of the
8461 addition, on the Thumb it is 'dot + 4'. */
8462 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8463 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8464 UNSPEC_SYMBOL_OFFSET);
8465 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8467 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8468 labelno));
8471 return insn;
8474 /* Return nonzero if X is valid as an ARM state addressing register. */
8475 static int
8476 arm_address_register_rtx_p (rtx x, int strict_p)
8478 int regno;
8480 if (!REG_P (x))
8481 return 0;
8483 regno = REGNO (x);
8485 if (strict_p)
8486 return ARM_REGNO_OK_FOR_BASE_P (regno);
8488 return (regno <= LAST_ARM_REGNUM
8489 || regno >= FIRST_PSEUDO_REGISTER
8490 || regno == FRAME_POINTER_REGNUM
8491 || regno == ARG_POINTER_REGNUM);
8494 /* Return TRUE if this rtx is the difference of a symbol and a label,
8495 and will reduce to a PC-relative relocation in the object file.
8496 Expressions like this can be left alone when generating PIC, rather
8497 than forced through the GOT. */
8498 static int
8499 pcrel_constant_p (rtx x)
8501 if (GET_CODE (x) == MINUS)
8502 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8504 return FALSE;
8507 /* Return true if X will surely end up in an index register after next
8508 splitting pass. */
8509 static bool
8510 will_be_in_index_register (const_rtx x)
8512 /* arm.md: calculate_pic_address will split this into a register. */
8513 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8516 /* Return nonzero if X is a valid ARM state address operand. */
8518 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8519 int strict_p)
8521 bool use_ldrd;
8522 enum rtx_code code = GET_CODE (x);
8524 if (arm_address_register_rtx_p (x, strict_p))
8525 return 1;
8527 use_ldrd = (TARGET_LDRD
8528 && (mode == DImode || mode == DFmode));
8530 if (code == POST_INC || code == PRE_DEC
8531 || ((code == PRE_INC || code == POST_DEC)
8532 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8533 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8535 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8536 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8537 && GET_CODE (XEXP (x, 1)) == PLUS
8538 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8540 rtx addend = XEXP (XEXP (x, 1), 1);
8542 /* Don't allow ldrd post increment by register because it's hard
8543 to fixup invalid register choices. */
8544 if (use_ldrd
8545 && GET_CODE (x) == POST_MODIFY
8546 && REG_P (addend))
8547 return 0;
8549 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8550 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8553 /* After reload constants split into minipools will have addresses
8554 from a LABEL_REF. */
8555 else if (reload_completed
8556 && (code == LABEL_REF
8557 || (code == CONST
8558 && GET_CODE (XEXP (x, 0)) == PLUS
8559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8560 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8561 return 1;
8563 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8564 return 0;
8566 else if (code == PLUS)
8568 rtx xop0 = XEXP (x, 0);
8569 rtx xop1 = XEXP (x, 1);
8571 return ((arm_address_register_rtx_p (xop0, strict_p)
8572 && ((CONST_INT_P (xop1)
8573 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8574 || (!strict_p && will_be_in_index_register (xop1))))
8575 || (arm_address_register_rtx_p (xop1, strict_p)
8576 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8579 #if 0
8580 /* Reload currently can't handle MINUS, so disable this for now */
8581 else if (GET_CODE (x) == MINUS)
8583 rtx xop0 = XEXP (x, 0);
8584 rtx xop1 = XEXP (x, 1);
8586 return (arm_address_register_rtx_p (xop0, strict_p)
8587 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8589 #endif
8591 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8592 && code == SYMBOL_REF
8593 && CONSTANT_POOL_ADDRESS_P (x)
8594 && ! (flag_pic
8595 && symbol_mentioned_p (get_pool_constant (x))
8596 && ! pcrel_constant_p (get_pool_constant (x))))
8597 return 1;
8599 return 0;
8602 /* Return true if we can avoid creating a constant pool entry for x. */
8603 static bool
8604 can_avoid_literal_pool_for_label_p (rtx x)
8606 /* Normally we can assign constant values to target registers without
8607 the help of a constant pool. But there are cases where we have to use a
8608 constant pool, such as:
8609 1) assigning a label to a register.
8610 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8612 Constant pool access in format:
8613 (set (reg r0) (mem (symbol_ref (".LC0"))))
8614 will cause the use of literal pool (later in function arm_reorg).
8615 So here we mark such format as an invalid format, then the compiler
8616 will adjust it into:
8617 (set (reg r0) (symbol_ref (".LC0")))
8618 (set (reg r0) (mem (reg r0))).
8619 No extra register is required, and (mem (reg r0)) won't cause the use
8620 of literal pools. */
8621 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8622 && CONSTANT_POOL_ADDRESS_P (x))
8623 return 1;
8624 return 0;
8628 /* Return nonzero if X is a valid Thumb-2 address operand. */
8629 static int
8630 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8632 bool use_ldrd;
8633 enum rtx_code code = GET_CODE (x);
8635 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8636 return mve_vector_mem_operand (mode, x, strict_p);
8638 if (arm_address_register_rtx_p (x, strict_p))
8639 return 1;
8641 use_ldrd = (TARGET_LDRD
8642 && (mode == DImode || mode == DFmode));
8644 if (code == POST_INC || code == PRE_DEC
8645 || ((code == PRE_INC || code == POST_DEC)
8646 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8647 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8649 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8650 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8651 && GET_CODE (XEXP (x, 1)) == PLUS
8652 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8654 /* Thumb-2 only has autoincrement by constant. */
8655 rtx addend = XEXP (XEXP (x, 1), 1);
8656 HOST_WIDE_INT offset;
8658 if (!CONST_INT_P (addend))
8659 return 0;
8661 offset = INTVAL(addend);
8662 if (GET_MODE_SIZE (mode) <= 4)
8663 return (offset > -256 && offset < 256);
8665 return (use_ldrd && offset > -1024 && offset < 1024
8666 && (offset & 3) == 0);
8669 /* After reload constants split into minipools will have addresses
8670 from a LABEL_REF. */
8671 else if (reload_completed
8672 && (code == LABEL_REF
8673 || (code == CONST
8674 && GET_CODE (XEXP (x, 0)) == PLUS
8675 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8676 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8677 return 1;
8679 else if (mode == TImode
8680 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8681 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8682 return 0;
8684 else if (code == PLUS)
8686 rtx xop0 = XEXP (x, 0);
8687 rtx xop1 = XEXP (x, 1);
8689 return ((arm_address_register_rtx_p (xop0, strict_p)
8690 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8691 || (!strict_p && will_be_in_index_register (xop1))))
8692 || (arm_address_register_rtx_p (xop1, strict_p)
8693 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8696 else if (can_avoid_literal_pool_for_label_p (x))
8697 return 0;
8699 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8700 && code == SYMBOL_REF
8701 && CONSTANT_POOL_ADDRESS_P (x)
8702 && ! (flag_pic
8703 && symbol_mentioned_p (get_pool_constant (x))
8704 && ! pcrel_constant_p (get_pool_constant (x))))
8705 return 1;
8707 return 0;
8710 /* Return nonzero if INDEX is valid for an address index operand in
8711 ARM state. */
8712 static int
8713 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8714 int strict_p)
8716 HOST_WIDE_INT range;
8717 enum rtx_code code = GET_CODE (index);
8719 /* Standard coprocessor addressing modes. */
8720 if (TARGET_HARD_FLOAT
8721 && (mode == SFmode || mode == DFmode))
8722 return (code == CONST_INT && INTVAL (index) < 1024
8723 && INTVAL (index) > -1024
8724 && (INTVAL (index) & 3) == 0);
8726 /* For quad modes, we restrict the constant offset to be slightly less
8727 than what the instruction format permits. We do this because for
8728 quad mode moves, we will actually decompose them into two separate
8729 double-mode reads or writes. INDEX must therefore be a valid
8730 (double-mode) offset and so should INDEX+8. */
8731 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8732 return (code == CONST_INT
8733 && INTVAL (index) < 1016
8734 && INTVAL (index) > -1024
8735 && (INTVAL (index) & 3) == 0);
8737 /* We have no such constraint on double mode offsets, so we permit the
8738 full range of the instruction format. */
8739 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8740 return (code == CONST_INT
8741 && INTVAL (index) < 1024
8742 && INTVAL (index) > -1024
8743 && (INTVAL (index) & 3) == 0);
8745 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8746 return (code == CONST_INT
8747 && INTVAL (index) < 1024
8748 && INTVAL (index) > -1024
8749 && (INTVAL (index) & 3) == 0);
8751 if (arm_address_register_rtx_p (index, strict_p)
8752 && (GET_MODE_SIZE (mode) <= 4))
8753 return 1;
8755 if (mode == DImode || mode == DFmode)
8757 if (code == CONST_INT)
8759 HOST_WIDE_INT val = INTVAL (index);
8761 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8762 If vldr is selected it uses arm_coproc_mem_operand. */
8763 if (TARGET_LDRD)
8764 return val > -256 && val < 256;
8765 else
8766 return val > -4096 && val < 4092;
8769 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8772 if (GET_MODE_SIZE (mode) <= 4
8773 && ! (arm_arch4
8774 && (mode == HImode
8775 || mode == HFmode
8776 || (mode == QImode && outer == SIGN_EXTEND))))
8778 if (code == MULT)
8780 rtx xiop0 = XEXP (index, 0);
8781 rtx xiop1 = XEXP (index, 1);
8783 return ((arm_address_register_rtx_p (xiop0, strict_p)
8784 && power_of_two_operand (xiop1, SImode))
8785 || (arm_address_register_rtx_p (xiop1, strict_p)
8786 && power_of_two_operand (xiop0, SImode)));
8788 else if (code == LSHIFTRT || code == ASHIFTRT
8789 || code == ASHIFT || code == ROTATERT)
8791 rtx op = XEXP (index, 1);
8793 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8794 && CONST_INT_P (op)
8795 && INTVAL (op) > 0
8796 && INTVAL (op) <= 31);
8800 /* For ARM v4 we may be doing a sign-extend operation during the
8801 load. */
8802 if (arm_arch4)
8804 if (mode == HImode
8805 || mode == HFmode
8806 || (outer == SIGN_EXTEND && mode == QImode))
8807 range = 256;
8808 else
8809 range = 4096;
8811 else
8812 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8814 return (code == CONST_INT
8815 && INTVAL (index) < range
8816 && INTVAL (index) > -range);
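/* Editorial illustration (not part of the original source): addresses the
   predicate above accepts for a few common cases, shown in assembly syntax:

     ldr  r0, [r1, #4095]        SImode, immediate range +/-4095
     ldrh r0, [r1, #255]         HImode on ARMv4 and later, range +/-255
     ldrd r0, r1, [r2, #248]     DImode with TARGET_LDRD, range +/-255
     vldr d0, [r1, #1020]        DFmode, multiple of 4 below 1024
     ldr  r0, [r1, r2, lsl #2]   register index scaled by a power of two  */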
8819 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8820 index operand. i.e. 1, 2, 4 or 8. */
8821 static bool
8822 thumb2_index_mul_operand (rtx op)
8824 HOST_WIDE_INT val;
8826 if (!CONST_INT_P (op))
8827 return false;
8829 val = INTVAL(op);
8830 return (val == 1 || val == 2 || val == 4 || val == 8);
8833 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8834 static int
8835 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8837 enum rtx_code code = GET_CODE (index);
8839 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8840 /* Standard coprocessor addressing modes. */
8841 if (TARGET_VFP_BASE
8842 && (mode == SFmode || mode == DFmode))
8843 return (code == CONST_INT && INTVAL (index) < 1024
8844 /* Thumb-2 allows only > -256 index range for its core register
8845 load/stores. Since we allow SF/DF in core registers, we have
8846 to use the intersection between -256~4096 (core) and -1024~1024
8847 (coprocessor). */
8848 && INTVAL (index) > -256
8849 && (INTVAL (index) & 3) == 0);
8851 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8853 /* For DImode assume values will usually live in core regs
8854 and only allow LDRD addressing modes. */
8855 if (!TARGET_LDRD || mode != DImode)
8856 return (code == CONST_INT
8857 && INTVAL (index) < 1024
8858 && INTVAL (index) > -1024
8859 && (INTVAL (index) & 3) == 0);
8862 /* For quad modes, we restrict the constant offset to be slightly less
8863 than what the instruction format permits. We do this because for
8864 quad mode moves, we will actually decompose them into two separate
8865 double-mode reads or writes. INDEX must therefore be a valid
8866 (double-mode) offset and so should INDEX+8. */
8867 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8868 return (code == CONST_INT
8869 && INTVAL (index) < 1016
8870 && INTVAL (index) > -1024
8871 && (INTVAL (index) & 3) == 0);
8873 /* We have no such constraint on double mode offsets, so we permit the
8874 full range of the instruction format. */
8875 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8876 return (code == CONST_INT
8877 && INTVAL (index) < 1024
8878 && INTVAL (index) > -1024
8879 && (INTVAL (index) & 3) == 0);
8881 if (arm_address_register_rtx_p (index, strict_p)
8882 && (GET_MODE_SIZE (mode) <= 4))
8883 return 1;
8885 if (mode == DImode || mode == DFmode)
8887 if (code == CONST_INT)
8889 HOST_WIDE_INT val = INTVAL (index);
8890 /* Thumb-2 ldrd only has reg+const addressing modes.
8891 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8892 If vldr is selected it uses arm_coproc_mem_operand. */
8893 if (TARGET_LDRD)
8894 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8895 else
8896 return IN_RANGE (val, -255, 4095 - 4);
8898 else
8899 return 0;
8902 if (code == MULT)
8904 rtx xiop0 = XEXP (index, 0);
8905 rtx xiop1 = XEXP (index, 1);
8907 return ((arm_address_register_rtx_p (xiop0, strict_p)
8908 && thumb2_index_mul_operand (xiop1))
8909 || (arm_address_register_rtx_p (xiop1, strict_p)
8910 && thumb2_index_mul_operand (xiop0)));
8912 else if (code == ASHIFT)
8914 rtx op = XEXP (index, 1);
8916 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8917 && CONST_INT_P (op)
8918 && INTVAL (op) > 0
8919 && INTVAL (op) <= 3);
8922 return (code == CONST_INT
8923 && INTVAL (index) < 4096
8924 && INTVAL (index) > -256);
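/* Editorial illustration (not part of the original source): the Thumb-2
   variant above accepts, for example,

     ldr  r0, [r1, #4095]        positive 12-bit immediate
     ldr  r0, [r1, #-255]        negative offsets limited to 8 bits
     ldrd r0, r1, [r2, #1020]    DImode, multiple of 4 within +/-1020
     ldr  r0, [r1, r2, lsl #3]   register index shifted left by at most 3  */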
8927 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8928 static int
8929 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8931 int regno;
8933 if (!REG_P (x))
8934 return 0;
8936 regno = REGNO (x);
8938 if (strict_p)
8939 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8941 return (regno <= LAST_LO_REGNUM
8942 || regno > LAST_VIRTUAL_REGISTER
8943 || regno == FRAME_POINTER_REGNUM
8944 || (GET_MODE_SIZE (mode) >= 4
8945 && (regno == STACK_POINTER_REGNUM
8946 || regno >= FIRST_PSEUDO_REGISTER
8947 || x == hard_frame_pointer_rtx
8948 || x == arg_pointer_rtx)));
8951 /* Return nonzero if x is a legitimate index register. This is the case
8952 for any base register that can access a QImode object. */
8953 inline static int
8954 thumb1_index_register_rtx_p (rtx x, int strict_p)
8956 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8959 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8961 The AP may be eliminated to either the SP or the FP, so we use the
8962 least common denominator, e.g. SImode, and offsets from 0 to 64.
8964 ??? Verify whether the above is the right approach.
8966 ??? Also, the FP may be eliminated to the SP, so perhaps that
8967 needs special handling also.
8969 ??? Look at how the mips16 port solves this problem. It probably uses
8970 better ways to solve some of these problems.
8972 Although it is not incorrect, we don't accept QImode and HImode
8973 addresses based on the frame pointer or arg pointer until the
8974 reload pass starts. This is so that eliminating such addresses
8975 into stack based ones won't produce impossible code. */
8977 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8979 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8980 return 0;
8982 /* ??? Not clear if this is right. Experiment. */
8983 if (GET_MODE_SIZE (mode) < 4
8984 && !(reload_in_progress || reload_completed)
8985 && (reg_mentioned_p (frame_pointer_rtx, x)
8986 || reg_mentioned_p (arg_pointer_rtx, x)
8987 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8988 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8989 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8990 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8991 return 0;
8993 /* Accept any base register. SP only in SImode or larger. */
8994 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8995 return 1;
8997 /* This is PC relative data before arm_reorg runs. */
8998 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8999 && SYMBOL_REF_P (x)
9000 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9001 && !arm_disable_literal_pool)
9002 return 1;
9004 /* This is PC relative data after arm_reorg runs. */
9005 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9006 && reload_completed
9007 && (LABEL_REF_P (x)
9008 || (GET_CODE (x) == CONST
9009 && GET_CODE (XEXP (x, 0)) == PLUS
9010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9012 return 1;
9014 /* Post-inc indexing only supported for SImode and larger. */
9015 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9016 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9017 return 1;
9019 else if (GET_CODE (x) == PLUS)
9021 /* REG+REG address can be any two index registers. */
9022 /* We disallow FRAME+REG addressing since we know that FRAME
9023 will be replaced with STACK, and SP relative addressing only
9024 permits SP+OFFSET. */
9025 if (GET_MODE_SIZE (mode) <= 4
9026 && XEXP (x, 0) != frame_pointer_rtx
9027 && XEXP (x, 1) != frame_pointer_rtx
9028 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9029 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9030 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9031 return 1;
9033 /* REG+const has 5-7 bit offset for non-SP registers. */
9034 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9035 || XEXP (x, 0) == arg_pointer_rtx)
9036 && CONST_INT_P (XEXP (x, 1))
9037 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9038 return 1;
9040 /* REG+const has 10-bit offset for SP, but only SImode and
9041 larger is supported. */
9042 /* ??? Should probably check for DI/DFmode overflow here
9043 just like GO_IF_LEGITIMATE_OFFSET does. */
9044 else if (REG_P (XEXP (x, 0))
9045 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9046 && GET_MODE_SIZE (mode) >= 4
9047 && CONST_INT_P (XEXP (x, 1))
9048 && INTVAL (XEXP (x, 1)) >= 0
9049 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9050 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9051 return 1;
9053 else if (REG_P (XEXP (x, 0))
9054 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9055 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9056 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9057 && REGNO (XEXP (x, 0))
9058 <= LAST_VIRTUAL_POINTER_REGISTER))
9059 && GET_MODE_SIZE (mode) >= 4
9060 && CONST_INT_P (XEXP (x, 1))
9061 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9062 return 1;
9065 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9066 && GET_MODE_SIZE (mode) == 4
9067 && SYMBOL_REF_P (x)
9068 && CONSTANT_POOL_ADDRESS_P (x)
9069 && !arm_disable_literal_pool
9070 && ! (flag_pic
9071 && symbol_mentioned_p (get_pool_constant (x))
9072 && ! pcrel_constant_p (get_pool_constant (x))))
9073 return 1;
9075 return 0;
9078 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9079 instruction of mode MODE. */
9081 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9083 switch (GET_MODE_SIZE (mode))
9085 case 1:
9086 return val >= 0 && val < 32;
9088 case 2:
9089 return val >= 0 && val < 64 && (val & 1) == 0;
9091 default:
9092 return (val >= 0
9093 && (val + GET_MODE_SIZE (mode)) <= 128
9094 && (val & 3) == 0);
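/* Roughly, these ranges match the scaled 5-bit immediates of the 16-bit
   Thumb load/store encodings: 0..31 for byte accesses, 0..62 (even) for
   halfwords and 0..124 (word-aligned) for words.  */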
9098 bool
9099 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9101 if (TARGET_ARM)
9102 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9103 else if (TARGET_THUMB2)
9104 return thumb2_legitimate_address_p (mode, x, strict_p);
9105 else /* if (TARGET_THUMB1) */
9106 return thumb1_legitimate_address_p (mode, x, strict_p);
9109 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9111 Given an rtx X being reloaded into a reg required to be
9112 in class CLASS, return the class of reg to actually use.
9113 In general this is just CLASS, but for the Thumb core registers and
9114 immediate constants we prefer a LO_REGS class or a subset. */
9116 static reg_class_t
9117 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9119 if (TARGET_32BIT)
9120 return rclass;
9121 else
9123 if (rclass == GENERAL_REGS)
9124 return LO_REGS;
9125 else
9126 return rclass;
9130 /* Build the SYMBOL_REF for __tls_get_addr. */
9132 static GTY(()) rtx tls_get_addr_libfunc;
9134 static rtx
9135 get_tls_get_addr (void)
9137 if (!tls_get_addr_libfunc)
9138 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9139 return tls_get_addr_libfunc;
9143 arm_load_tp (rtx target)
9145 if (!target)
9146 target = gen_reg_rtx (SImode);
9148 if (TARGET_HARD_TP)
9150 /* Can return in any reg. */
9151 emit_insn (gen_load_tp_hard (target));
9153 else
9155 /* Always returned in r0. Immediately copy the result into a pseudo,
9156 otherwise other uses of r0 (e.g. setting up function arguments) may
9157 clobber the value. */
9159 rtx tmp;
9161 if (TARGET_FDPIC)
9163 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9164 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9166 emit_insn (gen_load_tp_soft_fdpic ());
9168 /* Restore r9. */
9169 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9171 else
9172 emit_insn (gen_load_tp_soft ());
9174 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9175 emit_move_insn (target, tmp);
9177 return target;
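/* For example, expanding __builtin_thread_pointer ends up here: with a
   hardware thread register the value is read directly (the load_tp_hard
   pattern), while the soft variant calls the __aeabi_read_tp helper and
   therefore copies the result out of r0 straight away, as noted above.  */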
9180 static rtx
9181 load_tls_operand (rtx x, rtx reg)
9183 rtx tmp;
9185 if (reg == NULL_RTX)
9186 reg = gen_reg_rtx (SImode);
9188 tmp = gen_rtx_CONST (SImode, x);
9190 emit_move_insn (reg, tmp);
9192 return reg;
9195 static rtx_insn *
9196 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9198 rtx label, labelno = NULL_RTX, sum;
9200 gcc_assert (reloc != TLS_DESCSEQ);
9201 start_sequence ();
9203 if (TARGET_FDPIC)
9205 sum = gen_rtx_UNSPEC (Pmode,
9206 gen_rtvec (2, x, GEN_INT (reloc)),
9207 UNSPEC_TLS);
9209 else
9211 labelno = GEN_INT (pic_labelno++);
9212 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9213 label = gen_rtx_CONST (VOIDmode, label);
9215 sum = gen_rtx_UNSPEC (Pmode,
9216 gen_rtvec (4, x, GEN_INT (reloc), label,
9217 GEN_INT (TARGET_ARM ? 8 : 4)),
9218 UNSPEC_TLS);
9220 reg = load_tls_operand (sum, reg);
9222 if (TARGET_FDPIC)
9223 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9224 else if (TARGET_ARM)
9225 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9226 else
9227 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9229 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9230 LCT_PURE, /* LCT_CONST? */
9231 Pmode, reg, Pmode);
9233 rtx_insn *insns = get_insns ();
9234 end_sequence ();
9236 return insns;
9239 static rtx
9240 arm_tls_descseq_addr (rtx x, rtx reg)
9242 rtx labelno = GEN_INT (pic_labelno++);
9243 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9244 rtx sum = gen_rtx_UNSPEC (Pmode,
9245 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9246 gen_rtx_CONST (VOIDmode, label),
9247 GEN_INT (!TARGET_ARM)),
9248 UNSPEC_TLS);
9249 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9251 emit_insn (gen_tlscall (x, labelno));
9252 if (!reg)
9253 reg = gen_reg_rtx (SImode);
9254 else
9255 gcc_assert (REGNO (reg) != R0_REGNUM);
9257 emit_move_insn (reg, reg0);
9259 return reg;
9264 legitimize_tls_address (rtx x, rtx reg)
9266 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9267 rtx_insn *insns;
9268 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9270 switch (model)
9272 case TLS_MODEL_GLOBAL_DYNAMIC:
9273 if (TARGET_GNU2_TLS)
9275 gcc_assert (!TARGET_FDPIC);
9277 reg = arm_tls_descseq_addr (x, reg);
9279 tp = arm_load_tp (NULL_RTX);
9281 dest = gen_rtx_PLUS (Pmode, tp, reg);
9283 else
9285 /* Original scheme */
9286 if (TARGET_FDPIC)
9287 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9288 else
9289 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9290 dest = gen_reg_rtx (Pmode);
9291 emit_libcall_block (insns, dest, ret, x);
9293 return dest;
9295 case TLS_MODEL_LOCAL_DYNAMIC:
9296 if (TARGET_GNU2_TLS)
9298 gcc_assert (!TARGET_FDPIC);
9300 reg = arm_tls_descseq_addr (x, reg);
9302 tp = arm_load_tp (NULL_RTX);
9304 dest = gen_rtx_PLUS (Pmode, tp, reg);
9306 else
9308 if (TARGET_FDPIC)
9309 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9310 else
9311 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9313 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9314 share the LDM result with other LD model accesses. */
9315 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9316 UNSPEC_TLS);
9317 dest = gen_reg_rtx (Pmode);
9318 emit_libcall_block (insns, dest, ret, eqv);
9320 /* Load the addend. */
9321 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9322 GEN_INT (TLS_LDO32)),
9323 UNSPEC_TLS);
9324 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9325 dest = gen_rtx_PLUS (Pmode, dest, addend);
9327 return dest;
9329 case TLS_MODEL_INITIAL_EXEC:
9330 if (TARGET_FDPIC)
9332 sum = gen_rtx_UNSPEC (Pmode,
9333 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9334 UNSPEC_TLS);
9335 reg = load_tls_operand (sum, reg);
9336 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9337 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9339 else
9341 labelno = GEN_INT (pic_labelno++);
9342 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9343 label = gen_rtx_CONST (VOIDmode, label);
9344 sum = gen_rtx_UNSPEC (Pmode,
9345 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9346 GEN_INT (TARGET_ARM ? 8 : 4)),
9347 UNSPEC_TLS);
9348 reg = load_tls_operand (sum, reg);
9350 if (TARGET_ARM)
9351 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9352 else if (TARGET_THUMB2)
9353 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9354 else
9356 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9357 emit_move_insn (reg, gen_const_mem (SImode, reg));
9361 tp = arm_load_tp (NULL_RTX);
9363 return gen_rtx_PLUS (Pmode, tp, reg);
9365 case TLS_MODEL_LOCAL_EXEC:
9366 tp = arm_load_tp (NULL_RTX);
9368 reg = gen_rtx_UNSPEC (Pmode,
9369 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9370 UNSPEC_TLS);
9371 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9373 return gen_rtx_PLUS (Pmode, tp, reg);
9375 default:
9376 abort ();
9380 /* Try machine-dependent ways of modifying an illegitimate address
9381 to be legitimate. If we find one, return the new, valid address. */
9383 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9385 if (arm_tls_referenced_p (x))
9387 rtx addend = NULL;
9389 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9391 addend = XEXP (XEXP (x, 0), 1);
9392 x = XEXP (XEXP (x, 0), 0);
9395 if (!SYMBOL_REF_P (x))
9396 return x;
9398 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9400 x = legitimize_tls_address (x, NULL_RTX);
9402 if (addend)
9404 x = gen_rtx_PLUS (SImode, x, addend);
9405 orig_x = x;
9407 else
9408 return x;
9411 if (TARGET_THUMB1)
9412 return thumb_legitimize_address (x, orig_x, mode);
9414 if (GET_CODE (x) == PLUS)
9416 rtx xop0 = XEXP (x, 0);
9417 rtx xop1 = XEXP (x, 1);
9419 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9420 xop0 = force_reg (SImode, xop0);
9422 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9423 && !symbol_mentioned_p (xop1))
9424 xop1 = force_reg (SImode, xop1);
9426 if (ARM_BASE_REGISTER_RTX_P (xop0)
9427 && CONST_INT_P (xop1))
9429 HOST_WIDE_INT n, low_n;
9430 rtx base_reg, val;
9431 n = INTVAL (xop1);
9433 /* VFP addressing modes actually allow greater offsets, but for
9434 now we just stick with the lowest common denominator. */
9435 if (mode == DImode || mode == DFmode)
9437 low_n = n & 0x0f;
9438 n &= ~0x0f;
9439 if (low_n > 4)
9441 n += 16;
9442 low_n -= 16;
9445 else
9447 low_n = ((mode) == TImode ? 0
9448 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9449 n -= low_n;
9452 base_reg = gen_reg_rtx (SImode);
9453 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9454 emit_move_insn (base_reg, val);
9455 x = plus_constant (Pmode, base_reg, low_n);
9457 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9458 x = gen_rtx_PLUS (SImode, xop0, xop1);
9461 /* XXX We don't allow MINUS any more -- see comment in
9462 arm_legitimate_address_outer_p (). */
9463 else if (GET_CODE (x) == MINUS)
9465 rtx xop0 = XEXP (x, 0);
9466 rtx xop1 = XEXP (x, 1);
9468 if (CONSTANT_P (xop0))
9469 xop0 = force_reg (SImode, xop0);
9471 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9472 xop1 = force_reg (SImode, xop1);
9474 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9475 x = gen_rtx_MINUS (SImode, xop0, xop1);
9478 /* Make sure to take full advantage of the pre-indexed addressing mode
9479 with absolute addresses which often allows for the base register to
9480 be factorized for multiple adjacent memory references, and it might
9481 even allow for the mini pool to be avoided entirely. */
9482 else if (CONST_INT_P (x) && optimize > 0)
9484 unsigned int bits;
9485 HOST_WIDE_INT mask, base, index;
9486 rtx base_reg;
9488 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9489 only use an 8-bit index. So let's use a 12-bit index for
9490 SImode only and hope that arm_gen_constant will enable LDRB
9491 to use more bits. */
9492 bits = (mode == SImode) ? 12 : 8;
9493 mask = (1 << bits) - 1;
9494 base = INTVAL (x) & ~mask;
9495 index = INTVAL (x) & mask;
9496 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9498 /* It'll most probably be more efficient to generate the
9499 base with more bits set and use a negative index instead.
9500 Don't do this for Thumb as negative offsets are much more
9501 limited. */
9502 base |= mask;
9503 index -= mask;
9505 base_reg = force_reg (SImode, GEN_INT (base));
9506 x = plus_constant (Pmode, base_reg, index);
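      /* For example, with SImode and x == 0x12345 this yields a base of
         0x12000 in a register plus an index of 0x345, letting nearby
         absolute addresses share the same base register.  */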
9509 if (flag_pic)
9511 /* We need to find and carefully transform any SYMBOL and LABEL
9512 references; so go back to the original address expression. */
9513 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9514 false /*compute_now*/);
9516 if (new_x != orig_x)
9517 x = new_x;
9520 return x;
9524 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9525 to be legitimate. If we find one, return the new, valid address. */
9527 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9529 if (GET_CODE (x) == PLUS
9530 && CONST_INT_P (XEXP (x, 1))
9531 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9532 || INTVAL (XEXP (x, 1)) < 0))
9534 rtx xop0 = XEXP (x, 0);
9535 rtx xop1 = XEXP (x, 1);
9536 HOST_WIDE_INT offset = INTVAL (xop1);
9538 /* Try and fold the offset into a biasing of the base register and
9539 then offsetting that. Don't do this when optimizing for space
9540 since it can cause too many CSEs. */
9541 if (optimize_size && offset >= 0
9542 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9544 HOST_WIDE_INT delta;
9546 if (offset >= 256)
9547 delta = offset - (256 - GET_MODE_SIZE (mode));
9548 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9549 delta = 31 * GET_MODE_SIZE (mode);
9550 else
9551 delta = offset & (~31 * GET_MODE_SIZE (mode));
9553 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9554 NULL_RTX);
9555 x = plus_constant (Pmode, xop0, delta);
9557 else if (offset < 0 && offset > -256)
9558 /* Small negative offsets are best done with a subtract before the
9559 dereference; forcing these into a register normally takes two
9560 instructions. */
9561 x = force_operand (x, NULL_RTX);
9562 else
9564 /* For the remaining cases, force the constant into a register. */
9565 xop1 = force_reg (SImode, xop1);
9566 x = gen_rtx_PLUS (SImode, xop0, xop1);
9569 else if (GET_CODE (x) == PLUS
9570 && s_register_operand (XEXP (x, 1), SImode)
9571 && !s_register_operand (XEXP (x, 0), SImode))
9573 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9575 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9578 if (flag_pic)
9580 /* We need to find and carefully transform any SYMBOL and LABEL
9581 references; so go back to the original address expression. */
9582 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9583 false /*compute_now*/);
9585 if (new_x != orig_x)
9586 x = new_x;
9589 return x;
9592 /* Return TRUE if X contains any TLS symbol references. */
9594 bool
9595 arm_tls_referenced_p (rtx x)
9597 if (! TARGET_HAVE_TLS)
9598 return false;
9600 subrtx_iterator::array_type array;
9601 FOR_EACH_SUBRTX (iter, array, x, ALL)
9603 const_rtx x = *iter;
9604 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9606 /* ARM currently does not provide relocations to encode TLS variables
9607 into AArch32 instructions, only data, so there is currently no way
9608 to implement these if a literal pool is disabled. */
9609 if (arm_disable_literal_pool)
9610 sorry ("accessing thread-local storage is not currently supported "
9611 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9613 return true;
9616 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9617 TLS offsets, not real symbol references. */
9618 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9619 iter.skip_subrtxes ();
9621 return false;
9624 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9626 On the ARM, allow any integer (invalid ones are removed later by insn
9627 patterns), nice doubles and symbol_refs which refer to the function's
9628 constant pool XXX.
9630 When generating pic allow anything. */
9632 static bool
9633 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9635 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9636 return false;
9638 return flag_pic || !label_mentioned_p (x);
9641 static bool
9642 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9644 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9645 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9646 for ARMv8-M Baseline or later the result is valid. */
9647 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9648 x = XEXP (x, 0);
9650 return (CONST_INT_P (x)
9651 || CONST_DOUBLE_P (x)
9652 || CONSTANT_ADDRESS_P (x)
9653 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9654 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9655 we build the symbol address with upper/lower
9656 relocations. */
9657 || (TARGET_THUMB1
9658 && !label_mentioned_p (x)
9659 && arm_valid_symbolic_address_p (x)
9660 && arm_disable_literal_pool)
9661 || flag_pic);
9664 static bool
9665 arm_legitimate_constant_p (machine_mode mode, rtx x)
9667 return (!arm_cannot_force_const_mem (mode, x)
9668 && (TARGET_32BIT
9669 ? arm_legitimate_constant_p_1 (mode, x)
9670 : thumb_legitimate_constant_p (mode, x)));
9673 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9675 static bool
9676 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9678 rtx base, offset;
9679 split_const (x, &base, &offset);
9681 if (SYMBOL_REF_P (base))
9683 /* Function symbols cannot have an offset due to the Thumb bit. */
9684 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9685 && INTVAL (offset) != 0)
9686 return true;
9688 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9689 && !offset_within_block_p (base, INTVAL (offset)))
9690 return true;
9692 return arm_tls_referenced_p (x);
9695 #define REG_OR_SUBREG_REG(X) \
9696 (REG_P (X) \
9697 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9699 #define REG_OR_SUBREG_RTX(X) \
9700 (REG_P (X) ? (X) : SUBREG_REG (X))
9702 static inline int
9703 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9705 machine_mode mode = GET_MODE (x);
9706 int total, words;
9708 switch (code)
9710 case ASHIFT:
9711 case ASHIFTRT:
9712 case LSHIFTRT:
9713 case ROTATERT:
9714 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9716 case PLUS:
9717 case MINUS:
9718 case COMPARE:
9719 case NEG:
9720 case NOT:
9721 return COSTS_N_INSNS (1);
9723 case MULT:
9724 if (arm_arch6m && arm_m_profile_small_mul)
9725 return COSTS_N_INSNS (32);
9727 if (CONST_INT_P (XEXP (x, 1)))
9729 int cycles = 0;
9730 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9732 while (i)
9734 i >>= 2;
9735 cycles++;
9737 return COSTS_N_INSNS (2) + cycles;
9739 return COSTS_N_INSNS (1) + 16;
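      /* For example, in the constant path above a multiplier of 0x155 is
         shifted right two bits at a time for five iterations, giving a cost
         of COSTS_N_INSNS (2) + 5.  */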
9741 case SET:
9742 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9743 the mode. */
9744 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9745 return (COSTS_N_INSNS (words)
9746 + 4 * ((MEM_P (SET_SRC (x)))
9747 + MEM_P (SET_DEST (x))));
9749 case CONST_INT:
9750 if (outer == SET)
9752 if (UINTVAL (x) < 256
9753 /* 16-bit constant. */
9754 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9755 return 0;
9756 if (thumb_shiftable_const (INTVAL (x)))
9757 return COSTS_N_INSNS (2);
9758 return arm_disable_literal_pool
9759 ? COSTS_N_INSNS (8)
9760 : COSTS_N_INSNS (3);
9762 else if ((outer == PLUS || outer == COMPARE)
9763 && INTVAL (x) < 256 && INTVAL (x) > -256)
9764 return 0;
9765 else if ((outer == IOR || outer == XOR || outer == AND)
9766 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9767 return COSTS_N_INSNS (1);
9768 else if (outer == AND)
9770 int i;
9771 /* This duplicates the tests in the andsi3 expander. */
9772 for (i = 9; i <= 31; i++)
9773 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9774 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9775 return COSTS_N_INSNS (2);
9777 else if (outer == ASHIFT || outer == ASHIFTRT
9778 || outer == LSHIFTRT)
9779 return 0;
9780 return COSTS_N_INSNS (2);
9782 case CONST:
9783 case CONST_DOUBLE:
9784 case LABEL_REF:
9785 case SYMBOL_REF:
9786 return COSTS_N_INSNS (3);
9788 case UDIV:
9789 case UMOD:
9790 case DIV:
9791 case MOD:
9792 return 100;
9794 case TRUNCATE:
9795 return 99;
9797 case AND:
9798 case XOR:
9799 case IOR:
9800 /* XXX guess. */
9801 return 8;
9803 case MEM:
9804 /* XXX another guess. */
9805 /* Memory costs quite a lot for the first word, but subsequent words
9806 load at the equivalent of a single insn each. */
9807 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9808 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9809 ? 4 : 0));
9811 case IF_THEN_ELSE:
9812 /* XXX a guess. */
9813 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9814 return 14;
9815 return 2;
9817 case SIGN_EXTEND:
9818 case ZERO_EXTEND:
9819 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9820 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9822 if (mode == SImode)
9823 return total;
9825 if (arm_arch6)
9826 return total + COSTS_N_INSNS (1);
9828 /* Assume a two-shift sequence. Increase the cost slightly so
9829 we prefer actual shifts over an extend operation. */
9830 return total + 1 + COSTS_N_INSNS (2);
9832 default:
9833 return 99;
9837 /* Estimates the size cost of thumb1 instructions.
9838 For now most of the code is copied from thumb1_rtx_costs. We need more
9839 fine-grained tuning when we have more related test cases. */
9840 static inline int
9841 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9843 machine_mode mode = GET_MODE (x);
9844 int words, cost;
9846 switch (code)
9848 case ASHIFT:
9849 case ASHIFTRT:
9850 case LSHIFTRT:
9851 case ROTATERT:
9852 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9854 case PLUS:
9855 case MINUS:
9856 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9857 operations defined by RTL expansion, especially for the expansion of
9858 multiplication. */
9859 if ((GET_CODE (XEXP (x, 0)) == MULT
9860 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9861 || (GET_CODE (XEXP (x, 1)) == MULT
9862 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9863 return COSTS_N_INSNS (2);
9864 /* Fall through. */
9865 case COMPARE:
9866 case NEG:
9867 case NOT:
9868 return COSTS_N_INSNS (1);
9870 case MULT:
9871 if (CONST_INT_P (XEXP (x, 1)))
9873 /* Thumb1 mul instruction can't operate on const. We must load it
9874 into a register first. */
9875 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9876 /* For the targets which have a very small and high-latency multiply
9877 unit, we prefer to synthesize the mult with up to 5 instructions,
9878 giving a good balance between size and performance. */
9879 if (arm_arch6m && arm_m_profile_small_mul)
9880 return COSTS_N_INSNS (5);
9881 else
9882 return COSTS_N_INSNS (1) + const_size;
9884 return COSTS_N_INSNS (1);
9886 case SET:
9887 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9888 the mode. */
9889 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9890 cost = COSTS_N_INSNS (words);
9891 if (satisfies_constraint_J (SET_SRC (x))
9892 || satisfies_constraint_K (SET_SRC (x))
9893 /* Too big an immediate for a 2-byte mov; MOVT is used instead. */
9894 || (CONST_INT_P (SET_SRC (x))
9895 && UINTVAL (SET_SRC (x)) >= 256
9896 && TARGET_HAVE_MOVT
9897 && satisfies_constraint_j (SET_SRC (x)))
9898 /* thumb1_movdi_insn. */
9899 || ((words > 1) && MEM_P (SET_SRC (x))))
9900 cost += COSTS_N_INSNS (1);
9901 return cost;
9903 case CONST_INT:
9904 if (outer == SET)
9906 if (UINTVAL (x) < 256)
9907 return COSTS_N_INSNS (1);
9908 /* movw is 4 bytes long. */
9909 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9910 return COSTS_N_INSNS (2);
9911 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9912 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9913 return COSTS_N_INSNS (2);
9914 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9915 if (thumb_shiftable_const (INTVAL (x)))
9916 return COSTS_N_INSNS (2);
9917 return arm_disable_literal_pool
9918 ? COSTS_N_INSNS (8)
9919 : COSTS_N_INSNS (3);
9921 else if ((outer == PLUS || outer == COMPARE)
9922 && INTVAL (x) < 256 && INTVAL (x) > -256)
9923 return 0;
9924 else if ((outer == IOR || outer == XOR || outer == AND)
9925 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9926 return COSTS_N_INSNS (1);
9927 else if (outer == AND)
9929 int i;
9930 /* This duplicates the tests in the andsi3 expander. */
9931 for (i = 9; i <= 31; i++)
9932 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9933 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9934 return COSTS_N_INSNS (2);
9936 else if (outer == ASHIFT || outer == ASHIFTRT
9937 || outer == LSHIFTRT)
9938 return 0;
9939 return COSTS_N_INSNS (2);
9941 case CONST:
9942 case CONST_DOUBLE:
9943 case LABEL_REF:
9944 case SYMBOL_REF:
9945 return COSTS_N_INSNS (3);
9947 case UDIV:
9948 case UMOD:
9949 case DIV:
9950 case MOD:
9951 return 100;
9953 case TRUNCATE:
9954 return 99;
9956 case AND:
9957 case XOR:
9958 case IOR:
9959 return COSTS_N_INSNS (1);
9961 case MEM:
9962 return (COSTS_N_INSNS (1)
9963 + COSTS_N_INSNS (1)
9964 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9965 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9966 ? COSTS_N_INSNS (1) : 0));
9968 case IF_THEN_ELSE:
9969 /* XXX a guess. */
9970 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9971 return 14;
9972 return 2;
9974 case ZERO_EXTEND:
9975 /* XXX still guessing. */
9976 switch (GET_MODE (XEXP (x, 0)))
9978 case E_QImode:
9979 return (1 + (mode == DImode ? 4 : 0)
9980 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9982 case E_HImode:
9983 return (4 + (mode == DImode ? 4 : 0)
9984 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9986 case E_SImode:
9987 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9989 default:
9990 return 99;
9993 default:
9994 return 99;
9998 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9999 PLUS, adds the carry flag, then return the other operand. If
10000 neither is a carry, return OP unchanged. */
10001 static rtx
10002 strip_carry_operation (rtx op)
10004 gcc_assert (GET_CODE (op) == PLUS);
10005 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10006 return XEXP (op, 1);
10007 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10008 return XEXP (op, 0);
10009 return op;
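/* Roughly, given (plus (ltu (reg CC) (const_int 0)) (reg X)), the carry
   flag added to X, this returns just (reg X), so the caller can cost the
   addition without the carry term.  */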
10012 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10013 operand, then return the operand that is being shifted. If the shift
10014 is not by a constant, then set SHIFT_REG to point to the operand.
10015 Return NULL if OP is not a shifter operand. */
10016 static rtx
10017 shifter_op_p (rtx op, rtx *shift_reg)
10019 enum rtx_code code = GET_CODE (op);
10021 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10022 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10023 return XEXP (op, 0);
10024 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10025 return XEXP (op, 0);
10026 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10027 || code == ASHIFTRT)
10029 if (!CONST_INT_P (XEXP (op, 1)))
10030 *shift_reg = XEXP (op, 1);
10031 return XEXP (op, 0);
10034 return NULL;
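/* For example, (mult (reg X) (const_int 4)) is a shift left by two and
   returns X, while (ashift (reg X) (reg Y)) returns X and records Y in
   *SHIFT_REG because the shift amount is not a constant.  */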
10037 static bool
10038 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10040 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10041 rtx_code code = GET_CODE (x);
10042 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10044 switch (XINT (x, 1))
10046 case UNSPEC_UNALIGNED_LOAD:
10047 /* We can only do unaligned loads into the integer unit, and we can't
10048 use LDM or LDRD. */
10049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10050 if (speed_p)
10051 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10052 + extra_cost->ldst.load_unaligned);
10054 #ifdef NOT_YET
10055 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10056 ADDR_SPACE_GENERIC, speed_p);
10057 #endif
10058 return true;
10060 case UNSPEC_UNALIGNED_STORE:
10061 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10062 if (speed_p)
10063 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10064 + extra_cost->ldst.store_unaligned);
10066 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10067 #ifdef NOT_YET
10068 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10069 ADDR_SPACE_GENERIC, speed_p);
10070 #endif
10071 return true;
10073 case UNSPEC_VRINTZ:
10074 case UNSPEC_VRINTP:
10075 case UNSPEC_VRINTM:
10076 case UNSPEC_VRINTR:
10077 case UNSPEC_VRINTX:
10078 case UNSPEC_VRINTA:
10079 if (speed_p)
10080 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10082 return true;
10083 default:
10084 *cost = COSTS_N_INSNS (2);
10085 break;
10087 return true;
10090 /* Cost of a libcall. We assume one insn per argument, an amount for the
10091 call (one insn for -Os) and then one for processing the result. */
10092 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
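/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   costing for speed and COSTS_N_INSNS (4) when costing for size.  */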
10094 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10095 do \
10097 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10098 if (shift_op != NULL \
10099 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10101 if (shift_reg) \
10103 if (speed_p) \
10104 *cost += extra_cost->alu.arith_shift_reg; \
10105 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10106 ASHIFT, 1, speed_p); \
10108 else if (speed_p) \
10109 *cost += extra_cost->alu.arith_shift; \
10111 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10112 ASHIFT, 0, speed_p) \
10113 + rtx_cost (XEXP (x, 1 - IDX), \
10114 GET_MODE (shift_op), \
10115 OP, 1, speed_p)); \
10116 return true; \
10119 while (0)
10121 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10122 considering the costs of the addressing mode and memory access
10123 separately. */
10124 static bool
10125 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10126 int *cost, bool speed_p)
10128 machine_mode mode = GET_MODE (x);
10130 *cost = COSTS_N_INSNS (1);
10132 if (flag_pic
10133 && GET_CODE (XEXP (x, 0)) == PLUS
10134 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10135 /* This will be split into two instructions. Add the cost of the
10136 additional instruction here. The cost of the memory access is computed
10137 below. See arm.md:calculate_pic_address. */
10138 *cost += COSTS_N_INSNS (1);
10140 /* Calculate cost of the addressing mode. */
10141 if (speed_p)
10143 arm_addr_mode_op op_type;
10144 switch (GET_CODE (XEXP (x, 0)))
10146 default:
10147 case REG:
10148 op_type = AMO_DEFAULT;
10149 break;
10150 case MINUS:
10151 /* MINUS does not appear in RTL, but the architecture supports it,
10152 so handle this case defensively. */
10153 /* fall through */
10154 case PLUS:
10155 op_type = AMO_NO_WB;
10156 break;
10157 case PRE_INC:
10158 case PRE_DEC:
10159 case POST_INC:
10160 case POST_DEC:
10161 case PRE_MODIFY:
10162 case POST_MODIFY:
10163 op_type = AMO_WB;
10164 break;
10167 if (VECTOR_MODE_P (mode))
10168 *cost += current_tune->addr_mode_costs->vector[op_type];
10169 else if (FLOAT_MODE_P (mode))
10170 *cost += current_tune->addr_mode_costs->fp[op_type];
10171 else
10172 *cost += current_tune->addr_mode_costs->integer[op_type];
10175 /* Calculate cost of memory access. */
10176 if (speed_p)
10178 if (FLOAT_MODE_P (mode))
10180 if (GET_MODE_SIZE (mode) == 8)
10181 *cost += extra_cost->ldst.loadd;
10182 else
10183 *cost += extra_cost->ldst.loadf;
10185 else if (VECTOR_MODE_P (mode))
10186 *cost += extra_cost->ldst.loadv;
10187 else
10189 /* Integer modes */
10190 if (GET_MODE_SIZE (mode) == 8)
10191 *cost += extra_cost->ldst.ldrd;
10192 else
10193 *cost += extra_cost->ldst.load;
10197 return true;
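/* For example, when costing for speed a DFmode load through a PRE_MODIFY
   address adds the fp[AMO_WB] addressing-mode cost plus the ldst.loadd
   access cost on top of the base COSTS_N_INSNS (1).  */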
10200 /* Helper for arm_bfi_p. */
10201 static bool
10202 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10204 unsigned HOST_WIDE_INT const1;
10205 unsigned HOST_WIDE_INT const2 = 0;
10207 if (!CONST_INT_P (XEXP (op0, 1)))
10208 return false;
10210 const1 = UINTVAL (XEXP (op0, 1));
10211 if (!CONST_INT_P (XEXP (op1, 1))
10212 || ~UINTVAL (XEXP (op1, 1)) != const1)
10213 return false;
10215 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10216 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10218 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10219 *sub0 = XEXP (XEXP (op0, 0), 0);
10221 else
10222 *sub0 = XEXP (op0, 0);
10224 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10225 return false;
10227 *sub1 = XEXP (op1, 0);
10228 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10231 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10232 format looks something like:
10234 (IOR (AND (reg1) (~const1))
10235 (AND (ASHIFT (reg2) (const2))
10236 (const1)))
10238 where const1 is a consecutive sequence of 1-bits with the
10239 least-significant non-zero bit starting at bit position const2. If
10240 const2 is zero, then the shift will not appear at all, due to
10241 canonicalization. The two arms of the IOR expression may be
10242 flipped. */
10243 static bool
10244 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10246 if (GET_CODE (x) != IOR)
10247 return false;
10248 if (GET_CODE (XEXP (x, 0)) != AND
10249 || GET_CODE (XEXP (x, 1)) != AND)
10250 return false;
10251 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10252 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
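/* For example, with const1 == 0xff00 and const2 == 8:

     (ior (and (reg X) (const_int 0xffff00ff))
          (and (ashift (reg Y) (const_int 8)) (const_int 0xff00)))

   matches and inserts the low byte of Y into bits 8..15 of X, i.e. a
   BFI X, Y, #8, #8; *SUB0 is set to X and *SUB1 to Y.  */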
10255 /* RTX costs. Make an estimate of the cost of executing the operation
10256 X, which is contained within an operation with code OUTER_CODE.
10257 SPEED_P indicates whether the cost desired is the performance cost,
10258 or the size cost. The estimate is stored in COST and the return
10259 value is TRUE if the cost calculation is final, or FALSE if the
10260 caller should recurse through the operands of X to add additional
10261 costs.
10263 We currently make no attempt to model the size savings of Thumb-2
10264 16-bit instructions. At the normal points in compilation where
10265 this code is called we have no measure of whether the condition
10266 flags are live or not, and thus no realistic way to determine what
10267 the size will eventually be. */
10268 static bool
10269 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10270 const struct cpu_cost_table *extra_cost,
10271 int *cost, bool speed_p)
10273 machine_mode mode = GET_MODE (x);
10275 *cost = COSTS_N_INSNS (1);
10277 if (TARGET_THUMB1)
10279 if (speed_p)
10280 *cost = thumb1_rtx_costs (x, code, outer_code);
10281 else
10282 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10283 return true;
10286 switch (code)
10288 case SET:
10289 *cost = 0;
10290 /* SET RTXs don't have a mode so we get it from the destination. */
10291 mode = GET_MODE (SET_DEST (x));
10293 if (REG_P (SET_SRC (x))
10294 && REG_P (SET_DEST (x)))
10296 /* Assume that most copies can be done with a single insn,
10297 unless we don't have HW FP, in which case everything
10298 larger than word mode will require two insns. */
10299 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10300 && GET_MODE_SIZE (mode) > 4)
10301 || mode == DImode)
10302 ? 2 : 1);
10303 /* Conditional register moves can be encoded
10304 in 16 bits in Thumb mode. */
10305 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10306 *cost >>= 1;
10308 return true;
10311 if (CONST_INT_P (SET_SRC (x)))
10313 /* Handle CONST_INT here, since the value doesn't have a mode
10314 and we would otherwise be unable to work out the true cost. */
10315 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10316 0, speed_p);
10317 outer_code = SET;
10318 /* Slightly lower the cost of setting a core reg to a constant.
10319 This helps break up chains and allows for better scheduling. */
10320 if (REG_P (SET_DEST (x))
10321 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10322 *cost -= 1;
10323 x = SET_SRC (x);
10324 /* Immediate moves with an immediate in the range [0, 255] can be
10325 encoded in 16 bits in Thumb mode. */
10326 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10327 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10328 *cost >>= 1;
10329 goto const_int_cost;
10332 return false;
10334 case MEM:
10335 return arm_mem_costs (x, extra_cost, cost, speed_p);
10337 case PARALLEL:
10339 /* Calculations of LDM costs are complex. We assume an initial cost
10340 (ldm_1st) which will load the number of registers mentioned in
10341 ldm_regs_per_insn_1st registers; then each additional
10342 ldm_regs_per_insn_subsequent registers cost one more insn. The
10343 formula for N regs is thus:
10345 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10346 + ldm_regs_per_insn_subsequent - 1)
10347 / ldm_regs_per_insn_subsequent).
10349 Additional costs may also be added for addressing. A similar
10350 formula is used for STM. */
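      /* For example, loading 7 registers with ldm_regs_per_insn_1st == 4
         and ldm_regs_per_insn_subsequent == 2 costs
         ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2), i.e. two extra insns
         for the remaining three registers.  */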
10352 bool is_ldm = load_multiple_operation (x, SImode);
10353 bool is_stm = store_multiple_operation (x, SImode);
10355 if (is_ldm || is_stm)
10357 if (speed_p)
10359 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10360 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10361 ? extra_cost->ldst.ldm_regs_per_insn_1st
10362 : extra_cost->ldst.stm_regs_per_insn_1st;
10363 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10364 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10365 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10367 *cost += regs_per_insn_1st
10368 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10369 + regs_per_insn_sub - 1)
10370 / regs_per_insn_sub);
10371 return true;
10375 return false;
10377 case DIV:
10378 case UDIV:
10379 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10380 && (mode == SFmode || !TARGET_VFP_SINGLE))
10381 *cost += COSTS_N_INSNS (speed_p
10382 ? extra_cost->fp[mode != SFmode].div : 0);
10383 else if (mode == SImode && TARGET_IDIV)
10384 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10385 else
10386 *cost = LIBCALL_COST (2);
10388 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10389 possible, udiv is preferred. */
10390 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10391 return false; /* All arguments must be in registers. */
10393 case MOD:
10394 /* MOD by a power of 2 can be expanded as:
10395 rsbs r1, r0, #0
10396 and r0, r0, #(n - 1)
10397 and r1, r1, #(n - 1)
10398 rsbpl r0, r1, #0. */
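      /* For example, x % 16 uses the sequence above with #(n - 1) == #15;
         the code below costs it as the base insn plus three more.  */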
10399 if (CONST_INT_P (XEXP (x, 1))
10400 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10401 && mode == SImode)
10403 *cost += COSTS_N_INSNS (3);
10405 if (speed_p)
10406 *cost += 2 * extra_cost->alu.logical
10407 + extra_cost->alu.arith;
10408 return true;
10411 /* Fall-through. */
10412 case UMOD:
10413 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10414 possible, udiv is preferred. */
10415 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10416 return false; /* All arguments must be in registers. */
10418 case ROTATE:
10419 if (mode == SImode && REG_P (XEXP (x, 1)))
10421 *cost += (COSTS_N_INSNS (1)
10422 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10423 if (speed_p)
10424 *cost += extra_cost->alu.shift_reg;
10425 return true;
10427 /* Fall through */
10428 case ROTATERT:
10429 case ASHIFT:
10430 case LSHIFTRT:
10431 case ASHIFTRT:
10432 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10434 *cost += (COSTS_N_INSNS (2)
10435 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10436 if (speed_p)
10437 *cost += 2 * extra_cost->alu.shift;
10438 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10439 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10440 *cost += 1;
10441 return true;
10443 else if (mode == SImode)
10445 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10446 /* Slightly disparage register shifts at -Os, but not by much. */
10447 if (!CONST_INT_P (XEXP (x, 1)))
10448 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10449 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10450 return true;
10452 else if (GET_MODE_CLASS (mode) == MODE_INT
10453 && GET_MODE_SIZE (mode) < 4)
10455 if (code == ASHIFT)
10457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10458 /* Slightly disparage register shifts at -Os, but not by
10459 much. */
10460 if (!CONST_INT_P (XEXP (x, 1)))
10461 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10462 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10464 else if (code == LSHIFTRT || code == ASHIFTRT)
10466 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10468 /* Can use SBFX/UBFX. */
10469 if (speed_p)
10470 *cost += extra_cost->alu.bfx;
10471 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10473 else
10475 *cost += COSTS_N_INSNS (1);
10476 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10477 if (speed_p)
10479 if (CONST_INT_P (XEXP (x, 1)))
10480 *cost += 2 * extra_cost->alu.shift;
10481 else
10482 *cost += (extra_cost->alu.shift
10483 + extra_cost->alu.shift_reg);
10485 else
10486 /* Slightly disparage register shifts. */
10487 *cost += !CONST_INT_P (XEXP (x, 1));
10490 else /* Rotates. */
10492 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10493 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10494 if (speed_p)
10496 if (CONST_INT_P (XEXP (x, 1)))
10497 *cost += (2 * extra_cost->alu.shift
10498 + extra_cost->alu.log_shift);
10499 else
10500 *cost += (extra_cost->alu.shift
10501 + extra_cost->alu.shift_reg
10502 + extra_cost->alu.log_shift_reg);
10505 return true;
10508 *cost = LIBCALL_COST (2);
10509 return false;
10511 case BSWAP:
10512 if (arm_arch6)
10514 if (mode == SImode)
10516 if (speed_p)
10517 *cost += extra_cost->alu.rev;
10519 return false;
10522 else
10524 /* No rev instruction available. Look at arm_legacy_rev
10525 and thumb_legacy_rev for the form of RTL used then. */
10526 if (TARGET_THUMB)
10528 *cost += COSTS_N_INSNS (9);
10530 if (speed_p)
10532 *cost += 6 * extra_cost->alu.shift;
10533 *cost += 3 * extra_cost->alu.logical;
10536 else
10538 *cost += COSTS_N_INSNS (4);
10540 if (speed_p)
10542 *cost += 2 * extra_cost->alu.shift;
10543 *cost += extra_cost->alu.arith_shift;
10544 *cost += 2 * extra_cost->alu.logical;
10547 return true;
10549 return false;
10551 case MINUS:
10552 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10553 && (mode == SFmode || !TARGET_VFP_SINGLE))
10555 if (GET_CODE (XEXP (x, 0)) == MULT
10556 || GET_CODE (XEXP (x, 1)) == MULT)
10558 rtx mul_op0, mul_op1, sub_op;
10560 if (speed_p)
10561 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10563 if (GET_CODE (XEXP (x, 0)) == MULT)
10565 mul_op0 = XEXP (XEXP (x, 0), 0);
10566 mul_op1 = XEXP (XEXP (x, 0), 1);
10567 sub_op = XEXP (x, 1);
10569 else
10571 mul_op0 = XEXP (XEXP (x, 1), 0);
10572 mul_op1 = XEXP (XEXP (x, 1), 1);
10573 sub_op = XEXP (x, 0);
10576 /* The first operand of the multiply may be optionally
10577 negated. */
10578 if (GET_CODE (mul_op0) == NEG)
10579 mul_op0 = XEXP (mul_op0, 0);
10581 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10582 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10583 + rtx_cost (sub_op, mode, code, 0, speed_p));
10585 return true;
10588 if (speed_p)
10589 *cost += extra_cost->fp[mode != SFmode].addsub;
10590 return false;
10593 if (mode == SImode)
10595 rtx shift_by_reg = NULL;
10596 rtx shift_op;
10597 rtx non_shift_op;
10598 rtx op0 = XEXP (x, 0);
10599 rtx op1 = XEXP (x, 1);
10601 /* Factor out any borrow operation. There's more than one way
10602 of expressing this; try to recognize them all. */
10603 if (GET_CODE (op0) == MINUS)
10605 if (arm_borrow_operation (op1, SImode))
10607 op1 = XEXP (op0, 1);
10608 op0 = XEXP (op0, 0);
10610 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10611 op0 = XEXP (op0, 0);
10613 else if (GET_CODE (op1) == PLUS
10614 && arm_borrow_operation (XEXP (op1, 0), SImode))
10615 op1 = XEXP (op1, 0);
10616 else if (GET_CODE (op0) == NEG
10617 && arm_borrow_operation (op1, SImode))
10619 /* Negate with carry-in. For Thumb2 this is done with
10620 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10621 RSC instruction that exists in Arm mode. */
10622 if (speed_p)
10623 *cost += (TARGET_THUMB2
10624 ? extra_cost->alu.arith_shift
10625 : extra_cost->alu.arith);
10626 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10627 return true;
10629 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10630 Note we do mean ~borrow here. */
10631 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10633 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10634 return true;
10637 shift_op = shifter_op_p (op0, &shift_by_reg);
10638 if (shift_op == NULL)
10640 shift_op = shifter_op_p (op1, &shift_by_reg);
10641 non_shift_op = op0;
10643 else
10644 non_shift_op = op1;
10646 if (shift_op != NULL)
10648 if (shift_by_reg != NULL)
10650 if (speed_p)
10651 *cost += extra_cost->alu.arith_shift_reg;
10652 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10654 else if (speed_p)
10655 *cost += extra_cost->alu.arith_shift;
10657 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10658 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10659 return true;
10662 if (arm_arch_thumb2
10663 && GET_CODE (XEXP (x, 1)) == MULT)
10665 /* MLS. */
10666 if (speed_p)
10667 *cost += extra_cost->mult[0].add;
10668 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10669 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10670 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10671 return true;
10674 if (CONST_INT_P (op0))
10676 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10677 INTVAL (op0), NULL_RTX,
10678 NULL_RTX, 1, 0);
10679 *cost = COSTS_N_INSNS (insns);
10680 if (speed_p)
10681 *cost += insns * extra_cost->alu.arith;
10682 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10683 return true;
10685 else if (speed_p)
10686 *cost += extra_cost->alu.arith;
10688 /* Don't recurse as we don't want to cost any borrow that
10689 we've stripped. */
10690 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10691 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10692 return true;
10695 if (GET_MODE_CLASS (mode) == MODE_INT
10696 && GET_MODE_SIZE (mode) < 4)
10698 rtx shift_op, shift_reg;
10699 shift_reg = NULL;
10701 /* We check both sides of the MINUS for shifter operands since,
10702 unlike PLUS, it's not commutative. */
10704 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10705 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10707 /* Slightly disparage, as we might need to widen the result. */
10708 *cost += 1;
10709 if (speed_p)
10710 *cost += extra_cost->alu.arith;
10712 if (CONST_INT_P (XEXP (x, 0)))
10714 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10715 return true;
10718 return false;
10721 if (mode == DImode)
10723 *cost += COSTS_N_INSNS (1);
10725 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10727 rtx op1 = XEXP (x, 1);
10729 if (speed_p)
10730 *cost += 2 * extra_cost->alu.arith;
10732 if (GET_CODE (op1) == ZERO_EXTEND)
10733 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10734 0, speed_p);
10735 else
10736 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10737 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10738 0, speed_p);
10739 return true;
10741 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10743 if (speed_p)
10744 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10745 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10746 0, speed_p)
10747 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10748 return true;
10750 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10751 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10753 if (speed_p)
10754 *cost += (extra_cost->alu.arith
10755 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10756 ? extra_cost->alu.arith
10757 : extra_cost->alu.arith_shift));
10758 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10759 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10760 GET_CODE (XEXP (x, 1)), 0, speed_p));
10761 return true;
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.arith;
10766 return false;
10769 /* Vector mode? */
10771 *cost = LIBCALL_COST (2);
10772 return false;
10774 case PLUS:
10775 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10776 && (mode == SFmode || !TARGET_VFP_SINGLE))
10778 if (GET_CODE (XEXP (x, 0)) == MULT)
10780 rtx mul_op0, mul_op1, add_op;
10782 if (speed_p)
10783 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10785 mul_op0 = XEXP (XEXP (x, 0), 0);
10786 mul_op1 = XEXP (XEXP (x, 0), 1);
10787 add_op = XEXP (x, 1);
10789 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10790 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10791 + rtx_cost (add_op, mode, code, 0, speed_p));
10793 return true;
10796 if (speed_p)
10797 *cost += extra_cost->fp[mode != SFmode].addsub;
10798 return false;
10800 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10802 *cost = LIBCALL_COST (2);
10803 return false;
10806 /* Narrow modes can be synthesized in SImode, but the range
10807 of useful sub-operations is limited. Check for shift operations
10808 on one of the operands. Only left shifts can be used in the
10809 narrow modes. */
10810 if (GET_MODE_CLASS (mode) == MODE_INT
10811 && GET_MODE_SIZE (mode) < 4)
10813 rtx shift_op, shift_reg;
10814 shift_reg = NULL;
10816 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10818 if (CONST_INT_P (XEXP (x, 1)))
10820 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10821 INTVAL (XEXP (x, 1)), NULL_RTX,
10822 NULL_RTX, 1, 0);
10823 *cost = COSTS_N_INSNS (insns);
10824 if (speed_p)
10825 *cost += insns * extra_cost->alu.arith;
10826 /* Slightly penalize a narrow operation as the result may
10827 need widening. */
10828 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10829 return true;
10832 /* Slightly penalize a narrow operation as the result may
10833 need widening. */
10834 *cost += 1;
10835 if (speed_p)
10836 *cost += extra_cost->alu.arith;
10838 return false;
10841 if (mode == SImode)
10843 rtx shift_op, shift_reg;
10845 if (TARGET_INT_SIMD
10846 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10847 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10849 /* UXTA[BH] or SXTA[BH]. */
10850 if (speed_p)
10851 *cost += extra_cost->alu.extend_arith;
10852 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10853 0, speed_p)
10854 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10855 return true;
10858 rtx op0 = XEXP (x, 0);
10859 rtx op1 = XEXP (x, 1);
10861 /* Handle a side effect of adding in the carry to an addition. */
10862 if (GET_CODE (op0) == PLUS
10863 && arm_carry_operation (op1, mode))
10865 op1 = XEXP (op0, 1);
10866 op0 = XEXP (op0, 0);
10868 else if (GET_CODE (op1) == PLUS
10869 && arm_carry_operation (op0, mode))
10871 op0 = XEXP (op1, 0);
10872 op1 = XEXP (op1, 1);
10874 else if (GET_CODE (op0) == PLUS)
10876 op0 = strip_carry_operation (op0);
10877 if (swap_commutative_operands_p (op0, op1))
10878 std::swap (op0, op1);
10881 if (arm_carry_operation (op0, mode))
10883 /* Adding the carry to a register is a canonicalization of
10884 adding 0 to the register plus the carry. */
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10887 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10888 return true;
10891 shift_reg = NULL;
10892 shift_op = shifter_op_p (op0, &shift_reg);
10893 if (shift_op != NULL)
10895 if (shift_reg)
10897 if (speed_p)
10898 *cost += extra_cost->alu.arith_shift_reg;
10899 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10901 else if (speed_p)
10902 *cost += extra_cost->alu.arith_shift;
10904 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10905 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10906 return true;
10909 if (GET_CODE (op0) == MULT)
10911 rtx mul_op = op0;
10913 if (TARGET_DSP_MULTIPLY
10914 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10915 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10916 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10917 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10918 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10919 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10920 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10921 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10922 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10923 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10924 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10925 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10926 == 16))))))
10928 /* SMLA[BT][BT]. */
10929 if (speed_p)
10930 *cost += extra_cost->mult[0].extend_add;
10931 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10932 SIGN_EXTEND, 0, speed_p)
10933 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10934 SIGN_EXTEND, 0, speed_p)
10935 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10936 return true;
10939 if (speed_p)
10940 *cost += extra_cost->mult[0].add;
10941 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10942 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10943 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10944 return true;
10947 if (CONST_INT_P (op1))
10949 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10950 INTVAL (op1), NULL_RTX,
10951 NULL_RTX, 1, 0);
10952 *cost = COSTS_N_INSNS (insns);
10953 if (speed_p)
10954 *cost += insns * extra_cost->alu.arith;
10955 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10956 return true;
10959 if (speed_p)
10960 *cost += extra_cost->alu.arith;
10962 /* Don't recurse here because we want to test the operands
10963 without any carry operation. */
10964 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10965 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10966 return true;
10969 if (mode == DImode)
10971 if (GET_CODE (XEXP (x, 0)) == MULT
10972 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10974 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10975 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10977 if (speed_p)
10978 *cost += extra_cost->mult[1].extend_add;
10979 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10980 ZERO_EXTEND, 0, speed_p)
10981 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10982 ZERO_EXTEND, 0, speed_p)
10983 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10984 return true;
10987 *cost += COSTS_N_INSNS (1);
10989 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10990 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10992 if (speed_p)
10993 *cost += (extra_cost->alu.arith
10994 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10995 ? extra_cost->alu.arith
10996 : extra_cost->alu.arith_shift));
10998 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10999 0, speed_p)
11000 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11001 return true;
11004 if (speed_p)
11005 *cost += 2 * extra_cost->alu.arith;
11006 return false;
11009 /* Vector mode? */
11010 *cost = LIBCALL_COST (2);
11011 return false;
11012 case IOR:
11014 rtx sub0, sub1;
11015 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11017 if (speed_p)
11018 *cost += extra_cost->alu.rev;
11020 return true;
11022 else if (mode == SImode && arm_arch_thumb2
11023 && arm_bfi_p (x, &sub0, &sub1))
11025 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11026 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11027 if (speed_p)
11028 *cost += extra_cost->alu.bfi;
11030 return true;
11034 /* Fall through. */
11035 case AND: case XOR:
11036 if (mode == SImode)
11038 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11039 rtx op0 = XEXP (x, 0);
11040 rtx shift_op, shift_reg;
11042 if (subcode == NOT
11043 && (code == AND
11044 || (code == IOR && TARGET_THUMB2)))
11045 op0 = XEXP (op0, 0);
11047 shift_reg = NULL;
11048 shift_op = shifter_op_p (op0, &shift_reg);
11049 if (shift_op != NULL)
11051 if (shift_reg)
11053 if (speed_p)
11054 *cost += extra_cost->alu.log_shift_reg;
11055 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11057 else if (speed_p)
11058 *cost += extra_cost->alu.log_shift;
11060 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11061 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11062 return true;
11065 if (CONST_INT_P (XEXP (x, 1)))
11067 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11068 INTVAL (XEXP (x, 1)), NULL_RTX,
11069 NULL_RTX, 1, 0);
11071 *cost = COSTS_N_INSNS (insns);
11072 if (speed_p)
11073 *cost += insns * extra_cost->alu.logical;
11074 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11075 return true;
11078 if (speed_p)
11079 *cost += extra_cost->alu.logical;
11080 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11081 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11082 return true;
11085 if (mode == DImode)
11087 rtx op0 = XEXP (x, 0);
11088 enum rtx_code subcode = GET_CODE (op0);
11090 *cost += COSTS_N_INSNS (1);
11092 if (subcode == NOT
11093 && (code == AND
11094 || (code == IOR && TARGET_THUMB2)))
11095 op0 = XEXP (op0, 0);
11097 if (GET_CODE (op0) == ZERO_EXTEND)
11099 if (speed_p)
11100 *cost += 2 * extra_cost->alu.logical;
11102 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11103 0, speed_p)
11104 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11105 return true;
11107 else if (GET_CODE (op0) == SIGN_EXTEND)
11109 if (speed_p)
11110 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11112 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11113 0, speed_p)
11114 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11115 return true;
11118 if (speed_p)
11119 *cost += 2 * extra_cost->alu.logical;
11121 return true;
11123 /* Vector mode? */
11125 *cost = LIBCALL_COST (2);
11126 return false;
11128 case MULT:
11129 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11130 && (mode == SFmode || !TARGET_VFP_SINGLE))
11132 rtx op0 = XEXP (x, 0);
11134 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11135 op0 = XEXP (op0, 0);
11137 if (speed_p)
11138 *cost += extra_cost->fp[mode != SFmode].mult;
11140 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11141 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11142 return true;
11144 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11146 *cost = LIBCALL_COST (2);
11147 return false;
11150 if (mode == SImode)
11152 if (TARGET_DSP_MULTIPLY
11153 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11154 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11155 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11156 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11157 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11158 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11159 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11160 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11161 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11162 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11163 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11164 && (INTVAL (XEXP (XEXP (x, 1), 1))
11165 == 16))))))
11167 /* SMUL[TB][TB]. */
11168 if (speed_p)
11169 *cost += extra_cost->mult[0].extend;
11170 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11171 SIGN_EXTEND, 0, speed_p);
11172 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11173 SIGN_EXTEND, 1, speed_p);
11174 return true;
11176 if (speed_p)
11177 *cost += extra_cost->mult[0].simple;
11178 return false;
11181 if (mode == DImode)
11183 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11184 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11185 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11186 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11188 if (speed_p)
11189 *cost += extra_cost->mult[1].extend;
11190 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11191 ZERO_EXTEND, 0, speed_p)
11192 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11193 ZERO_EXTEND, 0, speed_p));
11194 return true;
11197 *cost = LIBCALL_COST (2);
11198 return false;
11201 /* Vector mode? */
11202 *cost = LIBCALL_COST (2);
11203 return false;
11205 case NEG:
11206 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11207 && (mode == SFmode || !TARGET_VFP_SINGLE))
11209 if (GET_CODE (XEXP (x, 0)) == MULT)
11211 /* VNMUL. */
11212 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11213 return true;
11216 if (speed_p)
11217 *cost += extra_cost->fp[mode != SFmode].neg;
11219 return false;
11221 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11223 *cost = LIBCALL_COST (1);
11224 return false;
11227 if (mode == SImode)
11229 if (GET_CODE (XEXP (x, 0)) == ABS)
11231 *cost += COSTS_N_INSNS (1);
11232 /* Assume the non-flag-changing variant. */
11233 if (speed_p)
11234 *cost += (extra_cost->alu.log_shift
11235 + extra_cost->alu.arith_shift);
11236 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11237 return true;
11240 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11241 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11243 *cost += COSTS_N_INSNS (1);
11244 /* No extra cost for MOV imm and MVN imm. */
11245 /* If the comparison op is using the flags, there's no further
11246 cost, otherwise we need to add the cost of the comparison. */
11247 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11248 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11249 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11251 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11252 *cost += (COSTS_N_INSNS (1)
11253 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11254 0, speed_p)
11255 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11256 1, speed_p));
11257 if (speed_p)
11258 *cost += extra_cost->alu.arith;
11260 return true;
11263 if (speed_p)
11264 *cost += extra_cost->alu.arith;
11265 return false;
11268 if (GET_MODE_CLASS (mode) == MODE_INT
11269 && GET_MODE_SIZE (mode) < 4)
11271 /* Slightly disparage, as we might need an extend operation. */
11272 *cost += 1;
11273 if (speed_p)
11274 *cost += extra_cost->alu.arith;
11275 return false;
11278 if (mode == DImode)
11280 *cost += COSTS_N_INSNS (1);
11281 if (speed_p)
11282 *cost += 2 * extra_cost->alu.arith;
11283 return false;
11286 /* Vector mode? */
11287 *cost = LIBCALL_COST (1);
11288 return false;
11290 case NOT:
11291 if (mode == SImode)
11293 rtx shift_op;
11294 rtx shift_reg = NULL;
11296 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11298 if (shift_op)
11300 if (shift_reg != NULL)
11302 if (speed_p)
11303 *cost += extra_cost->alu.log_shift_reg;
11304 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11306 else if (speed_p)
11307 *cost += extra_cost->alu.log_shift;
11308 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11309 return true;
11312 if (speed_p)
11313 *cost += extra_cost->alu.logical;
11314 return false;
11316 if (mode == DImode)
11318 *cost += COSTS_N_INSNS (1);
11319 return false;
11322 /* Vector mode? */
11324 *cost += LIBCALL_COST (1);
11325 return false;
11327 case IF_THEN_ELSE:
11329 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11331 *cost += COSTS_N_INSNS (3);
11332 return true;
11334 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11335 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11337 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11338 /* Assume that if one arm of the if_then_else is a register,
11339 it will be tied with the result, eliminating the
11340 conditional insn. */
11341 if (REG_P (XEXP (x, 1)))
11342 *cost += op2cost;
11343 else if (REG_P (XEXP (x, 2)))
11344 *cost += op1cost;
11345 else
11347 if (speed_p)
11349 if (extra_cost->alu.non_exec_costs_exec)
11350 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11351 else
11352 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11354 else
11355 *cost += op1cost + op2cost;
11358 return true;
11360 case COMPARE:
11361 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11362 *cost = 0;
11363 else
11365 machine_mode op0mode;
11366 /* We'll mostly assume that the cost of a compare is the cost of the
11367 LHS. However, there are some notable exceptions. */
11369 /* Floating point compares are never done as side-effects. */
11370 op0mode = GET_MODE (XEXP (x, 0));
11371 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11372 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11374 if (speed_p)
11375 *cost += extra_cost->fp[op0mode != SFmode].compare;
11377 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11379 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11380 return true;
11383 return false;
11385 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11387 *cost = LIBCALL_COST (2);
11388 return false;
11391 /* DImode compares normally take two insns. */
11392 if (op0mode == DImode)
11394 *cost += COSTS_N_INSNS (1);
11395 if (speed_p)
11396 *cost += 2 * extra_cost->alu.arith;
11397 return false;
11400 if (op0mode == SImode)
11402 rtx shift_op;
11403 rtx shift_reg;
11405 if (XEXP (x, 1) == const0_rtx
11406 && !(REG_P (XEXP (x, 0))
11407 || (GET_CODE (XEXP (x, 0)) == SUBREG
11408 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11410 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11412 /* Multiply operations that set the flags are often
11413 significantly more expensive. */
11414 if (speed_p
11415 && GET_CODE (XEXP (x, 0)) == MULT
11416 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11417 *cost += extra_cost->mult[0].flag_setting;
11419 if (speed_p
11420 && GET_CODE (XEXP (x, 0)) == PLUS
11421 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11422 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11423 0), 1), mode))
11424 *cost += extra_cost->mult[0].flag_setting;
11425 return true;
11428 shift_reg = NULL;
11429 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11430 if (shift_op != NULL)
11432 if (shift_reg != NULL)
11434 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11435 1, speed_p);
11436 if (speed_p)
11437 *cost += extra_cost->alu.arith_shift_reg;
11439 else if (speed_p)
11440 *cost += extra_cost->alu.arith_shift;
11441 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11442 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11443 return true;
11446 if (speed_p)
11447 *cost += extra_cost->alu.arith;
11448 if (CONST_INT_P (XEXP (x, 1))
11449 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11451 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11452 return true;
11454 return false;
11457 /* Vector mode? */
11459 *cost = LIBCALL_COST (2);
11460 return false;
11462 return true;
11464 case EQ:
11465 case GE:
11466 case GT:
11467 case LE:
11468 case LT:
11469 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11470 vcle and vclt). */
11471 if (TARGET_NEON
11472 && TARGET_HARD_FLOAT
11473 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11474 && (XEXP (x, 1) == CONST0_RTX (mode)))
11476 *cost = 0;
11477 return true;
11480 /* Fall through. */
11481 case NE:
11482 case LTU:
11483 case LEU:
11484 case GEU:
11485 case GTU:
11486 case ORDERED:
11487 case UNORDERED:
11488 case UNEQ:
11489 case UNLE:
11490 case UNLT:
11491 case UNGE:
11492 case UNGT:
11493 case LTGT:
11494 if (outer_code == SET)
11496 /* Is it a store-flag operation? */
11497 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11498 && XEXP (x, 1) == const0_rtx)
11500 /* Thumb also needs an IT insn. */
11501 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11502 return true;
11504 if (XEXP (x, 1) == const0_rtx)
11506 switch (code)
11508 case LT:
11509 /* LSR Rd, Rn, #31. */
11510 if (speed_p)
11511 *cost += extra_cost->alu.shift;
11512 break;
11514 case EQ:
11515 /* RSBS T1, Rn, #0
11516 ADC Rd, Rn, T1. */
11518 case NE:
11519 /* SUBS T1, Rn, #1
11520 SBC Rd, Rn, T1. */
11521 *cost += COSTS_N_INSNS (1);
11522 break;
11524 case LE:
11525 /* RSBS T1, Rn, Rn, LSR #31
11526 ADC Rd, Rn, T1. */
11527 *cost += COSTS_N_INSNS (1);
11528 if (speed_p)
11529 *cost += extra_cost->alu.arith_shift;
11530 break;
11532 case GT:
11533 /* RSB Rd, Rn, Rn, ASR #1
11534 LSR Rd, Rd, #31. */
11535 *cost += COSTS_N_INSNS (1);
11536 if (speed_p)
11537 *cost += (extra_cost->alu.arith_shift
11538 + extra_cost->alu.shift);
11539 break;
11541 case GE:
11542 /* ASR Rd, Rn, #31
11543 ADD Rd, Rn, #1. */
11544 *cost += COSTS_N_INSNS (1);
11545 if (speed_p)
11546 *cost += extra_cost->alu.shift;
11547 break;
11549 default:
11550 /* Remaining cases are either meaningless or would take
11551 three insns anyway. */
11552 *cost = COSTS_N_INSNS (3);
11553 break;
11555 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11556 return true;
11558 else
11560 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11561 if (CONST_INT_P (XEXP (x, 1))
11562 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11564 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11565 return true;
11568 return false;
11571 /* Not directly inside a set. If it involves the condition code
11572 register it must be the condition for a branch, cond_exec or
11573 I_T_E operation. Since the comparison is performed elsewhere
11574 this is just the control part which has no additional
11575 cost. */
11576 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11577 && XEXP (x, 1) == const0_rtx)
11579 *cost = 0;
11580 return true;
11582 return false;
11584 case ABS:
11585 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11586 && (mode == SFmode || !TARGET_VFP_SINGLE))
11588 if (speed_p)
11589 *cost += extra_cost->fp[mode != SFmode].neg;
11591 return false;
11593 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11595 *cost = LIBCALL_COST (1);
11596 return false;
11599 if (mode == SImode)
11601 if (speed_p)
11602 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11603 return false;
11605 /* Vector mode? */
11606 *cost = LIBCALL_COST (1);
11607 return false;
11609 case SIGN_EXTEND:
11610 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11611 && MEM_P (XEXP (x, 0)))
11613 if (mode == DImode)
11614 *cost += COSTS_N_INSNS (1);
11616 if (!speed_p)
11617 return true;
11619 if (GET_MODE (XEXP (x, 0)) == SImode)
11620 *cost += extra_cost->ldst.load;
11621 else
11622 *cost += extra_cost->ldst.load_sign_extend;
11624 if (mode == DImode)
11625 *cost += extra_cost->alu.shift;
11627 return true;
11630 /* Widening from less than 32-bits requires an extend operation. */
11631 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11633 /* We have SXTB/SXTH. */
11634 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11635 if (speed_p)
11636 *cost += extra_cost->alu.extend;
11638 else if (GET_MODE (XEXP (x, 0)) != SImode)
11640 /* Needs two shifts. */
11641 *cost += COSTS_N_INSNS (1);
11642 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11643 if (speed_p)
11644 *cost += 2 * extra_cost->alu.shift;
11647 /* Widening beyond 32-bits requires one more insn. */
11648 if (mode == DImode)
11650 *cost += COSTS_N_INSNS (1);
11651 if (speed_p)
11652 *cost += extra_cost->alu.shift;
11655 return true;
11657 case ZERO_EXTEND:
11658 if ((arm_arch4
11659 || GET_MODE (XEXP (x, 0)) == SImode
11660 || GET_MODE (XEXP (x, 0)) == QImode)
11661 && MEM_P (XEXP (x, 0)))
11663 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11665 if (mode == DImode)
11666 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11668 return true;
11671 /* Widening from less than 32-bits requires an extend operation. */
11672 if (GET_MODE (XEXP (x, 0)) == QImode)
11674 /* UXTB can be a shorter instruction in Thumb2, but it might
11675 be slower than the AND Rd, Rn, #255 alternative. When
11676 optimizing for speed it should never be slower to use
11677 AND, and we don't really model 16-bit vs 32-bit insns
11678 here. */
11679 if (speed_p)
11680 *cost += extra_cost->alu.logical;
11682 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11684 /* We have UXTB/UXTH. */
11685 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11686 if (speed_p)
11687 *cost += extra_cost->alu.extend;
11689 else if (GET_MODE (XEXP (x, 0)) != SImode)
11691 /* Needs two shifts. It's marginally preferable to use
11692 shifts rather than two BIC instructions as the second
11693 shift may merge with a subsequent insn as a shifter
11694 op. */
11695 *cost = COSTS_N_INSNS (2);
11696 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11697 if (speed_p)
11698 *cost += 2 * extra_cost->alu.shift;
11701 /* Widening beyond 32-bits requires one more insn. */
11702 if (mode == DImode)
11704 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11707 return true;
11709 case CONST_INT:
11710 *cost = 0;
11711 /* CONST_INT has no mode, so we cannot tell for sure how many
11712 insns are really going to be needed. The best we can do is
11713 look at the value passed. If it fits in SImode, then assume
11714 that's the mode it will be used for. Otherwise assume it
11715 will be used in DImode. */
11716 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11717 mode = SImode;
11718 else
11719 mode = DImode;
11721 /* Avoid blowing up in arm_gen_constant (). */
11722 if (!(outer_code == PLUS
11723 || outer_code == AND
11724 || outer_code == IOR
11725 || outer_code == XOR
11726 || outer_code == MINUS))
11727 outer_code = SET;
11729 const_int_cost:
11730 if (mode == SImode)
11732 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11733 INTVAL (x), NULL, NULL,
11734 0, 0));
11735 /* Extra costs? */
11737 else
11739 *cost += COSTS_N_INSNS (arm_gen_constant
11740 (outer_code, SImode, NULL,
11741 trunc_int_for_mode (INTVAL (x), SImode),
11742 NULL, NULL, 0, 0)
11743 + arm_gen_constant (outer_code, SImode, NULL,
11744 INTVAL (x) >> 32, NULL,
11745 NULL, 0, 0));
11746 /* Extra costs? */
11749 return true;
11751 case CONST:
11752 case LABEL_REF:
11753 case SYMBOL_REF:
11754 if (speed_p)
11756 if (arm_arch_thumb2 && !flag_pic)
11757 *cost += COSTS_N_INSNS (1);
11758 else
11759 *cost += extra_cost->ldst.load;
11761 else
11762 *cost += COSTS_N_INSNS (1);
11764 if (flag_pic)
11766 *cost += COSTS_N_INSNS (1);
11767 if (speed_p)
11768 *cost += extra_cost->alu.arith;
11771 return true;
11773 case CONST_FIXED:
11774 *cost = COSTS_N_INSNS (4);
11775 /* Fixme. */
11776 return true;
11778 case CONST_DOUBLE:
11779 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11780 && (mode == SFmode || !TARGET_VFP_SINGLE))
11782 if (vfp3_const_double_rtx (x))
11784 if (speed_p)
11785 *cost += extra_cost->fp[mode == DFmode].fpconst;
11786 return true;
11789 if (speed_p)
11791 if (mode == DFmode)
11792 *cost += extra_cost->ldst.loadd;
11793 else
11794 *cost += extra_cost->ldst.loadf;
11796 else
11797 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11799 return true;
11801 *cost = COSTS_N_INSNS (4);
11802 return true;
11804 case CONST_VECTOR:
11805 /* Fixme. */
11806 if (((TARGET_NEON && TARGET_HARD_FLOAT
11807 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11808 || TARGET_HAVE_MVE)
11809 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11810 *cost = COSTS_N_INSNS (1);
11811 else
11812 *cost = COSTS_N_INSNS (4);
11813 return true;
11815 case HIGH:
11816 case LO_SUM:
11817 /* When optimizing for size, we prefer constant pool entries to
11818 MOVW/MOVT pairs, so bump the cost of these slightly. */
11819 if (!speed_p)
11820 *cost += 1;
11821 return true;
11823 case CLZ:
11824 if (speed_p)
11825 *cost += extra_cost->alu.clz;
11826 return false;
11828 case SMIN:
11829 if (XEXP (x, 1) == const0_rtx)
11831 if (speed_p)
11832 *cost += extra_cost->alu.log_shift;
11833 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11834 return true;
11836 /* Fall through. */
11837 case SMAX:
11838 case UMIN:
11839 case UMAX:
11840 *cost += COSTS_N_INSNS (1);
11841 return false;
11843 case TRUNCATE:
11844 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11845 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11846 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11848 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11849 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11850 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11851 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11852 == ZERO_EXTEND))))
11854 if (speed_p)
11855 *cost += extra_cost->mult[1].extend;
11856 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11857 ZERO_EXTEND, 0, speed_p)
11858 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11859 ZERO_EXTEND, 0, speed_p));
11860 return true;
11862 *cost = LIBCALL_COST (1);
11863 return false;
11865 case UNSPEC_VOLATILE:
11866 case UNSPEC:
11867 return arm_unspec_cost (x, outer_code, speed_p, cost);
11869 case PC:
11870 /* Reading the PC is like reading any other register. Writing it
11871 is more expensive, but we take that into account elsewhere. */
11872 *cost = 0;
11873 return true;
11875 case ZERO_EXTRACT:
11876 /* TODO: Simple zero_extract of bottom bits using AND. */
11877 /* Fall through. */
11878 case SIGN_EXTRACT:
11879 if (arm_arch6
11880 && mode == SImode
11881 && CONST_INT_P (XEXP (x, 1))
11882 && CONST_INT_P (XEXP (x, 2)))
11884 if (speed_p)
11885 *cost += extra_cost->alu.bfx;
11886 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11887 return true;
11889 /* Without UBFX/SBFX, need to resort to shift operations. */
11890 *cost += COSTS_N_INSNS (1);
11891 if (speed_p)
11892 *cost += 2 * extra_cost->alu.shift;
11893 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11894 return true;
11896 case FLOAT_EXTEND:
11897 if (TARGET_HARD_FLOAT)
11899 if (speed_p)
11900 *cost += extra_cost->fp[mode == DFmode].widen;
11901 if (!TARGET_VFP5
11902 && GET_MODE (XEXP (x, 0)) == HFmode)
11904 /* Pre v8, widening HF->DF is a two-step process, first
11905 widening to SFmode. */
11906 *cost += COSTS_N_INSNS (1);
11907 if (speed_p)
11908 *cost += extra_cost->fp[0].widen;
11910 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11911 return true;
11914 *cost = LIBCALL_COST (1);
11915 return false;
11917 case FLOAT_TRUNCATE:
11918 if (TARGET_HARD_FLOAT)
11920 if (speed_p)
11921 *cost += extra_cost->fp[mode == DFmode].narrow;
11922 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11923 return true;
11924 /* Vector modes? */
11926 *cost = LIBCALL_COST (1);
11927 return false;
11929 case FMA:
11930 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11932 rtx op0 = XEXP (x, 0);
11933 rtx op1 = XEXP (x, 1);
11934 rtx op2 = XEXP (x, 2);
11937 /* vfms or vfnma. */
11938 if (GET_CODE (op0) == NEG)
11939 op0 = XEXP (op0, 0);
11941 /* vfnms or vfnma. */
11942 if (GET_CODE (op2) == NEG)
11943 op2 = XEXP (op2, 0);
11945 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11946 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11947 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11949 if (speed_p)
11950 *cost += extra_cost->fp[mode == DFmode].fma;
11952 return true;
11955 *cost = LIBCALL_COST (3);
11956 return false;
11958 case FIX:
11959 case UNSIGNED_FIX:
11960 if (TARGET_HARD_FLOAT)
11962 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11963 a vcvt fixed-point conversion. */
11964 if (code == FIX && mode == SImode
11965 && GET_CODE (XEXP (x, 0)) == FIX
11966 && GET_MODE (XEXP (x, 0)) == SFmode
11967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11968 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11969 > 0)
11971 if (speed_p)
11972 *cost += extra_cost->fp[0].toint;
11974 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11975 code, 0, speed_p);
11976 return true;
11979 if (GET_MODE_CLASS (mode) == MODE_INT)
11981 mode = GET_MODE (XEXP (x, 0));
11982 if (speed_p)
11983 *cost += extra_cost->fp[mode == DFmode].toint;
11984 /* Strip off the 'cost' of rounding towards zero. */
11985 if (GET_CODE (XEXP (x, 0)) == FIX)
11986 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11987 0, speed_p);
11988 else
11989 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11990 /* ??? Increase the cost to deal with transferring from
11991 FP -> CORE registers? */
11992 return true;
11994 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11995 && TARGET_VFP5)
11997 if (speed_p)
11998 *cost += extra_cost->fp[mode == DFmode].roundint;
11999 return false;
12001 /* Vector costs? */
12003 *cost = LIBCALL_COST (1);
12004 return false;
12006 case FLOAT:
12007 case UNSIGNED_FLOAT:
12008 if (TARGET_HARD_FLOAT)
12010 /* ??? Increase the cost to deal with transferring from CORE
12011 -> FP registers? */
12012 if (speed_p)
12013 *cost += extra_cost->fp[mode == DFmode].fromint;
12014 return false;
12016 *cost = LIBCALL_COST (1);
12017 return false;
12019 case CALL:
12020 return true;
12022 case ASM_OPERANDS:
12024 /* Just a guess. Guess number of instructions in the asm
12025 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12026 though (see PR60663). */
12027 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12028 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12030 *cost = COSTS_N_INSNS (asm_length + num_operands);
12031 return true;
12033 default:
12034 if (mode != VOIDmode)
12035 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12036 else
12037 *cost = COSTS_N_INSNS (4); /* Who knows? */
12038 return false;
12042 #undef HANDLE_NARROW_SHIFT_ARITH
12044 /* RTX costs entry point. */
12046 static bool
12047 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12048 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12050 bool result;
12051 int code = GET_CODE (x);
12052 gcc_assert (current_tune->insn_extra_cost);
12054 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12055 (enum rtx_code) outer_code,
12056 current_tune->insn_extra_cost,
12057 total, speed);
12059 if (dump_file && arm_verbose_cost)
12061 print_rtl_single (dump_file, x);
12062 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12063 *total, result ? "final" : "partial");
12065 return result;
12068 static int
12069 arm_insn_cost (rtx_insn *insn, bool speed)
12071 int cost;
12073 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12074 will likely disappear during register allocation. */
12075 if (!reload_completed
12076 && GET_CODE (PATTERN (insn)) == SET
12077 && REG_P (SET_DEST (PATTERN (insn)))
12078 && REG_P (SET_SRC (PATTERN (insn))))
12079 return 2;
12080 cost = pattern_cost (PATTERN (insn), speed);
12081 /* If the cost is zero, then it's likely a complex insn. We don't want the
12082 cost of these to be less than something we know about. */
12083 return cost ? cost : COSTS_N_INSNS (2);
12086 /* All address computations that can be done are free, but rtx cost returns
12087 the same for practically all of them. So we weight the different types
12088 of address here in the order (most pref first):
12089 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12090 static inline int
12091 arm_arm_address_cost (rtx x)
12093 enum rtx_code c = GET_CODE (x);
12095 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12096 return 0;
12097 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12098 return 10;
12100 if (c == PLUS)
12102 if (CONST_INT_P (XEXP (x, 1)))
12103 return 2;
12105 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12106 return 3;
12108 return 4;
12111 return 6;
12114 static inline int
12115 arm_thumb_address_cost (rtx x)
12117 enum rtx_code c = GET_CODE (x);
12119 if (c == REG)
12120 return 1;
12121 if (c == PLUS
12122 && REG_P (XEXP (x, 0))
12123 && CONST_INT_P (XEXP (x, 1)))
12124 return 1;
12126 return 2;
12129 static int
12130 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12131 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12133 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12136 /* Adjust cost hook for XScale. */
12137 static bool
12138 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12139 int * cost)
12141 /* Some true dependencies can have a higher cost depending
12142 on precisely how certain input operands are used. */
12143 if (dep_type == 0
12144 && recog_memoized (insn) >= 0
12145 && recog_memoized (dep) >= 0)
12147 int shift_opnum = get_attr_shift (insn);
12148 enum attr_type attr_type = get_attr_type (dep);
12150 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12151 operand for INSN. If we have a shifted input operand and the
12152 instruction we depend on is another ALU instruction, then we may
12153 have to account for an additional stall. */
12154 if (shift_opnum != 0
12155 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12156 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12157 || attr_type == TYPE_ALUS_SHIFT_IMM
12158 || attr_type == TYPE_LOGIC_SHIFT_IMM
12159 || attr_type == TYPE_LOGICS_SHIFT_IMM
12160 || attr_type == TYPE_ALU_SHIFT_REG
12161 || attr_type == TYPE_ALUS_SHIFT_REG
12162 || attr_type == TYPE_LOGIC_SHIFT_REG
12163 || attr_type == TYPE_LOGICS_SHIFT_REG
12164 || attr_type == TYPE_MOV_SHIFT
12165 || attr_type == TYPE_MVN_SHIFT
12166 || attr_type == TYPE_MOV_SHIFT_REG
12167 || attr_type == TYPE_MVN_SHIFT_REG))
12169 rtx shifted_operand;
12170 int opno;
12172 /* Get the shifted operand. */
12173 extract_insn (insn);
12174 shifted_operand = recog_data.operand[shift_opnum];
12176 /* Iterate over all the operands in DEP. If we write an operand
12177 that overlaps with SHIFTED_OPERAND, then we have to increase the
12178 cost of this dependency. */
12179 extract_insn (dep);
12180 preprocess_constraints (dep);
12181 for (opno = 0; opno < recog_data.n_operands; opno++)
12183 /* We can ignore strict inputs. */
12184 if (recog_data.operand_type[opno] == OP_IN)
12185 continue;
12187 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12188 shifted_operand))
12190 *cost = 2;
12191 return false;
12196 return true;
12199 /* Adjust cost hook for Cortex A9. */
12200 static bool
12201 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12202 int * cost)
12204 switch (dep_type)
12206 case REG_DEP_ANTI:
12207 *cost = 0;
12208 return false;
12210 case REG_DEP_TRUE:
12211 case REG_DEP_OUTPUT:
12212 if (recog_memoized (insn) >= 0
12213 && recog_memoized (dep) >= 0)
12215 if (GET_CODE (PATTERN (insn)) == SET)
12217 if (GET_MODE_CLASS
12218 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12219 || GET_MODE_CLASS
12220 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12222 enum attr_type attr_type_insn = get_attr_type (insn);
12223 enum attr_type attr_type_dep = get_attr_type (dep);
12225 /* By default all dependencies of the form
12226 s0 = s0 <op> s1
12227 s0 = s0 <op> s2
12228 have an extra latency of 1 cycle because
12229 of the input and output dependency in this
12230 case. However, this gets modeled as a true
12231 dependency and hence all these checks. */
12232 if (REG_P (SET_DEST (PATTERN (insn)))
12233 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12235 /* FMACS is a special case where the dependent
12236 instruction can be issued 3 cycles before
12237 the normal latency in case of an output
12238 dependency. */
12239 if ((attr_type_insn == TYPE_FMACS
12240 || attr_type_insn == TYPE_FMACD)
12241 && (attr_type_dep == TYPE_FMACS
12242 || attr_type_dep == TYPE_FMACD))
12244 if (dep_type == REG_DEP_OUTPUT)
12245 *cost = insn_default_latency (dep) - 3;
12246 else
12247 *cost = insn_default_latency (dep);
12248 return false;
12250 else
12252 if (dep_type == REG_DEP_OUTPUT)
12253 *cost = insn_default_latency (dep) + 1;
12254 else
12255 *cost = insn_default_latency (dep);
12257 return false;
12262 break;
12264 default:
12265 gcc_unreachable ();
12268 return true;
12271 /* Adjust cost hook for FA726TE. */
12272 static bool
12273 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12274 int * cost)
12276 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
12277 by a predicated one) has a penalty of 3. */
12278 if (dep_type == REG_DEP_TRUE
12279 && recog_memoized (insn) >= 0
12280 && recog_memoized (dep) >= 0
12281 && get_attr_conds (dep) == CONDS_SET)
12283 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12284 if (get_attr_conds (insn) == CONDS_USE
12285 && get_attr_type (insn) != TYPE_BRANCH)
12287 *cost = 3;
12288 return false;
12291 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12292 || get_attr_conds (insn) == CONDS_USE)
12294 *cost = 0;
12295 return false;
12299 return true;
12302 /* Implement TARGET_REGISTER_MOVE_COST.
12304 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
12305 a move is typically more expensive than a single memory access. We set
12306 the cost to less than two memory accesses so that floating
12307 point to integer conversion does not go through memory. */
12310 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12311 reg_class_t from, reg_class_t to)
12313 if (TARGET_32BIT)
12315 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12316 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12317 return 15;
12318 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12319 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12320 return 4;
12321 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12322 return 20;
12323 else
12324 return 2;
12326 else
12328 if (from == HI_REGS || to == HI_REGS)
12329 return 4;
12330 else
12331 return 2;
12335 /* Implement TARGET_MEMORY_MOVE_COST. */
12338 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12339 bool in ATTRIBUTE_UNUSED)
12341 if (TARGET_32BIT)
12342 return 10;
12343 else
12345 if (GET_MODE_SIZE (mode) < 4)
12346 return 8;
12347 else
12348 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12352 /* Vectorizer cost model implementation. */
12354 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12355 static int
12356 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12357 tree vectype,
12358 int misalign ATTRIBUTE_UNUSED)
12360 unsigned elements;
12362 switch (type_of_cost)
12364 case scalar_stmt:
12365 return current_tune->vec_costs->scalar_stmt_cost;
12367 case scalar_load:
12368 return current_tune->vec_costs->scalar_load_cost;
12370 case scalar_store:
12371 return current_tune->vec_costs->scalar_store_cost;
12373 case vector_stmt:
12374 return current_tune->vec_costs->vec_stmt_cost;
12376 case vector_load:
12377 return current_tune->vec_costs->vec_align_load_cost;
12379 case vector_store:
12380 return current_tune->vec_costs->vec_store_cost;
12382 case vec_to_scalar:
12383 return current_tune->vec_costs->vec_to_scalar_cost;
12385 case scalar_to_vec:
12386 return current_tune->vec_costs->scalar_to_vec_cost;
12388 case unaligned_load:
12389 case vector_gather_load:
12390 return current_tune->vec_costs->vec_unalign_load_cost;
12392 case unaligned_store:
12393 case vector_scatter_store:
12394 return current_tune->vec_costs->vec_unalign_store_cost;
12396 case cond_branch_taken:
12397 return current_tune->vec_costs->cond_taken_branch_cost;
12399 case cond_branch_not_taken:
12400 return current_tune->vec_costs->cond_not_taken_branch_cost;
12402 case vec_perm:
12403 case vec_promote_demote:
12404 return current_tune->vec_costs->vec_stmt_cost;
12406 case vec_construct:
12407 elements = TYPE_VECTOR_SUBPARTS (vectype);
12408 return elements / 2 + 1;
12410 default:
12411 gcc_unreachable ();
12415 /* Return true if and only if this insn can dual-issue only as older. */
12416 static bool
12417 cortexa7_older_only (rtx_insn *insn)
12419 if (recog_memoized (insn) < 0)
12420 return false;
12422 switch (get_attr_type (insn))
12424 case TYPE_ALU_DSP_REG:
12425 case TYPE_ALU_SREG:
12426 case TYPE_ALUS_SREG:
12427 case TYPE_LOGIC_REG:
12428 case TYPE_LOGICS_REG:
12429 case TYPE_ADC_REG:
12430 case TYPE_ADCS_REG:
12431 case TYPE_ADR:
12432 case TYPE_BFM:
12433 case TYPE_REV:
12434 case TYPE_MVN_REG:
12435 case TYPE_SHIFT_IMM:
12436 case TYPE_SHIFT_REG:
12437 case TYPE_LOAD_BYTE:
12438 case TYPE_LOAD_4:
12439 case TYPE_STORE_4:
12440 case TYPE_FFARITHS:
12441 case TYPE_FADDS:
12442 case TYPE_FFARITHD:
12443 case TYPE_FADDD:
12444 case TYPE_FMOV:
12445 case TYPE_F_CVT:
12446 case TYPE_FCMPS:
12447 case TYPE_FCMPD:
12448 case TYPE_FCONSTS:
12449 case TYPE_FCONSTD:
12450 case TYPE_FMULS:
12451 case TYPE_FMACS:
12452 case TYPE_FMULD:
12453 case TYPE_FMACD:
12454 case TYPE_FDIVS:
12455 case TYPE_FDIVD:
12456 case TYPE_F_MRC:
12457 case TYPE_F_MRRC:
12458 case TYPE_F_FLAG:
12459 case TYPE_F_LOADS:
12460 case TYPE_F_STORES:
12461 return true;
12462 default:
12463 return false;
12467 /* Return true if and only if this insn can dual-issue as younger. */
12468 static bool
12469 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12471 if (recog_memoized (insn) < 0)
12473 if (verbose > 5)
12474 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12475 return false;
12478 switch (get_attr_type (insn))
12480 case TYPE_ALU_IMM:
12481 case TYPE_ALUS_IMM:
12482 case TYPE_LOGIC_IMM:
12483 case TYPE_LOGICS_IMM:
12484 case TYPE_EXTEND:
12485 case TYPE_MVN_IMM:
12486 case TYPE_MOV_IMM:
12487 case TYPE_MOV_REG:
12488 case TYPE_MOV_SHIFT:
12489 case TYPE_MOV_SHIFT_REG:
12490 case TYPE_BRANCH:
12491 case TYPE_CALL:
12492 return true;
12493 default:
12494 return false;
12499 /* Look for an instruction that can dual issue only as an older
12500 instruction, and move it in front of any instructions that can
12501 dual-issue as younger, while preserving the relative order of all
12502 other instructions in the ready list. This is a heuristic to help
12503 dual-issue in later cycles, by postponing issue of more flexible
12504 instructions. This heuristic may affect dual issue opportunities
12505 in the current cycle. */
12506 static void
12507 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12508 int *n_readyp, int clock)
12510 int i;
12511 int first_older_only = -1, first_younger = -1;
12513 if (verbose > 5)
12514 fprintf (file,
12515 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12516 clock,
12517 *n_readyp);
12519 /* Traverse the ready list from the head (the instruction to issue
12520 first), looking for the first instruction that can issue as
12521 younger and the first instruction that can dual-issue only as
12522 older. */
12523 for (i = *n_readyp - 1; i >= 0; i--)
12525 rtx_insn *insn = ready[i];
12526 if (cortexa7_older_only (insn))
12528 first_older_only = i;
12529 if (verbose > 5)
12530 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12531 break;
12533 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12534 first_younger = i;
12537 /* Nothing to reorder because either no younger insn was found or an insn
12538 that can dual-issue only as older appears before any insn that
12539 can dual-issue as younger. */
12540 if (first_younger == -1)
12542 if (verbose > 5)
12543 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12544 return;
12547 /* Nothing to reorder because no older-only insn in the ready list. */
12548 if (first_older_only == -1)
12550 if (verbose > 5)
12551 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12552 return;
12555 /* Move first_older_only insn before first_younger. */
12556 if (verbose > 5)
12557 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12558 INSN_UID(ready [first_older_only]),
12559 INSN_UID(ready [first_younger]));
12560 rtx_insn *first_older_only_insn = ready [first_older_only];
12561 for (i = first_older_only; i < first_younger; i++)
12563 ready[i] = ready[i+1];
12566 ready[i] = first_older_only_insn;
12567 return;
12570 /* Implement TARGET_SCHED_REORDER. */
12571 static int
12572 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12573 int clock)
12575 switch (arm_tune)
12577 case TARGET_CPU_cortexa7:
12578 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12579 break;
12580 default:
12581 /* Do nothing for other cores. */
12582 break;
12585 return arm_issue_rate ();
12588 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12589 It corrects the value of COST based on the relationship between
12590 INSN and DEP through the dependence LINK. It returns the new
12591 value. There is a per-core adjust_cost hook to adjust scheduler costs
12592 and the per-core hook can choose to completely override the generic
12593 adjust_cost function. Only put bits of code into arm_adjust_cost that
12594 are common across all cores. */
12595 static int
12596 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12597 unsigned int)
12599 rtx i_pat, d_pat;
12601 /* When generating Thumb-1 code, we want to place flag-setting operations
12602 close to a conditional branch which depends on them, so that we can
12603 omit the comparison. */
12604 if (TARGET_THUMB1
12605 && dep_type == 0
12606 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12607 && recog_memoized (dep) >= 0
12608 && get_attr_conds (dep) == CONDS_SET)
12609 return 0;
12611 if (current_tune->sched_adjust_cost != NULL)
12613 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12614 return cost;
12617 /* XXX Is this strictly true? */
12618 if (dep_type == REG_DEP_ANTI
12619 || dep_type == REG_DEP_OUTPUT)
12620 return 0;
12622 /* Call insns don't incur a stall, even if they follow a load. */
12623 if (dep_type == 0
12624 && CALL_P (insn))
12625 return 1;
12627 if ((i_pat = single_set (insn)) != NULL
12628 && MEM_P (SET_SRC (i_pat))
12629 && (d_pat = single_set (dep)) != NULL
12630 && MEM_P (SET_DEST (d_pat)))
12632 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12633 /* This is a load after a store; there is no conflict if the load reads
12634 from a cached area. Assume that loads from the stack, and from the
12635 constant pool are cached, and that others will miss. This is a
12636 hack. */
12638 if ((SYMBOL_REF_P (src_mem)
12639 && CONSTANT_POOL_ADDRESS_P (src_mem))
12640 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12641 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12642 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12643 return 1;
12646 return cost;
12650 arm_max_conditional_execute (void)
12652 return max_insns_skipped;
12655 static int
12656 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12658 if (TARGET_32BIT)
12659 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12660 else
12661 return (optimize > 0) ? 2 : 0;
12664 static int
12665 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12667 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12670 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12671 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12672 sequences of non-executed instructions in IT blocks probably take the same
12673 amount of time as executed instructions (and the IT instruction itself takes
12674 space in icache). This function was experimentally determined to give good
12675 results on a popular embedded benchmark. */
12677 static int
12678 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12680 return (TARGET_32BIT && speed_p) ? 1
12681 : arm_default_branch_cost (speed_p, predictable_p);
12684 static int
12685 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12687 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12690 static bool fp_consts_inited = false;
12692 static REAL_VALUE_TYPE value_fp0;
12694 static void
12695 init_fp_table (void)
12697 REAL_VALUE_TYPE r;
12699 r = REAL_VALUE_ATOF ("0", DFmode);
12700 value_fp0 = r;
12701 fp_consts_inited = true;
12704 /* Return TRUE if rtx X is a valid immediate FP constant. */
12706 arm_const_double_rtx (rtx x)
12708 const REAL_VALUE_TYPE *r;
12710 if (!fp_consts_inited)
12711 init_fp_table ();
12713 r = CONST_DOUBLE_REAL_VALUE (x);
12714 if (REAL_VALUE_MINUS_ZERO (*r))
12715 return 0;
12717 if (real_equal (r, &value_fp0))
12718 return 1;
12720 return 0;
12723 /* VFPv3 has a fairly wide range of representable immediates, formed from
12724 "quarter-precision" floating-point values. These can be evaluated using this
12725 formula (with ^ for exponentiation):
12727 -1^s * n * 2^-r
12729 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12730 16 <= n <= 31 and 0 <= r <= 7.
12732 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12734 - A (most-significant) is the sign bit.
12735 - BCD are the exponent (encoded as r XOR 3).
12736 - EFGH are the mantissa (encoded as n - 16).
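   As a worked example derived from the formula above: +1.0 can be written as
   16 * 2^-4, so s = 0, n = 16 and r = 4, giving ABCDEFGH = 0 111 0000, i.e.
   0x70. The largest representable magnitude is 31 * 2^0 = 31.0 and the
   smallest non-zero magnitude is 16 * 2^-7 = 0.125.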
12739 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12740 fconst[sd] instruction, or -1 if X isn't suitable. */
12741 static int
12742 vfp3_const_double_index (rtx x)
12744 REAL_VALUE_TYPE r, m;
12745 int sign, exponent;
12746 unsigned HOST_WIDE_INT mantissa, mant_hi;
12747 unsigned HOST_WIDE_INT mask;
12748 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12749 bool fail;
12751 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12752 return -1;
12754 r = *CONST_DOUBLE_REAL_VALUE (x);
12756 /* We can't represent these things, so detect them first. */
12757 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12758 return -1;
12760 /* Extract sign, exponent and mantissa. */
12761 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12762 r = real_value_abs (&r);
12763 exponent = REAL_EXP (&r);
12764 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12765 highest (sign) bit, with a fixed binary point at bit point_pos.
12766 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12767 bits for the mantissa, this may fail (low bits would be lost). */
12768 real_ldexp (&m, &r, point_pos - exponent);
12769 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12770 mantissa = w.elt (0);
12771 mant_hi = w.elt (1);
12773 /* If there are bits set in the low part of the mantissa, we can't
12774 represent this value. */
12775 if (mantissa != 0)
12776 return -1;
12778 /* Now make it so that mantissa contains the most-significant bits, and move
12779 the point_pos to indicate that the least-significant bits have been
12780 discarded. */
12781 point_pos -= HOST_BITS_PER_WIDE_INT;
12782 mantissa = mant_hi;
12784 /* We can permit four significant bits of mantissa only, plus a high bit
12785 which is always 1. */
12786 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12787 if ((mantissa & mask) != 0)
12788 return -1;
12790 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12791 mantissa >>= point_pos - 5;
12793 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12794 floating-point immediate zero with Neon using an integer-zero load, but
12795 that case is handled elsewhere.) */
12796 if (mantissa == 0)
12797 return -1;
12799 gcc_assert (mantissa >= 16 && mantissa <= 31);
12801 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12802 normalized significands are in the range [1, 2). (Our mantissa is shifted
12803 left 4 places at this point relative to normalized IEEE754 values). GCC
12804 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12805 REAL_EXP must be altered. */
12806 exponent = 5 - exponent;
12808 if (exponent < 0 || exponent > 7)
12809 return -1;
12811 /* Sign, mantissa and exponent are now in the correct form to plug into the
12812 formula described in the comment above. */
12813 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12816 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12818 vfp3_const_double_rtx (rtx x)
12820 if (!TARGET_VFP3)
12821 return 0;
12823 return vfp3_const_double_index (x) != -1;
12826 /* Recognize immediates which can be used in various Neon and MVE instructions.
12827 Legal immediates are described by the following table (for VMVN variants, the
12828 bitwise inverse of the constant shown is recognized. In either case, VMOV
12829 is output and the correct instruction to use for a given constant is chosen
12830 by the assembler). The constant shown is replicated across all elements of
12831 the destination vector.
12833 insn elems variant constant (binary)
12834 ---- ----- ------- -----------------
12835 vmov i32 0 00000000 00000000 00000000 abcdefgh
12836 vmov i32 1 00000000 00000000 abcdefgh 00000000
12837 vmov i32 2 00000000 abcdefgh 00000000 00000000
12838 vmov i32 3 abcdefgh 00000000 00000000 00000000
12839 vmov i16 4 00000000 abcdefgh
12840 vmov i16 5 abcdefgh 00000000
12841 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12842 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12843 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12844 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12845 vmvn i16 10 00000000 abcdefgh
12846 vmvn i16 11 abcdefgh 00000000
12847 vmov i32 12 00000000 00000000 abcdefgh 11111111
12848 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12849 vmov i32 14 00000000 abcdefgh 11111111 11111111
12850 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12851 vmov i8 16 abcdefgh
12852 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12853 eeeeeeee ffffffff gggggggg hhhhhhhh
12854 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12855 vmov f32 19 00000000 00000000 00000000 00000000
12857 For case 18, B = !b. Representable values are exactly those accepted by
12858 vfp3_const_double_index, but are output as floating-point numbers rather
12859 than indices.
12861 For case 19, we will change it to vmov.i32 when assembling.
12863 Variants 0-5 (inclusive) may also be used as immediates for the second
12864 operand of VORR/VBIC instructions.
12866 The INVERSE argument causes the bitwise inverse of the given operand to be
12867 recognized instead (used for recognizing legal immediates for the VAND/VORN
12868 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12869 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12870 output, rather than the real insns vbic/vorr).
12872 INVERSE makes no difference to the recognition of float vectors.
12874 The return value is the variant of immediate as shown in the above table, or
12875 -1 if the given value doesn't match any of the listed patterns.
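   As an illustrative example of the table above: the V4SI constant
   { 0x5500, 0x5500, 0x5500, 0x5500 } matches variant 1 and would be output
   as vmov.i32 with the immediate byte abcdefgh equal to 0x55.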
12877 static int
12878 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12879 rtx *modconst, int *elementwidth)
12881 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12882 matches = 1; \
12883 for (i = 0; i < idx; i += (STRIDE)) \
12884 if (!(TEST)) \
12885 matches = 0; \
12886 if (matches) \
12888 immtype = (CLASS); \
12889 elsize = (ELSIZE); \
12890 break; \
12893 unsigned int i, elsize = 0, idx = 0, n_elts;
12894 unsigned int innersize;
12895 unsigned char bytes[16] = {};
12896 int immtype = -1, matches;
12897 unsigned int invmask = inverse ? 0xff : 0;
12898 bool vector = GET_CODE (op) == CONST_VECTOR;
12900 if (vector)
12901 n_elts = CONST_VECTOR_NUNITS (op);
12902 else
12904 n_elts = 1;
12905 gcc_assert (mode != VOIDmode);
12908 innersize = GET_MODE_UNIT_SIZE (mode);
12910 /* Only support 128-bit vectors for MVE. */
12911 if (TARGET_HAVE_MVE
12912 && (!vector
12913 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12914 || n_elts * innersize != 16))
12915 return -1;
12917 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12918 return -1;
12920 /* Vectors of float constants. */
12921 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12923 rtx el0 = CONST_VECTOR_ELT (op, 0);
12925 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12926 return -1;
12928 /* FP16 vectors cannot be represented. */
12929 if (GET_MODE_INNER (mode) == HFmode)
12930 return -1;
12932 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12933 are distinct in this context. */
12934 if (!const_vec_duplicate_p (op))
12935 return -1;
12937 if (modconst)
12938 *modconst = CONST_VECTOR_ELT (op, 0);
12940 if (elementwidth)
12941 *elementwidth = 0;
12943 if (el0 == CONST0_RTX (GET_MODE (el0)))
12944 return 19;
12945 else
12946 return 18;
12949 /* The tricks done in the code below apply for little-endian vector layout.
12950 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12951 FIXME: Implement logic for big-endian vectors. */
12952 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12953 return -1;
12955 /* Splat vector constant out into a byte vector. */
12956 for (i = 0; i < n_elts; i++)
12958 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12959 unsigned HOST_WIDE_INT elpart;
12961 gcc_assert (CONST_INT_P (el));
12962 elpart = INTVAL (el);
12964 for (unsigned int byte = 0; byte < innersize; byte++)
12966 bytes[idx++] = (elpart & 0xff) ^ invmask;
12967 elpart >>= BITS_PER_UNIT;
12971 /* Sanity check. */
12972 gcc_assert (idx == GET_MODE_SIZE (mode));
12976 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12977 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12979 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12980 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12982 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12983 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12985 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12986 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12988 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12990 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12992 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12993 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12995 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12996 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12998 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12999 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13001 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13002 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13004 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13006 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13008 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13009 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13011 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13012 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13014 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13015 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13017 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13018 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13020 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13022 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13023 && bytes[i] == bytes[(i + 8) % idx]);
13025 while (0);
13027 if (immtype == -1)
13028 return -1;
13030 if (elementwidth)
13031 *elementwidth = elsize;
13033 if (modconst)
13035 unsigned HOST_WIDE_INT imm = 0;
13037 /* Un-invert bytes of recognized vector, if necessary. */
13038 if (invmask != 0)
13039 for (i = 0; i < idx; i++)
13040 bytes[i] ^= invmask;
13042 if (immtype == 17)
13044 /* FIXME: Broken on 32-bit H_W_I hosts. */
13045 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13047 for (i = 0; i < 8; i++)
13048 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13049 << (i * BITS_PER_UNIT);
13051 *modconst = GEN_INT (imm);
13053 else
13055 unsigned HOST_WIDE_INT imm = 0;
13057 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13058 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13060 *modconst = GEN_INT (imm);
13064 return immtype;
13065 #undef CHECK
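/* Worked example of the checks above (little-endian): the V4SImode constant
   { 0xAB, 0xAB, 0xAB, 0xAB } splats into the byte sequence ab 00 00 00
   repeated four times, which satisfies the immtype 0 test, so the function
   returns 0 with *ELEMENTWIDTH set to 32 and *MODCONST set to 0xAB (the form
   the move patterns can emit as a vmov.i32).  */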
13068 /* Return TRUE if rtx OP is legal for use as either a Neon or MVE VMOV (or,
13069    implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
13070    (or zero for float elements), and a modified constant (whatever should be
13071    output for a VMOV) in *MODCONST.  This function was renamed from
13072    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13073    it is used by both Neon and MVE.  */
13075 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13076 rtx *modconst, int *elementwidth)
13078 rtx tmpconst;
13079 int tmpwidth;
13080 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13082 if (retval == -1)
13083 return 0;
13085 if (modconst)
13086 *modconst = tmpconst;
13088 if (elementwidth)
13089 *elementwidth = tmpwidth;
13091 return 1;
13094 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
13095 the immediate is valid, write a constant suitable for using as an operand
13096 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13097 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13100 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13101 rtx *modconst, int *elementwidth)
13103 rtx tmpconst;
13104 int tmpwidth;
13105 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13107 if (retval < 0 || retval > 5)
13108 return 0;
13110 if (modconst)
13111 *modconst = tmpconst;
13113 if (elementwidth)
13114 *elementwidth = tmpwidth;
13116 return 1;
13119 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13120 the immediate is valid, write a constant suitable for using as an operand
13121 to VSHR/VSHL to *MODCONST and the corresponding element width to
13122    *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts, which
13123    have different limits on the immediate value.  */
13126 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13127 rtx *modconst, int *elementwidth,
13128 bool isleftshift)
13130 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13131 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13132 unsigned HOST_WIDE_INT last_elt = 0;
13133 unsigned HOST_WIDE_INT maxshift;
13135   /* All elements of the vector constant must be identical.  */
13136 for (i = 0; i < n_elts; i++)
13138 rtx el = CONST_VECTOR_ELT (op, i);
13139 unsigned HOST_WIDE_INT elpart;
13141 if (CONST_INT_P (el))
13142 elpart = INTVAL (el);
13143 else if (CONST_DOUBLE_P (el))
13144 return 0;
13145 else
13146 gcc_unreachable ();
13148 if (i != 0 && elpart != last_elt)
13149 return 0;
13151 last_elt = elpart;
13154 /* Shift less than element size. */
13155 maxshift = innersize * 8;
13157 if (isleftshift)
13159 /* Left shift immediate value can be from 0 to <size>-1. */
13160 if (last_elt >= maxshift)
13161 return 0;
13163 else
13165 /* Right shift immediate value can be from 1 to <size>. */
13166 if (last_elt == 0 || last_elt > maxshift)
13167 return 0;
13170 if (elementwidth)
13171 *elementwidth = innersize * 8;
13173 if (modconst)
13174 *modconst = CONST_VECTOR_ELT (op, 0);
13176 return 1;
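/* For example, with V8HImode elements MAXSHIFT is 16: a vector of all 3s is
   accepted for both left and right shifts, while a vector of all 16s is only
   accepted as a right-shift count (vshr takes #1..#16, vshl takes #0..#15).
   In the accepted cases *ELEMENTWIDTH is set to 16 and *MODCONST to the
   element holding the shift count.  */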
13179 /* Return a string suitable for output of Neon immediate logic operation
13180 MNEM. */
13182 char *
13183 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13184 int inverse, int quad)
13186 int width, is_valid;
13187 static char templ[40];
13189 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13191 gcc_assert (is_valid != 0);
13193 if (quad)
13194 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13195 else
13196 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13198 return templ;
13201 /* Return a string suitable for output of Neon immediate shift operation
13202 (VSHR or VSHL) MNEM. */
13204 char *
13205 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13206 machine_mode mode, int quad,
13207 bool isleftshift)
13209 int width, is_valid;
13210 static char templ[40];
13212 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13213 gcc_assert (is_valid != 0);
13215 if (quad)
13216 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13217 else
13218 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13220 return templ;
13223 /* Output a sequence of pairwise operations to implement a reduction.
13224 NOTE: We do "too much work" here, because pairwise operations work on two
13225 registers-worth of operands in one go. Unfortunately we can't exploit those
13226 extra calculations to do the full operation in fewer steps, I don't think.
13227 Although all vector elements of the result but the first are ignored, we
13228 actually calculate the same result in each of the elements. An alternative
13229 such as initially loading a vector with zero to use as each of the second
13230 operands would use up an additional register and take an extra instruction,
13231 for no particular gain. */
13233 void
13234 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13235 rtx (*reduc) (rtx, rtx, rtx))
13237 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13238 rtx tmpsum = op1;
13240 for (i = parts / 2; i >= 1; i /= 2)
13242 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13243 emit_insn (reduc (dest, tmpsum, tmpsum));
13244 tmpsum = dest;
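/* For instance, reducing a four-element vector with a pairwise-add generator
   runs the loop with I = 2 and then I = 1: the first step leaves
   (a+b, c+d, a+b, c+d) in a temporary, the second leaves a+b+c+d in every
   element of OP0, of which only element 0 is used by the caller.  */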
13248 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13249 loaded into a register using VDUP.
13251 If this is the case, and GENERATE is set, we also generate
13252 instructions to do this and return an RTX to assign to the register. */
13254 static rtx
13255 neon_vdup_constant (rtx vals, bool generate)
13257 machine_mode mode = GET_MODE (vals);
13258 machine_mode inner_mode = GET_MODE_INNER (mode);
13259 rtx x;
13261 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13262 return NULL_RTX;
13264 if (!const_vec_duplicate_p (vals, &x))
13265 /* The elements are not all the same. We could handle repeating
13266 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13267 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13268 vdup.i16). */
13269 return NULL_RTX;
13271 if (!generate)
13272 return x;
13274 /* We can load this constant by using VDUP and a constant in a
13275 single ARM register. This will be cheaper than a vector
13276 load. */
13278 x = copy_to_mode_reg (inner_mode, x);
13279 return gen_vec_duplicate (mode, x);
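/* For example, the V4SImode constant { 5, 5, 5, 5 } is handled here by
   copying #5 into a core register and returning a vec_duplicate of that
   register, which can then be emitted as a single vdup.32.  */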
13282 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13284 mve_bool_vec_to_const (rtx const_vec)
13286 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13287 int repeat = 16 / n_elts;
13288 int i;
13289 int hi_val = 0;
13291 for (i = 0; i < n_elts; i++)
13293 rtx el = CONST_VECTOR_ELT (const_vec, i);
13294 unsigned HOST_WIDE_INT elpart;
13296 gcc_assert (CONST_INT_P (el));
13297 elpart = INTVAL (el);
13299 for (int j = 0; j < repeat; j++)
13300 hi_val |= elpart << (i * repeat + j);
13302 return gen_int_mode (hi_val, HImode);
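/* Worked example: the V4BImode predicate constant { 1, 0, 1, 1 } is widened
   by REPEAT = 16/4 = 4 bits per element, so HI_VAL accumulates bits 0-3,
   8-11 and 12-15, giving the HImode constant 0xff0f.  */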
13305 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13306    constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13307 into a register.
13309 If this is the case, and GENERATE is set, we also generate code to do
13310 this and return an RTX to copy into the register. */
13313 neon_make_constant (rtx vals, bool generate)
13315 machine_mode mode = GET_MODE (vals);
13316 rtx target;
13317 rtx const_vec = NULL_RTX;
13318 int n_elts = GET_MODE_NUNITS (mode);
13319 int n_const = 0;
13320 int i;
13322 if (GET_CODE (vals) == CONST_VECTOR)
13323 const_vec = vals;
13324 else if (GET_CODE (vals) == PARALLEL)
13326 /* A CONST_VECTOR must contain only CONST_INTs and
13327 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13328 Only store valid constants in a CONST_VECTOR. */
13329 for (i = 0; i < n_elts; ++i)
13331 rtx x = XVECEXP (vals, 0, i);
13332 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13333 n_const++;
13335 if (n_const == n_elts)
13336 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13338 else
13339 gcc_unreachable ();
13341 if (const_vec != NULL
13342 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13343 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13344 return const_vec;
13345 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13346 return mve_bool_vec_to_const (const_vec);
13347 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13348 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13349 pipeline cycle; creating the constant takes one or two ARM
13350 pipeline cycles. */
13351 return target;
13352 else if (const_vec != NULL_RTX)
13353 /* Load from constant pool. On Cortex-A8 this takes two cycles
13354 (for either double or quad vectors). We cannot take advantage
13355 of single-cycle VLD1 because we need a PC-relative addressing
13356 mode. */
13357 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13358 else
13359 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13360 We cannot construct an initializer. */
13361 return NULL_RTX;
13364 /* Initialize vector TARGET to VALS. */
13366 void
13367 neon_expand_vector_init (rtx target, rtx vals)
13369 machine_mode mode = GET_MODE (target);
13370 machine_mode inner_mode = GET_MODE_INNER (mode);
13371 int n_elts = GET_MODE_NUNITS (mode);
13372 int n_var = 0, one_var = -1;
13373 bool all_same = true;
13374 rtx x, mem;
13375 int i;
13377 for (i = 0; i < n_elts; ++i)
13379 x = XVECEXP (vals, 0, i);
13380 if (!CONSTANT_P (x))
13381 ++n_var, one_var = i;
13383 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13384 all_same = false;
13387 if (n_var == 0)
13389 rtx constant = neon_make_constant (vals);
13390 if (constant != NULL_RTX)
13392 emit_move_insn (target, constant);
13393 return;
13397 /* Splat a single non-constant element if we can. */
13398 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13400 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13401 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13402 return;
13405 /* One field is non-constant. Load constant then overwrite varying
13406 field. This is more efficient than using the stack. */
13407 if (n_var == 1)
13409 rtx copy = copy_rtx (vals);
13410 rtx merge_mask = GEN_INT (1 << one_var);
13412 /* Load constant part of vector, substitute neighboring value for
13413 varying element. */
13414 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13415 neon_expand_vector_init (target, copy);
13417 /* Insert variable. */
13418 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13419 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13420 return;
13423 /* Construct the vector in memory one field at a time
13424 and load the whole vector. */
13425 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13426 for (i = 0; i < n_elts; i++)
13427 emit_move_insn (adjust_address_nv (mem, inner_mode,
13428 i * GET_MODE_SIZE (inner_mode)),
13429 XVECEXP (vals, 0, i));
13430 emit_move_insn (target, mem);
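/* Example of the single-variable case above: initializing a V4SImode vector
   with (x, 1, 2, 3), where x is not constant, first initializes TARGET with
   the constant (1, 1, 2, 3) (element 0 borrowed from its neighbour), then
   copies x into a core register and inserts it into lane 0 using the merge
   mask 1 << 0.  */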
13433 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise an
13434    error describing it as DESC if it doesn't.  EXP indicates the source
13435    location, which includes the inlining history for intrinsics.  */
13437 static void
13438 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13439 const_tree exp, const char *desc)
13441 HOST_WIDE_INT lane;
13443 gcc_assert (CONST_INT_P (operand));
13445 lane = INTVAL (operand);
13447 if (lane < low || lane >= high)
13449 if (exp)
13450 error_at (EXPR_LOCATION (exp),
13451 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13452 else
13453 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13457 /* Bounds-check lanes. */
13459 void
13460 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13461 const_tree exp)
13463 bounds_check (operand, low, high, exp, "lane");
13466 /* Bounds-check constants. */
13468 void
13469 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13471 bounds_check (operand, low, high, NULL_TREE, "constant");
13474 HOST_WIDE_INT
13475 neon_element_bits (machine_mode mode)
13477 return GET_MODE_UNIT_BITSIZE (mode);
13481 /* Predicates for `match_operand' and `match_operator'. */
13483 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13484 WB level is 2 if full writeback address modes are allowed, 1
13485 if limited writeback address modes (POST_INC and PRE_DEC) are
13486 allowed and 0 if no writeback at all is supported. */
13489 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13491 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13492 rtx ind;
13494 /* Reject eliminable registers. */
13495 if (! (reload_in_progress || reload_completed || lra_in_progress)
13496 && ( reg_mentioned_p (frame_pointer_rtx, op)
13497 || reg_mentioned_p (arg_pointer_rtx, op)
13498 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13499 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13500 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13501 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13502 return FALSE;
13504 /* Constants are converted into offsets from labels. */
13505 if (!MEM_P (op))
13506 return FALSE;
13508 ind = XEXP (op, 0);
13510 if (reload_completed
13511 && (LABEL_REF_P (ind)
13512 || (GET_CODE (ind) == CONST
13513 && GET_CODE (XEXP (ind, 0)) == PLUS
13514 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13515 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13516 return TRUE;
13518 /* Match: (mem (reg)). */
13519 if (REG_P (ind))
13520 return arm_address_register_rtx_p (ind, 0);
13522   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13523      acceptable whenever restricted writeback is allowed (subject to
13524      verification by arm_address_register_rtx_p).  We need full
13525      writeback to accept PRE_INC and POST_DEC, and at least
13526      restricted writeback for POST_INC and PRE_DEC.  */
13527 if (wb_level > 0
13528 && (GET_CODE (ind) == POST_INC
13529 || GET_CODE (ind) == PRE_DEC
13530 || (wb_level > 1
13531 && (GET_CODE (ind) == PRE_INC
13532 || GET_CODE (ind) == POST_DEC))))
13533 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13535 if (wb_level > 1
13536 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13537 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13538 && GET_CODE (XEXP (ind, 1)) == PLUS
13539 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13540 ind = XEXP (ind, 1);
13542 /* Match:
13543 (plus (reg)
13544 (const))
13546 The encoded immediate for 16-bit modes is multiplied by 2,
13547 while the encoded immediate for 32-bit and 64-bit modes is
13548 multiplied by 4. */
13549 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13550 if (GET_CODE (ind) == PLUS
13551 && REG_P (XEXP (ind, 0))
13552 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13553 && CONST_INT_P (XEXP (ind, 1))
13554 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13555 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13556 return TRUE;
13558 return FALSE;
13561 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13562 WB is true if full writeback address modes are allowed and is false
13563 if limited writeback address modes (POST_INC and PRE_DEC) are
13564 allowed. */
13566 int arm_coproc_mem_operand (rtx op, bool wb)
13568 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13571 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13572 context in which no writeback address modes are allowed. */
13575 arm_coproc_mem_operand_no_writeback (rtx op)
13577 return arm_coproc_mem_operand_wb (op, 0);
13580 /* This function returns TRUE on matching mode and op.
13581 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13582    2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).
13584 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13586 enum rtx_code code;
13587 int val, reg_no;
13589 /* Match: (mem (reg)). */
13590 if (REG_P (op))
13592 int reg_no = REGNO (op);
13593 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13594 ? reg_no <= LAST_LO_REGNUM
13595 : reg_no < LAST_ARM_REGNUM)
13596 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13598 code = GET_CODE (op);
13600 if (code == POST_INC || code == PRE_DEC
13601 || code == PRE_INC || code == POST_DEC)
13603 reg_no = REGNO (XEXP (op, 0));
13604 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13605 ? reg_no <= LAST_LO_REGNUM
13606 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13607 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13609 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13610 && GET_CODE (XEXP (op, 1)) == PLUS
13611 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13612 && REG_P (XEXP (op, 0))
13613 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13614 /* Make sure to only accept PLUS after reload_completed, otherwise
13615 this will interfere with auto_inc's pattern detection. */
13616 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13617 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13619 reg_no = REGNO (XEXP (op, 0));
13620 if (code == PLUS)
13621 val = INTVAL (XEXP (op, 1));
13622 else
13623 val = INTVAL (XEXP(XEXP (op, 1), 1));
13625 switch (mode)
13627 case E_V16QImode:
13628 case E_V8QImode:
13629 case E_V4QImode:
13630 if (abs (val) > 127)
13631 return FALSE;
13632 break;
13633 case E_V8HImode:
13634 case E_V8HFmode:
13635 case E_V4HImode:
13636 case E_V4HFmode:
13637 if (val % 2 != 0 || abs (val) > 254)
13638 return FALSE;
13639 break;
13640 case E_V4SImode:
13641 case E_V4SFmode:
13642 if (val % 4 != 0 || abs (val) > 508)
13643 return FALSE;
13644 break;
13645 default:
13646 return FALSE;
13648 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13649 || (MVE_STN_LDW_MODE (mode)
13650 ? reg_no <= LAST_LO_REGNUM
13651 : (reg_no < LAST_ARM_REGNUM
13652 && (code == PLUS || reg_no != SP_REGNUM))));
13654 return FALSE;
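/* For example, with MODE == V4SImode an offset of 252 is acceptable here (a
   multiple of 4 within +/-508), whereas 250 is rejected for not being a
   multiple of 4 and 512 is rejected for being out of range.  */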
13657 /* Return TRUE if OP is a memory operand which we can load or store a vector
13658 to/from. TYPE is one of the following values:
13659     0 - Vector load/store (vldr)
13660 1 - Core registers (ldm)
13661 2 - Element/structure loads (vld1)
13664 neon_vector_mem_operand (rtx op, int type, bool strict)
13666 rtx ind;
13668 /* Reject eliminable registers. */
13669 if (strict && ! (reload_in_progress || reload_completed)
13670 && (reg_mentioned_p (frame_pointer_rtx, op)
13671 || reg_mentioned_p (arg_pointer_rtx, op)
13672 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13673 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13674 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13675 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13676 return FALSE;
13678 /* Constants are converted into offsets from labels. */
13679 if (!MEM_P (op))
13680 return FALSE;
13682 ind = XEXP (op, 0);
13684 if (reload_completed
13685 && (LABEL_REF_P (ind)
13686 || (GET_CODE (ind) == CONST
13687 && GET_CODE (XEXP (ind, 0)) == PLUS
13688 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13689 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13690 return TRUE;
13692 /* Match: (mem (reg)). */
13693 if (REG_P (ind))
13694 return arm_address_register_rtx_p (ind, 0);
13696 /* Allow post-increment with Neon registers. */
13697 if ((type != 1 && GET_CODE (ind) == POST_INC)
13698 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13699 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13701   /* Allow post-increment by register for VLDn.  */
13702 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13703 && GET_CODE (XEXP (ind, 1)) == PLUS
13704 && REG_P (XEXP (XEXP (ind, 1), 1))
13705 && REG_P (XEXP (ind, 0))
13706 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13707 return true;
13709 /* Match:
13710 (plus (reg)
13711 (const)). */
13712 if (type == 0
13713 && GET_CODE (ind) == PLUS
13714 && REG_P (XEXP (ind, 0))
13715 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13716 && CONST_INT_P (XEXP (ind, 1))
13717 && INTVAL (XEXP (ind, 1)) > -1024
13718 /* For quad modes, we restrict the constant offset to be slightly less
13719 than what the instruction format permits. We have no such constraint
13720 on double mode offsets. (This must match arm_legitimate_index_p.) */
13721 && (INTVAL (XEXP (ind, 1))
13722 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13723 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13724 return TRUE;
13726 return FALSE;
13729 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13730 type. */
13732 neon_struct_mem_operand (rtx op)
13734 rtx ind;
13736 /* Reject eliminable registers. */
13737 if (! (reload_in_progress || reload_completed)
13738 && ( reg_mentioned_p (frame_pointer_rtx, op)
13739 || reg_mentioned_p (arg_pointer_rtx, op)
13740 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13741 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13742 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13743 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13744 return FALSE;
13746 /* Constants are converted into offsets from labels. */
13747 if (!MEM_P (op))
13748 return FALSE;
13750 ind = XEXP (op, 0);
13752 if (reload_completed
13753 && (LABEL_REF_P (ind)
13754 || (GET_CODE (ind) == CONST
13755 && GET_CODE (XEXP (ind, 0)) == PLUS
13756 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13757 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13758 return TRUE;
13760 /* Match: (mem (reg)). */
13761 if (REG_P (ind))
13762 return arm_address_register_rtx_p (ind, 0);
13764 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13765 if (GET_CODE (ind) == POST_INC
13766 || GET_CODE (ind) == PRE_DEC)
13767 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13769 return FALSE;
13772 /* Prepares the operands for the VCMLA by lane instruction such that the right
13773 register number is selected. This instruction is special in that it always
13774 requires a D register, however there is a choice to be made between Dn[0],
13775 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13777 The VCMLA by lane function always selects two values. For instance given D0
13778 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13779 used by the instruction. However given V4SF then index 0 and 1 are valid as
13780 D0[0] or D1[0] are both valid.
13782    This function centralizes that information based on OPERANDS: OPERANDS[3]
13783 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13784 updated to contain the right index. */
13786 rtx *
13787 neon_vcmla_lane_prepare_operands (rtx *operands)
13789 int lane = INTVAL (operands[4]);
13790 machine_mode constmode = SImode;
13791 machine_mode mode = GET_MODE (operands[3]);
13792 int regno = REGNO (operands[3]);
13793 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13794 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13796 operands[3] = gen_int_mode (regno + 1, constmode);
13797 operands[4]
13798 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13800 else
13802 operands[3] = gen_int_mode (regno, constmode);
13803 operands[4] = gen_int_mode (lane, constmode);
13805 return operands;
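/* Worked example: if OPERANDS[3] is q1 (d2/d3) in V4SFmode and OPERANDS[4]
   is 1, the selected pair lives in the upper D register, so OPERANDS[3]
   becomes the constant 3 (for d3) and OPERANDS[4] becomes 0; lane 0 would
   instead yield d2 with index 0.  */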
13809 /* Return true if X is a register that will be eliminated later on. */
13811 arm_eliminable_register (rtx x)
13813 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13814 || REGNO (x) == ARG_POINTER_REGNUM
13815 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13816 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13819 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13820 coprocessor registers. Otherwise return NO_REGS. */
13822 enum reg_class
13823 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13825 if (mode == HFmode)
13827 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13828 return GENERAL_REGS;
13829 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13830 return NO_REGS;
13831 return GENERAL_REGS;
13834 /* The neon move patterns handle all legitimate vector and struct
13835 addresses. */
13836 if (TARGET_NEON
13837 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13838 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13839 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13840 || VALID_NEON_STRUCT_MODE (mode)))
13841 return NO_REGS;
13843 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13844 return NO_REGS;
13846 return GENERAL_REGS;
13849 /* Values which must be returned in the most-significant end of the return
13850 register. */
13852 static bool
13853 arm_return_in_msb (const_tree valtype)
13855 return (TARGET_AAPCS_BASED
13856 && BYTES_BIG_ENDIAN
13857 && (AGGREGATE_TYPE_P (valtype)
13858 || TREE_CODE (valtype) == COMPLEX_TYPE
13859 || FIXED_POINT_TYPE_P (valtype)));
13862 /* Return TRUE if X references a SYMBOL_REF. */
13864 symbol_mentioned_p (rtx x)
13866 const char * fmt;
13867 int i;
13869 if (SYMBOL_REF_P (x))
13870 return 1;
13872 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13873 are constant offsets, not symbols. */
13874 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13875 return 0;
13877 fmt = GET_RTX_FORMAT (GET_CODE (x));
13879 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13881 if (fmt[i] == 'E')
13883 int j;
13885 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13886 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13887 return 1;
13889 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13890 return 1;
13893 return 0;
13896 /* Return TRUE if X references a LABEL_REF. */
13898 label_mentioned_p (rtx x)
13900 const char * fmt;
13901 int i;
13903 if (LABEL_REF_P (x))
13904 return 1;
13906 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13907 instruction, but they are constant offsets, not symbols. */
13908 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13909 return 0;
13911 fmt = GET_RTX_FORMAT (GET_CODE (x));
13912 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13914 if (fmt[i] == 'E')
13916 int j;
13918 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13919 if (label_mentioned_p (XVECEXP (x, i, j)))
13920 return 1;
13922 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13923 return 1;
13926 return 0;
13930 tls_mentioned_p (rtx x)
13932 switch (GET_CODE (x))
13934 case CONST:
13935 return tls_mentioned_p (XEXP (x, 0));
13937 case UNSPEC:
13938 if (XINT (x, 1) == UNSPEC_TLS)
13939 return 1;
13941 /* Fall through. */
13942 default:
13943 return 0;
13947 /* Must not copy any rtx that uses a pc-relative address.
13948 Also, disallow copying of load-exclusive instructions that
13949 may appear after splitting of compare-and-swap-style operations
13950 so as to prevent those loops from being transformed away from their
13951 canonical forms (see PR 69904). */
13953 static bool
13954 arm_cannot_copy_insn_p (rtx_insn *insn)
13956 /* The tls call insn cannot be copied, as it is paired with a data
13957 word. */
13958 if (recog_memoized (insn) == CODE_FOR_tlscall)
13959 return true;
13961 subrtx_iterator::array_type array;
13962 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13964 const_rtx x = *iter;
13965 if (GET_CODE (x) == UNSPEC
13966 && (XINT (x, 1) == UNSPEC_PIC_BASE
13967 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13968 return true;
13971 rtx set = single_set (insn);
13972 if (set)
13974 rtx src = SET_SRC (set);
13975 if (GET_CODE (src) == ZERO_EXTEND)
13976 src = XEXP (src, 0);
13978 /* Catch the load-exclusive and load-acquire operations. */
13979 if (GET_CODE (src) == UNSPEC_VOLATILE
13980 && (XINT (src, 1) == VUNSPEC_LL
13981 || XINT (src, 1) == VUNSPEC_LAX))
13982 return true;
13984 return false;
13987 enum rtx_code
13988 minmax_code (rtx x)
13990 enum rtx_code code = GET_CODE (x);
13992 switch (code)
13994 case SMAX:
13995 return GE;
13996 case SMIN:
13997 return LE;
13998 case UMIN:
13999 return LEU;
14000 case UMAX:
14001 return GEU;
14002 default:
14003 gcc_unreachable ();
14007 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14009 bool
14010 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14011 int *mask, bool *signed_sat)
14013 /* The high bound must be a power of two minus one. */
14014 int log = exact_log2 (INTVAL (hi_bound) + 1);
14015 if (log == -1)
14016 return false;
14018 /* The low bound is either zero (for usat) or one less than the
14019 negation of the high bound (for ssat). */
14020 if (INTVAL (lo_bound) == 0)
14022 if (mask)
14023 *mask = log;
14024 if (signed_sat)
14025 *signed_sat = false;
14027 return true;
14030 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14032 if (mask)
14033 *mask = log + 1;
14034 if (signed_sat)
14035 *signed_sat = true;
14037 return true;
14040 return false;
14043 /* Return 1 if memory locations are adjacent. */
14045 adjacent_mem_locations (rtx a, rtx b)
14047 /* We don't guarantee to preserve the order of these memory refs. */
14048 if (volatile_refs_p (a) || volatile_refs_p (b))
14049 return 0;
14051 if ((REG_P (XEXP (a, 0))
14052 || (GET_CODE (XEXP (a, 0)) == PLUS
14053 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14054 && (REG_P (XEXP (b, 0))
14055 || (GET_CODE (XEXP (b, 0)) == PLUS
14056 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14058 HOST_WIDE_INT val0 = 0, val1 = 0;
14059 rtx reg0, reg1;
14060 int val_diff;
14062 if (GET_CODE (XEXP (a, 0)) == PLUS)
14064 reg0 = XEXP (XEXP (a, 0), 0);
14065 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14067 else
14068 reg0 = XEXP (a, 0);
14070 if (GET_CODE (XEXP (b, 0)) == PLUS)
14072 reg1 = XEXP (XEXP (b, 0), 0);
14073 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14075 else
14076 reg1 = XEXP (b, 0);
14078 /* Don't accept any offset that will require multiple
14079 instructions to handle, since this would cause the
14080 arith_adjacentmem pattern to output an overlong sequence. */
14081 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14082 return 0;
14084 /* Don't allow an eliminable register: register elimination can make
14085 the offset too large. */
14086 if (arm_eliminable_register (reg0))
14087 return 0;
14089 val_diff = val1 - val0;
14091 if (arm_ld_sched)
14093 /* If the target has load delay slots, then there's no benefit
14094 to using an ldm instruction unless the offset is zero and
14095 we are optimizing for size. */
14096 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14097 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14098 && (val_diff == 4 || val_diff == -4));
14101 return ((REGNO (reg0) == REGNO (reg1))
14102 && (val_diff == 4 || val_diff == -4));
14105 return 0;
14108 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14109 for load operations, false for store operations. CONSECUTIVE is true
14110 if the register numbers in the operation must be consecutive in the register
14111 bank. RETURN_PC is true if value is to be loaded in PC.
14112 The pattern we are trying to match for load is:
14113 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14114 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14117 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14119 where
14120 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14121 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14122 3. If consecutive is TRUE, then for kth register being loaded,
14123 REGNO (R_dk) = REGNO (R_d0) + k.
14124 The pattern for store is similar. */
14125 bool
14126 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14127 bool consecutive, bool return_pc)
14129 HOST_WIDE_INT count = XVECLEN (op, 0);
14130 rtx reg, mem, addr;
14131 unsigned regno;
14132 unsigned first_regno;
14133 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14134 rtx elt;
14135 bool addr_reg_in_reglist = false;
14136 bool update = false;
14137 int reg_increment;
14138 int offset_adj;
14139 int regs_per_val;
14141 /* If not in SImode, then registers must be consecutive
14142 (e.g., VLDM instructions for DFmode). */
14143 gcc_assert ((mode == SImode) || consecutive);
14144 /* Setting return_pc for stores is illegal. */
14145 gcc_assert (!return_pc || load);
14147 /* Set up the increments and the regs per val based on the mode. */
14148 reg_increment = GET_MODE_SIZE (mode);
14149 regs_per_val = reg_increment / 4;
14150 offset_adj = return_pc ? 1 : 0;
14152 if (count <= 1
14153 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14154 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14155 return false;
14157 /* Check if this is a write-back. */
14158 elt = XVECEXP (op, 0, offset_adj);
14159 if (GET_CODE (SET_SRC (elt)) == PLUS)
14161 i++;
14162 base = 1;
14163 update = true;
14165 /* The offset adjustment must be the number of registers being
14166 popped times the size of a single register. */
14167 if (!REG_P (SET_DEST (elt))
14168 || !REG_P (XEXP (SET_SRC (elt), 0))
14169 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14170 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14171 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14172 ((count - 1 - offset_adj) * reg_increment))
14173 return false;
14176 i = i + offset_adj;
14177 base = base + offset_adj;
14178 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14179 success depends on the type: VLDM can do just one reg,
14180 LDM must do at least two. */
14181 if ((count <= i) && (mode == SImode))
14182 return false;
14184 elt = XVECEXP (op, 0, i - 1);
14185 if (GET_CODE (elt) != SET)
14186 return false;
14188 if (load)
14190 reg = SET_DEST (elt);
14191 mem = SET_SRC (elt);
14193 else
14195 reg = SET_SRC (elt);
14196 mem = SET_DEST (elt);
14199 if (!REG_P (reg) || !MEM_P (mem))
14200 return false;
14202 regno = REGNO (reg);
14203 first_regno = regno;
14204 addr = XEXP (mem, 0);
14205 if (GET_CODE (addr) == PLUS)
14207 if (!CONST_INT_P (XEXP (addr, 1)))
14208 return false;
14210 offset = INTVAL (XEXP (addr, 1));
14211 addr = XEXP (addr, 0);
14214 if (!REG_P (addr))
14215 return false;
14217 /* Don't allow SP to be loaded unless it is also the base register. It
14218 guarantees that SP is reset correctly when an LDM instruction
14219 is interrupted. Otherwise, we might end up with a corrupt stack. */
14220 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14221 return false;
14223 if (regno == REGNO (addr))
14224 addr_reg_in_reglist = true;
14226 for (; i < count; i++)
14228 elt = XVECEXP (op, 0, i);
14229 if (GET_CODE (elt) != SET)
14230 return false;
14232 if (load)
14234 reg = SET_DEST (elt);
14235 mem = SET_SRC (elt);
14237 else
14239 reg = SET_SRC (elt);
14240 mem = SET_DEST (elt);
14243 if (!REG_P (reg)
14244 || GET_MODE (reg) != mode
14245 || REGNO (reg) <= regno
14246 || (consecutive
14247 && (REGNO (reg) !=
14248 (unsigned int) (first_regno + regs_per_val * (i - base))))
14249 /* Don't allow SP to be loaded unless it is also the base register. It
14250 guarantees that SP is reset correctly when an LDM instruction
14251 is interrupted. Otherwise, we might end up with a corrupt stack. */
14252 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14253 || !MEM_P (mem)
14254 || GET_MODE (mem) != mode
14255 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14256 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14257 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14258 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14259 offset + (i - base) * reg_increment))
14260 && (!REG_P (XEXP (mem, 0))
14261 || offset + (i - base) * reg_increment != 0)))
14262 return false;
14264 regno = REGNO (reg);
14265 if (regno == REGNO (addr))
14266 addr_reg_in_reglist = true;
14269 if (load)
14271 if (update && addr_reg_in_reglist)
14272 return false;
14274     /* For Thumb-1, the address register is always modified - either by write-back
14275 or by explicit load. If the pattern does not describe an update,
14276 then the address register must be in the list of loaded registers. */
14277 if (TARGET_THUMB1)
14278 return update || addr_reg_in_reglist;
14281 return true;
14284 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14285 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14286 following form:
14288 [(set (reg:SI <N>) (const_int 0))
14289 (set (reg:SI <M>) (const_int 0))
14291 (unspec_volatile [(const_int 0)]
14292 VUNSPEC_CLRM_APSR)
14293 (clobber (reg:CC CC_REGNUM))
14296    Any number (including 0) of set expressions is valid; the volatile unspec is
14297    optional.  All registers but SP and PC are allowed, and the registers must be
14298    in strictly increasing order.
14300 To be a valid VSCCLRM pattern, OP must have the following form:
14302 [(unspec_volatile [(const_int 0)]
14303 VUNSPEC_VSCCLRM_VPR)
14304 (set (reg:SF <N>) (const_int 0))
14305 (set (reg:SF <M>) (const_int 0))
14309    As with CLRM, any number (including 0) of set expressions is valid; however,
14310 the volatile unspec is mandatory here. Any VFP single-precision register is
14311 accepted but all registers must be consecutive and in increasing order. */
14313 bool
14314 clear_operation_p (rtx op, bool vfp)
14316 unsigned regno;
14317 unsigned last_regno = INVALID_REGNUM;
14318 rtx elt, reg, zero;
14319 int count = XVECLEN (op, 0);
14320 int first_set = vfp ? 1 : 0;
14321 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14323 for (int i = first_set; i < count; i++)
14325 elt = XVECEXP (op, 0, i);
14327 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14329 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14330 || XVECLEN (elt, 0) != 1
14331 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14332 || i != count - 2)
14333 return false;
14335 continue;
14338 if (GET_CODE (elt) == CLOBBER)
14339 continue;
14341 if (GET_CODE (elt) != SET)
14342 return false;
14344 reg = SET_DEST (elt);
14345 zero = SET_SRC (elt);
14347 if (!REG_P (reg)
14348 || GET_MODE (reg) != expected_mode
14349 || zero != CONST0_RTX (SImode))
14350 return false;
14352 regno = REGNO (reg);
14354 if (vfp)
14356 if (i != first_set && regno != last_regno + 1)
14357 return false;
14359 else
14361 if (regno == SP_REGNUM || regno == PC_REGNUM)
14362 return false;
14363 if (i != first_set && regno <= last_regno)
14364 return false;
14367 last_regno = regno;
14370 return true;
14373 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14374 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14375 instruction. ADD_OFFSET is nonzero if the base address register needs
14376 to be modified with an add instruction before we can use it. */
14378 static bool
14379 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14380 int nops, HOST_WIDE_INT add_offset)
14382 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14383 if the offset isn't small enough. The reason 2 ldrs are faster
14384 is because these ARMs are able to do more than one cache access
14385 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14386 whilst the ARM8 has a double bandwidth cache. This means that
14387 these cores can do both an instruction fetch and a data fetch in
14388 a single cycle, so the trick of calculating the address into a
14389 scratch register (one of the result regs) and then doing a load
14390 multiple actually becomes slower (and no smaller in code size).
14391 That is the transformation
14393 ldr rd1, [rbase + offset]
14394 ldr rd2, [rbase + offset + 4]
14398 add rd1, rbase, offset
14399 ldmia rd1, {rd1, rd2}
14401 produces worse code -- '3 cycles + any stalls on rd2' instead of
14402 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14403 access per cycle, the first sequence could never complete in less
14404 than 6 cycles, whereas the ldm sequence would only take 5 and
14405 would make better use of sequential accesses if not hitting the
14406 cache.
14408 We cheat here and test 'arm_ld_sched' which we currently know to
14409 only be true for the ARM8, ARM9 and StrongARM. If this ever
14410 changes, then the test below needs to be reworked. */
14411 if (nops == 2 && arm_ld_sched && add_offset != 0)
14412 return false;
14414 /* XScale has load-store double instructions, but they have stricter
14415 alignment requirements than load-store multiple, so we cannot
14416 use them.
14418 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14419 the pipeline until completion.
14421        NREGS CYCLES
14422          1     3
14423          2     4
14424          3     5
14425          4     6
14427 An ldr instruction takes 1-3 cycles, but does not block the
14428 pipeline.
14430 NREGS CYCLES
14431 1 1-3
14432 2 2-6
14433 3 3-9
14434 4 4-12
14436 Best case ldr will always win. However, the more ldr instructions
14437 we issue, the less likely we are to be able to schedule them well.
14438 Using ldr instructions also increases code size.
14440 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14441 for counts of 3 or 4 regs. */
14442 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14443 return false;
14444 return true;
14447 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14448 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14449 an array ORDER which describes the sequence to use when accessing the
14450 offsets that produces an ascending order. In this sequence, each
14451 offset must be larger by exactly 4 than the previous one. ORDER[0]
14452 must have been filled in with the lowest offset by the caller.
14453 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14454 we use to verify that ORDER produces an ascending order of registers.
14455 Return true if it was possible to construct such an order, false if
14456 not. */
14458 static bool
14459 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14460 int *unsorted_regs)
14462 int i;
14463 for (i = 1; i < nops; i++)
14465 int j;
14467 order[i] = order[i - 1];
14468 for (j = 0; j < nops; j++)
14469 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14471 /* We must find exactly one offset that is higher than the
14472 previous one by 4. */
14473 if (order[i] != order[i - 1])
14474 return false;
14475 order[i] = j;
14477 if (order[i] == order[i - 1])
14478 return false;
14479 /* The register numbers must be ascending. */
14480 if (unsorted_regs != NULL
14481 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14482 return false;
14484 return true;
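/* For instance, with UNSORTED_OFFSETS = { 8, 0, 4, 12 } the caller presets
   ORDER[0] = 1 (the index of the lowest offset, 0); the loop then fills
   ORDER = { 1, 2, 0, 3 }, each step finding the unique offset exactly 4
   larger than the previous one.  If UNSORTED_REGS is supplied, the register
   numbers at indices 1, 2, 0, 3 must also be ascending.  */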
14487 /* Used to determine in a peephole whether a sequence of load
14488 instructions can be changed into a load-multiple instruction.
14489 NOPS is the number of separate load instructions we are examining. The
14490 first NOPS entries in OPERANDS are the destination registers, the
14491 next NOPS entries are memory operands. If this function is
14492 successful, *BASE is set to the common base register of the memory
14493 accesses; *LOAD_OFFSET is set to the first memory location's offset
14494 from that base register.
14495 REGS is an array filled in with the destination register numbers.
14496    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14497    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
14498 the sequence of registers in REGS matches the loads from ascending memory
14499 locations, and the function verifies that the register numbers are
14500 themselves ascending. If CHECK_REGS is false, the register numbers
14501 are stored in the order they are found in the operands. */
14502 static int
14503 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14504 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14506 int unsorted_regs[MAX_LDM_STM_OPS];
14507 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14508 int order[MAX_LDM_STM_OPS];
14509 int base_reg = -1;
14510 int i, ldm_case;
14512 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14513 easily extended if required. */
14514 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14516 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14518 /* Loop over the operands and check that the memory references are
14519 suitable (i.e. immediate offsets from the same base register). At
14520 the same time, extract the target register, and the memory
14521 offsets. */
14522 for (i = 0; i < nops; i++)
14524 rtx reg;
14525 rtx offset;
14527 /* Convert a subreg of a mem into the mem itself. */
14528 if (GET_CODE (operands[nops + i]) == SUBREG)
14529 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14531 gcc_assert (MEM_P (operands[nops + i]));
14533 /* Don't reorder volatile memory references; it doesn't seem worth
14534 looking for the case where the order is ok anyway. */
14535 if (MEM_VOLATILE_P (operands[nops + i]))
14536 return 0;
14538 offset = const0_rtx;
14540 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14541 || (SUBREG_P (reg)
14542 && REG_P (reg = SUBREG_REG (reg))))
14543 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14544 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14545 || (SUBREG_P (reg)
14546 && REG_P (reg = SUBREG_REG (reg))))
14547 && (CONST_INT_P (offset
14548 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14550 if (i == 0)
14552 base_reg = REGNO (reg);
14553 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14554 return 0;
14556 else if (base_reg != (int) REGNO (reg))
14557 /* Not addressed from the same base register. */
14558 return 0;
14560 unsorted_regs[i] = (REG_P (operands[i])
14561 ? REGNO (operands[i])
14562 : REGNO (SUBREG_REG (operands[i])));
14564 /* If it isn't an integer register, or if it overwrites the
14565 base register but isn't the last insn in the list, then
14566 we can't do this. */
14567 if (unsorted_regs[i] < 0
14568 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14569 || unsorted_regs[i] > 14
14570 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14571 return 0;
14573 /* Don't allow SP to be loaded unless it is also the base
14574 register. It guarantees that SP is reset correctly when
14575 an LDM instruction is interrupted. Otherwise, we might
14576 end up with a corrupt stack. */
14577 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14578 return 0;
14580 unsorted_offsets[i] = INTVAL (offset);
14581 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14582 order[0] = i;
14584 else
14585 /* Not a suitable memory address. */
14586 return 0;
14589 /* All the useful information has now been extracted from the
14590 operands into unsorted_regs and unsorted_offsets; additionally,
14591 order[0] has been set to the lowest offset in the list. Sort
14592 the offsets into order, verifying that they are adjacent, and
14593 check that the register numbers are ascending. */
14594 if (!compute_offset_order (nops, unsorted_offsets, order,
14595 check_regs ? unsorted_regs : NULL))
14596 return 0;
14598 if (saved_order)
14599 memcpy (saved_order, order, sizeof order);
14601 if (base)
14603 *base = base_reg;
14605 for (i = 0; i < nops; i++)
14606 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14608 *load_offset = unsorted_offsets[order[0]];
14611 if (unsorted_offsets[order[0]] == 0)
14612 ldm_case = 1; /* ldmia */
14613 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14614 ldm_case = 2; /* ldmib */
14615 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14616 ldm_case = 3; /* ldmda */
14617 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14618 ldm_case = 4; /* ldmdb */
14619 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14620 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14621 ldm_case = 5;
14622 else
14623 return 0;
14625 if (!multiple_operation_profitable_p (false, nops,
14626 ldm_case == 5
14627 ? unsorted_offsets[order[0]] : 0))
14628 return 0;
14630 return ldm_case;
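/* For example, three loads of r4, r5 and r6 from [r0], [r0, #4] and
   [r0, #8] give offsets { 0, 4, 8 } from base r0; the lowest offset is 0,
   so the function returns ldm_case 1 (ldmia) with *LOAD_OFFSET = 0 and
   REGS = { 4, 5, 6 }.  */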
14633 /* Used to determine in a peephole whether a sequence of store instructions can
14634 be changed into a store-multiple instruction.
14635 NOPS is the number of separate store instructions we are examining.
14636 NOPS_TOTAL is the total number of instructions recognized by the peephole
14637 pattern.
14638 The first NOPS entries in OPERANDS are the source registers, the next
14639 NOPS entries are memory operands. If this function is successful, *BASE is
14640 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14641 to the first memory location's offset from that base register. REGS is an
14642 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14643 likewise filled with the corresponding rtx's.
14644    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14645 numbers to an ascending order of stores.
14646 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14647 from ascending memory locations, and the function verifies that the register
14648 numbers are themselves ascending. If CHECK_REGS is false, the register
14649 numbers are stored in the order they are found in the operands. */
14650 static int
14651 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14652 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14653 HOST_WIDE_INT *load_offset, bool check_regs)
14655 int unsorted_regs[MAX_LDM_STM_OPS];
14656 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14657 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14658 int order[MAX_LDM_STM_OPS];
14659 int base_reg = -1;
14660 rtx base_reg_rtx = NULL;
14661 int i, stm_case;
14663 /* Write back of base register is currently only supported for Thumb 1. */
14664 int base_writeback = TARGET_THUMB1;
14666 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14667 easily extended if required. */
14668 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14670 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14672 /* Loop over the operands and check that the memory references are
14673 suitable (i.e. immediate offsets from the same base register). At
14674 the same time, extract the target register, and the memory
14675 offsets. */
14676 for (i = 0; i < nops; i++)
14678 rtx reg;
14679 rtx offset;
14681 /* Convert a subreg of a mem into the mem itself. */
14682 if (GET_CODE (operands[nops + i]) == SUBREG)
14683 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14685 gcc_assert (MEM_P (operands[nops + i]));
14687 /* Don't reorder volatile memory references; it doesn't seem worth
14688 looking for the case where the order is ok anyway. */
14689 if (MEM_VOLATILE_P (operands[nops + i]))
14690 return 0;
14692 offset = const0_rtx;
14694 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14695 || (SUBREG_P (reg)
14696 && REG_P (reg = SUBREG_REG (reg))))
14697 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14698 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14699 || (SUBREG_P (reg)
14700 && REG_P (reg = SUBREG_REG (reg))))
14701 && (CONST_INT_P (offset
14702 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14704 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14705 ? operands[i] : SUBREG_REG (operands[i]));
14706 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14708 if (i == 0)
14710 base_reg = REGNO (reg);
14711 base_reg_rtx = reg;
14712 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14713 return 0;
14715 else if (base_reg != (int) REGNO (reg))
14716 /* Not addressed from the same base register. */
14717 return 0;
14719 /* If it isn't an integer register, then we can't do this. */
14720 if (unsorted_regs[i] < 0
14721 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14722 /* The effects are unpredictable if the base register is
14723 both updated and stored. */
14724 || (base_writeback && unsorted_regs[i] == base_reg)
14725 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14726 || unsorted_regs[i] > 14)
14727 return 0;
14729 unsorted_offsets[i] = INTVAL (offset);
14730 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14731 order[0] = i;
14733 else
14734 /* Not a suitable memory address. */
14735 return 0;
14738 /* All the useful information has now been extracted from the
14739 operands into unsorted_regs and unsorted_offsets; additionally,
14740 order[0] has been set to the lowest offset in the list. Sort
14741 the offsets into order, verifying that they are adjacent, and
14742 check that the register numbers are ascending. */
14743 if (!compute_offset_order (nops, unsorted_offsets, order,
14744 check_regs ? unsorted_regs : NULL))
14745 return 0;
14747 if (saved_order)
14748 memcpy (saved_order, order, sizeof order);
14750 if (base)
14752 *base = base_reg;
14754 for (i = 0; i < nops; i++)
14756 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14757 if (reg_rtxs)
14758 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14761 *load_offset = unsorted_offsets[order[0]];
14764 if (TARGET_THUMB1
14765 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14766 return 0;
14768 if (unsorted_offsets[order[0]] == 0)
14769 stm_case = 1; /* stmia */
14770 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14771 stm_case = 2; /* stmib */
14772 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14773 stm_case = 3; /* stmda */
14774 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14775 stm_case = 4; /* stmdb */
14776 else
14777 return 0;
14779 if (!multiple_operation_profitable_p (false, nops, 0))
14780 return 0;
14782 return stm_case;
14785 /* Routines for use in generating RTL. */
14787 /* Generate a load-multiple instruction. COUNT is the number of loads in
14788 the instruction; REGS and MEMS are arrays containing the operands.
14789 BASEREG is the base register to be used in addressing the memory operands.
14790 WBACK_OFFSET is nonzero if the instruction should update the base
14791 register. */
14793 static rtx
14794 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14795 HOST_WIDE_INT wback_offset)
14797 int i = 0, j;
14798 rtx result;
14800 if (!multiple_operation_profitable_p (false, count, 0))
14802 rtx seq;
14804 start_sequence ();
14806 for (i = 0; i < count; i++)
14807 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14809 if (wback_offset != 0)
14810 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14812 seq = get_insns ();
14813 end_sequence ();
14815 return seq;
14818 result = gen_rtx_PARALLEL (VOIDmode,
14819 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14820 if (wback_offset != 0)
14822 XVECEXP (result, 0, 0)
14823 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14824 i = 1;
14825 count++;
14828 for (j = 0; i < count; i++, j++)
14829 XVECEXP (result, 0, i)
14830 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14832 return result;
14835 /* Generate a store-multiple instruction. COUNT is the number of stores in
14836 the instruction; REGS and MEMS are arrays containing the operands.
14837 BASEREG is the base register to be used in addressing the memory operands.
14838 WBACK_OFFSET is nonzero if the instruction should update the base
14839 register. */
14841 static rtx
14842 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14843 HOST_WIDE_INT wback_offset)
14845 int i = 0, j;
14846 rtx result;
14848 if (GET_CODE (basereg) == PLUS)
14849 basereg = XEXP (basereg, 0);
14851 if (!multiple_operation_profitable_p (false, count, 0))
14853 rtx seq;
14855 start_sequence ();
14857 for (i = 0; i < count; i++)
14858 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14860 if (wback_offset != 0)
14861 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14863 seq = get_insns ();
14864 end_sequence ();
14866 return seq;
14869 result = gen_rtx_PARALLEL (VOIDmode,
14870 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14871 if (wback_offset != 0)
14873 XVECEXP (result, 0, 0)
14874 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14875 i = 1;
14876 count++;
14879 for (j = 0; i < count; i++, j++)
14880 XVECEXP (result, 0, i)
14881 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14883 return result;
14886 /* Generate either a load-multiple or a store-multiple instruction. This
14887 function can be used in situations where we can start with a single MEM
14888 rtx and adjust its address upwards.
14889 COUNT is the number of operations in the instruction, not counting a
14890 possible update of the base register. REGS is an array containing the
14891 register operands.
14892 BASEREG is the base register to be used in addressing the memory operands,
14893 which are constructed from BASEMEM.
14894 WRITE_BACK specifies whether the generated instruction should include an
14895 update of the base register.
14896 OFFSETP is used to pass an offset to and from this function; this offset
14897 is not used when constructing the address (instead BASEMEM should have an
14898 appropriate offset in its address), it is used only for setting
14899 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14901 static rtx
14902 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14903 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14905 rtx mems[MAX_LDM_STM_OPS];
14906 HOST_WIDE_INT offset = *offsetp;
14907 int i;
14909 gcc_assert (count <= MAX_LDM_STM_OPS);
14911 if (GET_CODE (basereg) == PLUS)
14912 basereg = XEXP (basereg, 0);
14914 for (i = 0; i < count; i++)
14916 rtx addr = plus_constant (Pmode, basereg, i * 4);
14917 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14918 offset += 4;
14921 if (write_back)
14922 *offsetp = offset;
14924 if (is_load)
14925 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14926 write_back ? 4 * count : 0);
14927 else
14928 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14929 write_back ? 4 * count : 0);
14933 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14934 rtx basemem, HOST_WIDE_INT *offsetp)
14936 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14937 offsetp);
14941 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14942 rtx basemem, HOST_WIDE_INT *offsetp)
14944 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14945 offsetp);
14948 /* Called from a peephole2 expander to turn a sequence of loads into an
14949 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14950 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14951 is true if we can reorder the registers because they are used commutatively
14952 subsequently.
14953 Returns true iff we could generate a new instruction. */
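/* Note that ldm/stm transfer the lowest-numbered register to or from the
   lowest address, so the final register list must be in ascending order;
   when SORT_REGS is true the simple exchange sort below reorders REGS to
   satisfy that.  */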
14955 bool
14956 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14958 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14959 rtx mems[MAX_LDM_STM_OPS];
14960 int i, j, base_reg;
14961 rtx base_reg_rtx;
14962 HOST_WIDE_INT offset;
14963 int write_back = FALSE;
14964 int ldm_case;
14965 rtx addr;
14967 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14968 &base_reg, &offset, !sort_regs);
14970 if (ldm_case == 0)
14971 return false;
14973 if (sort_regs)
14974 for (i = 0; i < nops - 1; i++)
14975 for (j = i + 1; j < nops; j++)
14976 if (regs[i] > regs[j])
14978 int t = regs[i];
14979 regs[i] = regs[j];
14980 regs[j] = t;
14982 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14984 if (TARGET_THUMB1)
14986 gcc_assert (ldm_case == 1 || ldm_case == 5);
14988 /* Thumb-1 ldm uses writeback unless the base register is also loaded. */
14989 write_back = true;
14990 for (i = 0; i < nops; i++)
14991 if (base_reg == regs[i])
14992 write_back = false;
14994 /* Ensure the base is dead if it is updated. */
14995 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14996 return false;
14999 if (ldm_case == 5)
15001 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15002 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15003 offset = 0;
15004 base_reg_rtx = newbase;
15007 for (i = 0; i < nops; i++)
15009 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15010 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15011 SImode, addr, 0);
15013 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15014 write_back ? offset + i * 4 : 0));
15015 return true;
15018 /* Called from a peephole2 expander to turn a sequence of stores into an
15019 STM instruction. OPERANDS are the operands found by the peephole matcher;
15020 NOPS indicates how many separate stores we are trying to combine.
15021 Returns true iff we could generate a new instruction. */
15023 bool
15024 gen_stm_seq (rtx *operands, int nops)
15026 int i;
15027 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15028 rtx mems[MAX_LDM_STM_OPS];
15029 int base_reg;
15030 rtx base_reg_rtx;
15031 HOST_WIDE_INT offset;
15032 int write_back = FALSE;
15033 int stm_case;
15034 rtx addr;
15035 bool base_reg_dies;
15037 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15038 mem_order, &base_reg, &offset, true);
15040 if (stm_case == 0)
15041 return false;
15043 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15045 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15046 if (TARGET_THUMB1)
15048 gcc_assert (base_reg_dies);
15049 write_back = TRUE;
15052 if (stm_case == 5)
15054 gcc_assert (base_reg_dies);
15055 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15056 offset = 0;
15059 addr = plus_constant (Pmode, base_reg_rtx, offset);
15061 for (i = 0; i < nops; i++)
15063 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15064 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15065 SImode, addr, 0);
15067 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15068 write_back ? offset + i * 4 : 0));
15069 return true;
15072 /* Called from a peephole2 expander to turn a sequence of stores that are
15073 preceded by constant loads into an STM instruction. OPERANDS are the
15074 operands found by the peephole matcher; NOPS indicates how many
15075 separate stores we are trying to combine; there are 2 * NOPS
15076 instructions in the peephole.
15077 Returns true iff we could generate a new instruction. */
15079 bool
15080 gen_const_stm_seq (rtx *operands, int nops)
15082 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15083 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15084 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15085 rtx mems[MAX_LDM_STM_OPS];
15086 int base_reg;
15087 rtx base_reg_rtx;
15088 HOST_WIDE_INT offset;
15089 int write_back = FALSE;
15090 int stm_case;
15091 rtx addr;
15092 bool base_reg_dies;
15093 int i, j;
15094 HARD_REG_SET allocated;
15096 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15097 mem_order, &base_reg, &offset, false);
15099 if (stm_case == 0)
15100 return false;
15102 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15104 /* If the same register is used more than once, try to find a free
15105 register. */
15106 CLEAR_HARD_REG_SET (allocated);
15107 for (i = 0; i < nops; i++)
15109 for (j = i + 1; j < nops; j++)
15110 if (regs[i] == regs[j])
15112 rtx t = peep2_find_free_register (0, nops * 2,
15113 TARGET_THUMB1 ? "l" : "r",
15114 SImode, &allocated);
15115 if (t == NULL_RTX)
15116 return false;
15117 reg_rtxs[i] = t;
15118 regs[i] = REGNO (t);
15122 /* Compute an ordering that maps the register numbers to an ascending
15123 sequence. */
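  /* For instance, with regs == {3, 1, 2} the selection loop below produces
     reg_order == {1, 2, 0}, i.e. the indices sorted by ascending register
     number.  */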
15124 reg_order[0] = 0;
15125 for (i = 0; i < nops; i++)
15126 if (regs[i] < regs[reg_order[0]])
15127 reg_order[0] = i;
15129 for (i = 1; i < nops; i++)
15131 int this_order = reg_order[i - 1];
15132 for (j = 0; j < nops; j++)
15133 if (regs[j] > regs[reg_order[i - 1]]
15134 && (this_order == reg_order[i - 1]
15135 || regs[j] < regs[this_order]))
15136 this_order = j;
15137 reg_order[i] = this_order;
15140 /* Ensure that registers that must be live after the instruction end
15141 up with the correct value. */
15142 for (i = 0; i < nops; i++)
15144 int this_order = reg_order[i];
15145 if ((this_order != mem_order[i]
15146 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15147 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15148 return false;
15151 /* Load the constants. */
15152 for (i = 0; i < nops; i++)
15154 rtx op = operands[2 * nops + mem_order[i]];
15155 sorted_regs[i] = regs[reg_order[i]];
15156 emit_move_insn (reg_rtxs[reg_order[i]], op);
15159 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15161 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15162 if (TARGET_THUMB1)
15164 gcc_assert (base_reg_dies);
15165 write_back = TRUE;
15168 if (stm_case == 5)
15170 gcc_assert (base_reg_dies);
15171 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15172 offset = 0;
15175 addr = plus_constant (Pmode, base_reg_rtx, offset);
15177 for (i = 0; i < nops; i++)
15179 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15180 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15181 SImode, addr, 0);
15183 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15184 write_back ? offset + i * 4 : 0));
15185 return true;
15188 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15189 unaligned copies on processors which support unaligned semantics for those
15190 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15191 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15192 An interleave factor of 1 (the minimum) will perform no interleaving.
15193 Load/store multiple are used for aligned addresses where possible. */
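/* As a worked example, LENGTH == 23 with INTERLEAVE_FACTOR == 2 copies two
   8-byte blocks in the main loop, then one whole word, one halfword and a
   final byte (16 + 4 + 2 + 1 bytes).  */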
15195 static void
15196 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15197 HOST_WIDE_INT length,
15198 unsigned int interleave_factor)
15200 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15201 int *regnos = XALLOCAVEC (int, interleave_factor);
15202 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15203 HOST_WIDE_INT i, j;
15204 HOST_WIDE_INT remaining = length, words;
15205 rtx halfword_tmp = NULL, byte_tmp = NULL;
15206 rtx dst, src;
15207 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15208 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15209 HOST_WIDE_INT srcoffset, dstoffset;
15210 HOST_WIDE_INT src_autoinc, dst_autoinc;
15211 rtx mem, addr;
15213 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15215 /* Use hard registers if we have aligned source or destination so we can use
15216 load/store multiple with contiguous registers. */
15217 if (dst_aligned || src_aligned)
15218 for (i = 0; i < interleave_factor; i++)
15219 regs[i] = gen_rtx_REG (SImode, i);
15220 else
15221 for (i = 0; i < interleave_factor; i++)
15222 regs[i] = gen_reg_rtx (SImode);
15224 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15225 src = copy_addr_to_reg (XEXP (srcbase, 0));
15227 srcoffset = dstoffset = 0;
15229 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15230 For copying the last bytes we want to subtract this offset again. */
15231 src_autoinc = dst_autoinc = 0;
15233 for (i = 0; i < interleave_factor; i++)
15234 regnos[i] = i;
15236 /* Copy BLOCK_SIZE_BYTES chunks. */
15238 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15240 /* Load words. */
15241 if (src_aligned && interleave_factor > 1)
15243 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15244 TRUE, srcbase, &srcoffset));
15245 src_autoinc += UNITS_PER_WORD * interleave_factor;
15247 else
15249 for (j = 0; j < interleave_factor; j++)
15251 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15252 - src_autoinc));
15253 mem = adjust_automodify_address (srcbase, SImode, addr,
15254 srcoffset + j * UNITS_PER_WORD);
15255 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15257 srcoffset += block_size_bytes;
15260 /* Store words. */
15261 if (dst_aligned && interleave_factor > 1)
15263 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15264 TRUE, dstbase, &dstoffset));
15265 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15267 else
15269 for (j = 0; j < interleave_factor; j++)
15271 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15272 - dst_autoinc));
15273 mem = adjust_automodify_address (dstbase, SImode, addr,
15274 dstoffset + j * UNITS_PER_WORD);
15275 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15277 dstoffset += block_size_bytes;
15280 remaining -= block_size_bytes;
15283 /* Copy any whole words left (note these aren't interleaved with any
15284 subsequent halfword/byte load/stores in the interests of simplicity). */
15286 words = remaining / UNITS_PER_WORD;
15288 gcc_assert (words < interleave_factor);
15290 if (src_aligned && words > 1)
15292 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15293 &srcoffset));
15294 src_autoinc += UNITS_PER_WORD * words;
15296 else
15298 for (j = 0; j < words; j++)
15300 addr = plus_constant (Pmode, src,
15301 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15302 mem = adjust_automodify_address (srcbase, SImode, addr,
15303 srcoffset + j * UNITS_PER_WORD);
15304 if (src_aligned)
15305 emit_move_insn (regs[j], mem);
15306 else
15307 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15309 srcoffset += words * UNITS_PER_WORD;
15312 if (dst_aligned && words > 1)
15314 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15315 &dstoffset));
15316 dst_autoinc += words * UNITS_PER_WORD;
15318 else
15320 for (j = 0; j < words; j++)
15322 addr = plus_constant (Pmode, dst,
15323 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15324 mem = adjust_automodify_address (dstbase, SImode, addr,
15325 dstoffset + j * UNITS_PER_WORD);
15326 if (dst_aligned)
15327 emit_move_insn (mem, regs[j]);
15328 else
15329 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15331 dstoffset += words * UNITS_PER_WORD;
15334 remaining -= words * UNITS_PER_WORD;
15336 gcc_assert (remaining < 4);
15338 /* Copy a halfword if necessary. */
15340 if (remaining >= 2)
15342 halfword_tmp = gen_reg_rtx (SImode);
15344 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15345 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15346 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15348 /* Either write out immediately, or delay until we've loaded the last
15349 byte, depending on interleave factor. */
15350 if (interleave_factor == 1)
15352 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15353 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15354 emit_insn (gen_unaligned_storehi (mem,
15355 gen_lowpart (HImode, halfword_tmp)));
15356 halfword_tmp = NULL;
15357 dstoffset += 2;
15360 remaining -= 2;
15361 srcoffset += 2;
15364 gcc_assert (remaining < 2);
15366 /* Copy last byte. */
15368 if ((remaining & 1) != 0)
15370 byte_tmp = gen_reg_rtx (SImode);
15372 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15373 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15374 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15376 if (interleave_factor == 1)
15378 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15379 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15380 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15381 byte_tmp = NULL;
15382 dstoffset++;
15385 remaining--;
15386 srcoffset++;
15389 /* Store last halfword if we haven't done so already. */
15391 if (halfword_tmp)
15393 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15394 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15395 emit_insn (gen_unaligned_storehi (mem,
15396 gen_lowpart (HImode, halfword_tmp)));
15397 dstoffset += 2;
15400 /* Likewise for last byte. */
15402 if (byte_tmp)
15404 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15405 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15406 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15407 dstoffset++;
15410 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15413 /* From mips_adjust_block_mem:
15415 Helper function for doing a loop-based block operation on memory
15416 reference MEM. Each iteration of the loop will operate on LENGTH
15417 bytes of MEM.
15419 Create a new base register for use within the loop and point it to
15420 the start of MEM. Create a new memory reference that uses this
15421 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15423 static void
15424 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15425 rtx *loop_mem)
15427 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15429 /* Although the new mem does not refer to a known location,
15430 it does keep up to LENGTH bytes of alignment. */
15431 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15432 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15435 /* From mips_block_move_loop:
15437 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15438 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15439 the memory regions do not overlap. */
15441 static void
15442 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15443 unsigned int interleave_factor,
15444 HOST_WIDE_INT bytes_per_iter)
15446 rtx src_reg, dest_reg, final_src, test;
15447 HOST_WIDE_INT leftover;
15449 leftover = length % bytes_per_iter;
15450 length -= leftover;
15452 /* Create registers and memory references for use within the loop. */
15453 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15454 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15456 /* Calculate the value that SRC_REG should have after the last iteration of
15457 the loop. */
15458 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15459 0, 0, OPTAB_WIDEN);
15461 /* Emit the start of the loop. */
15462 rtx_code_label *label = gen_label_rtx ();
15463 emit_label (label);
15465 /* Emit the loop body. */
15466 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15467 interleave_factor);
15469 /* Move on to the next block. */
15470 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15471 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15473 /* Emit the loop condition. */
15474 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15475 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15477 /* Mop up any left-over bytes. */
15478 if (leftover)
15479 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15482 /* Emit a block move when either the source or destination is unaligned (not
15483 aligned to a four-byte boundary). This may need further tuning depending on
15484 core type, optimize_size setting, etc. */
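/* For instance (illustrative only), when not optimizing for size a 40-byte
   copy takes the loop path below (length > 32) with an interleave factor of
   4 and 16 bytes per iteration: two loop iterations plus an 8-byte straight
   tail; a 24-byte copy is handled entirely by the straight variant.  */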
15486 static int
15487 arm_cpymemqi_unaligned (rtx *operands)
15489 HOST_WIDE_INT length = INTVAL (operands[2]);
15491 if (optimize_size)
15493 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15494 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15495 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15496 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15497 or dst_aligned though: allow more interleaving in those cases since the
15498 resulting code can be smaller. */
15499 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15500 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15502 if (length > 12)
15503 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15504 interleave_factor, bytes_per_iter);
15505 else
15506 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15507 interleave_factor);
15509 else
15511 /* Note that the loop created by arm_block_move_unaligned_loop may be
15512 subject to loop unrolling, which makes tuning this condition a little
15513 redundant. */
15514 if (length > 32)
15515 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15516 else
15517 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15520 return 1;
15524 arm_gen_cpymemqi (rtx *operands)
15526 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15527 HOST_WIDE_INT srcoffset, dstoffset;
15528 rtx src, dst, srcbase, dstbase;
15529 rtx part_bytes_reg = NULL;
15530 rtx mem;
15532 if (!CONST_INT_P (operands[2])
15533 || !CONST_INT_P (operands[3])
15534 || INTVAL (operands[2]) > 64)
15535 return 0;
15537 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15538 return arm_cpymemqi_unaligned (operands);
15540 if (INTVAL (operands[3]) & 3)
15541 return 0;
15543 dstbase = operands[0];
15544 srcbase = operands[1];
15546 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15547 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15549 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15550 out_words_to_go = INTVAL (operands[2]) / 4;
15551 last_bytes = INTVAL (operands[2]) & 3;
15552 dstoffset = srcoffset = 0;
15554 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15555 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15557 while (in_words_to_go >= 2)
15559 if (in_words_to_go > 4)
15560 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15561 TRUE, srcbase, &srcoffset));
15562 else
15563 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15564 src, FALSE, srcbase,
15565 &srcoffset));
15567 if (out_words_to_go)
15569 if (out_words_to_go > 4)
15570 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15571 TRUE, dstbase, &dstoffset));
15572 else if (out_words_to_go != 1)
15573 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15574 out_words_to_go, dst,
15575 (last_bytes == 0
15576 ? FALSE : TRUE),
15577 dstbase, &dstoffset));
15578 else
15580 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15581 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15582 if (last_bytes != 0)
15584 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15585 dstoffset += 4;
15590 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15591 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15594 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15595 if (out_words_to_go)
15597 rtx sreg;
15599 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15600 sreg = copy_to_reg (mem);
15602 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15603 emit_move_insn (mem, sreg);
15604 in_words_to_go--;
15606 gcc_assert (!in_words_to_go); /* Sanity check */
15609 if (in_words_to_go)
15611 gcc_assert (in_words_to_go > 0);
15613 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15614 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15617 gcc_assert (!last_bytes || part_bytes_reg);
15619 if (BYTES_BIG_ENDIAN && last_bytes)
15621 rtx tmp = gen_reg_rtx (SImode);
15623 /* The bytes we want are in the top end of the word. */
15624 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15625 GEN_INT (8 * (4 - last_bytes))));
15626 part_bytes_reg = tmp;
15628 while (last_bytes)
15630 mem = adjust_automodify_address (dstbase, QImode,
15631 plus_constant (Pmode, dst,
15632 last_bytes - 1),
15633 dstoffset + last_bytes - 1);
15634 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15636 if (--last_bytes)
15638 tmp = gen_reg_rtx (SImode);
15639 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15640 part_bytes_reg = tmp;
15645 else
15647 if (last_bytes > 1)
15649 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15650 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15651 last_bytes -= 2;
15652 if (last_bytes)
15654 rtx tmp = gen_reg_rtx (SImode);
15655 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15656 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15657 part_bytes_reg = tmp;
15658 dstoffset += 2;
15662 if (last_bytes)
15664 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15665 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15669 return 1;
15672 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15673 by mode size. */
15674 inline static rtx
15675 next_consecutive_mem (rtx mem)
15677 machine_mode mode = GET_MODE (mem);
15678 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15679 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15681 return adjust_automodify_address (mem, mode, addr, offset);
15684 /* Copy using LDRD/STRD instructions whenever possible.
15685 Returns true upon success. */
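/* For example, a 15-byte copy between sufficiently aligned buffers is emitted
   as one doubleword move (typically LDRD/STRD), one word, one halfword and a
   final byte (8 + 4 + 2 + 1); the exact instructions depend on the alignment
   checks below.  */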
15686 bool
15687 gen_cpymem_ldrd_strd (rtx *operands)
15689 unsigned HOST_WIDE_INT len;
15690 HOST_WIDE_INT align;
15691 rtx src, dst, base;
15692 rtx reg0;
15693 bool src_aligned, dst_aligned;
15694 bool src_volatile, dst_volatile;
15696 gcc_assert (CONST_INT_P (operands[2]));
15697 gcc_assert (CONST_INT_P (operands[3]));
15699 len = UINTVAL (operands[2]);
15700 if (len > 64)
15701 return false;
15703 /* Maximum alignment we can assume for both src and dst buffers. */
15704 align = INTVAL (operands[3]);
15706 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15707 return false;
15709 /* Place src and dst addresses in registers
15710 and update the corresponding mem rtx. */
15711 dst = operands[0];
15712 dst_volatile = MEM_VOLATILE_P (dst);
15713 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15714 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15715 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15717 src = operands[1];
15718 src_volatile = MEM_VOLATILE_P (src);
15719 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15720 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15721 src = adjust_automodify_address (src, VOIDmode, base, 0);
15723 if (!unaligned_access && !(src_aligned && dst_aligned))
15724 return false;
15726 if (src_volatile || dst_volatile)
15727 return false;
15729 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15730 if (!(dst_aligned || src_aligned))
15731 return arm_gen_cpymemqi (operands);
15733 /* If either the src or dst is unaligned we'll be accessing it as pairs
15734 of unaligned SImode accesses. Otherwise we can generate DImode
15735 ldrd/strd instructions. */
15736 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15737 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15739 while (len >= 8)
15741 len -= 8;
15742 reg0 = gen_reg_rtx (DImode);
15743 rtx first_reg = NULL_RTX;
15744 rtx second_reg = NULL_RTX;
15746 if (!src_aligned || !dst_aligned)
15748 if (BYTES_BIG_ENDIAN)
15750 second_reg = gen_lowpart (SImode, reg0);
15751 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15753 else
15755 first_reg = gen_lowpart (SImode, reg0);
15756 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15759 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15760 emit_move_insn (reg0, src);
15761 else if (src_aligned)
15762 emit_insn (gen_unaligned_loaddi (reg0, src));
15763 else
15765 emit_insn (gen_unaligned_loadsi (first_reg, src));
15766 src = next_consecutive_mem (src);
15767 emit_insn (gen_unaligned_loadsi (second_reg, src));
15770 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15771 emit_move_insn (dst, reg0);
15772 else if (dst_aligned)
15773 emit_insn (gen_unaligned_storedi (dst, reg0));
15774 else
15776 emit_insn (gen_unaligned_storesi (dst, first_reg));
15777 dst = next_consecutive_mem (dst);
15778 emit_insn (gen_unaligned_storesi (dst, second_reg));
15781 src = next_consecutive_mem (src);
15782 dst = next_consecutive_mem (dst);
15785 gcc_assert (len < 8);
15786 if (len >= 4)
15788 /* At least a word but less than a double-word left to copy. Copy a word. */
15789 reg0 = gen_reg_rtx (SImode);
15790 src = adjust_address (src, SImode, 0);
15791 dst = adjust_address (dst, SImode, 0);
15792 if (src_aligned)
15793 emit_move_insn (reg0, src);
15794 else
15795 emit_insn (gen_unaligned_loadsi (reg0, src));
15797 if (dst_aligned)
15798 emit_move_insn (dst, reg0);
15799 else
15800 emit_insn (gen_unaligned_storesi (dst, reg0));
15802 src = next_consecutive_mem (src);
15803 dst = next_consecutive_mem (dst);
15804 len -= 4;
15807 if (len == 0)
15808 return true;
15810 /* Copy the remaining bytes. */
15811 if (len >= 2)
15813 dst = adjust_address (dst, HImode, 0);
15814 src = adjust_address (src, HImode, 0);
15815 reg0 = gen_reg_rtx (SImode);
15816 if (src_aligned)
15817 emit_insn (gen_zero_extendhisi2 (reg0, src));
15818 else
15819 emit_insn (gen_unaligned_loadhiu (reg0, src));
15821 if (dst_aligned)
15822 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15823 else
15824 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15826 src = next_consecutive_mem (src);
15827 dst = next_consecutive_mem (dst);
15828 if (len == 2)
15829 return true;
15832 dst = adjust_address (dst, QImode, 0);
15833 src = adjust_address (src, QImode, 0);
15834 reg0 = gen_reg_rtx (QImode);
15835 emit_move_insn (reg0, src);
15836 emit_move_insn (dst, reg0);
15837 return true;
15840 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15841 into its component 32-bit subregs. OP2 may be an immediate
15842 constant and we want to simplify it in that case. */
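/* E.g. an immediate OP2 of 0x100000003 decomposes into *LO_OP2 == 3 and
   *HI_OP2 == 1.  */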
15843 void
15844 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15845 rtx *lo_op2, rtx *hi_op2)
15847 *lo_op1 = gen_lowpart (SImode, op1);
15848 *hi_op1 = gen_highpart (SImode, op1);
15849 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15850 subreg_lowpart_offset (SImode, DImode));
15851 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15852 subreg_highpart_offset (SImode, DImode));
15855 /* Select a dominance comparison mode if possible for a test of the general
15856 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15857 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15858 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15859 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15860 In all cases OP will be either EQ or NE, but we don't need to know which
15861 here. If we are unable to support a dominance comparison we return
15862 CC mode. This will then fail to match for the RTL expressions that
15863 generate this call. */
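/* For instance, with COND_OR == DOM_CC_X_OR_Y, X an EQ comparison and Y an
   LE comparison, comparison_dominates_p (EQ, LE) holds (equality implies
   less-or-equal), so the switch below returns CC_DLEmode and the pair can be
   matched by the conditional-compare patterns.  */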
15864 machine_mode
15865 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15867 enum rtx_code cond1, cond2;
15868 int swapped = 0;
15870 /* Currently we will probably get the wrong result if the individual
15871 comparisons are not simple. This also ensures that it is safe to
15872 reverse a comparison if necessary. */
15873 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15874 != CCmode)
15875 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15876 != CCmode))
15877 return CCmode;
15879 /* The if_then_else variant of this tests the second condition if the
15880 first passes, but is true if the first fails. Reverse the first
15881 condition to get a true "inclusive-or" expression. */
15882 if (cond_or == DOM_CC_NX_OR_Y)
15883 cond1 = reverse_condition (cond1);
15885 /* If the comparisons are not equal, and one doesn't dominate the other,
15886 then we can't do this. */
15887 if (cond1 != cond2
15888 && !comparison_dominates_p (cond1, cond2)
15889 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15890 return CCmode;
15892 if (swapped)
15893 std::swap (cond1, cond2);
15895 switch (cond1)
15897 case EQ:
15898 if (cond_or == DOM_CC_X_AND_Y)
15899 return CC_DEQmode;
15901 switch (cond2)
15903 case EQ: return CC_DEQmode;
15904 case LE: return CC_DLEmode;
15905 case LEU: return CC_DLEUmode;
15906 case GE: return CC_DGEmode;
15907 case GEU: return CC_DGEUmode;
15908 default: gcc_unreachable ();
15911 case LT:
15912 if (cond_or == DOM_CC_X_AND_Y)
15913 return CC_DLTmode;
15915 switch (cond2)
15917 case LT:
15918 return CC_DLTmode;
15919 case LE:
15920 return CC_DLEmode;
15921 case NE:
15922 return CC_DNEmode;
15923 default:
15924 gcc_unreachable ();
15927 case GT:
15928 if (cond_or == DOM_CC_X_AND_Y)
15929 return CC_DGTmode;
15931 switch (cond2)
15933 case GT:
15934 return CC_DGTmode;
15935 case GE:
15936 return CC_DGEmode;
15937 case NE:
15938 return CC_DNEmode;
15939 default:
15940 gcc_unreachable ();
15943 case LTU:
15944 if (cond_or == DOM_CC_X_AND_Y)
15945 return CC_DLTUmode;
15947 switch (cond2)
15949 case LTU:
15950 return CC_DLTUmode;
15951 case LEU:
15952 return CC_DLEUmode;
15953 case NE:
15954 return CC_DNEmode;
15955 default:
15956 gcc_unreachable ();
15959 case GTU:
15960 if (cond_or == DOM_CC_X_AND_Y)
15961 return CC_DGTUmode;
15963 switch (cond2)
15965 case GTU:
15966 return CC_DGTUmode;
15967 case GEU:
15968 return CC_DGEUmode;
15969 case NE:
15970 return CC_DNEmode;
15971 default:
15972 gcc_unreachable ();
15975 /* The remaining cases only occur when both comparisons are the
15976 same. */
15977 case NE:
15978 gcc_assert (cond1 == cond2);
15979 return CC_DNEmode;
15981 case LE:
15982 gcc_assert (cond1 == cond2);
15983 return CC_DLEmode;
15985 case GE:
15986 gcc_assert (cond1 == cond2);
15987 return CC_DGEmode;
15989 case LEU:
15990 gcc_assert (cond1 == cond2);
15991 return CC_DLEUmode;
15993 case GEU:
15994 gcc_assert (cond1 == cond2);
15995 return CC_DGEUmode;
15997 default:
15998 gcc_unreachable ();
16002 machine_mode
16003 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16005 /* All floating point compares return CCFP if it is an equality
16006 comparison, and CCFPE otherwise. */
16007 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16009 switch (op)
16011 case EQ:
16012 case NE:
16013 case UNORDERED:
16014 case ORDERED:
16015 case UNLT:
16016 case UNLE:
16017 case UNGT:
16018 case UNGE:
16019 case UNEQ:
16020 case LTGT:
16021 return CCFPmode;
16023 case LT:
16024 case LE:
16025 case GT:
16026 case GE:
16027 return CCFPEmode;
16029 default:
16030 gcc_unreachable ();
16034 /* A compare with a shifted operand. Because of canonicalization, the
16035 comparison will have to be swapped when we emit the assembler. */
16036 if (GET_MODE (y) == SImode
16037 && (REG_P (y) || (SUBREG_P (y)))
16038 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16039 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16040 || GET_CODE (x) == ROTATERT))
16041 return CC_SWPmode;
16043 /* A widened compare of the sum of a value plus a carry against a
16044 constant. This is a representation of RSC. We want to swap the
16045 result of the comparison at output. Not valid if the Z bit is
16046 needed. */
16047 if (GET_MODE (x) == DImode
16048 && GET_CODE (x) == PLUS
16049 && arm_borrow_operation (XEXP (x, 1), DImode)
16050 && CONST_INT_P (y)
16051 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16052 && (op == LE || op == GT))
16053 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16054 && (op == LEU || op == GTU))))
16055 return CC_SWPmode;
16057 /* If X is a constant we want to use CC_RSBmode. This is
16058 non-canonical, but arm_gen_compare_reg uses this to generate the
16059 correct canonical form. */
16060 if (GET_MODE (y) == SImode
16061 && (REG_P (y) || SUBREG_P (y))
16062 && CONST_INT_P (x))
16063 return CC_RSBmode;
16065 /* This operation is performed swapped, but since we only rely on the Z
16066 flag we don't need an additional mode. */
16067 if (GET_MODE (y) == SImode
16068 && (REG_P (y) || (SUBREG_P (y)))
16069 && GET_CODE (x) == NEG
16070 && (op == EQ || op == NE))
16071 return CC_Zmode;
16073 /* This is a special case that is used by combine to allow a
16074 comparison of a shifted byte load to be split into a zero-extend
16075 followed by a comparison of the shifted integer (only valid for
16076 equalities and unsigned inequalities). */
16077 if (GET_MODE (x) == SImode
16078 && GET_CODE (x) == ASHIFT
16079 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16080 && GET_CODE (XEXP (x, 0)) == SUBREG
16081 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16082 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16083 && (op == EQ || op == NE
16084 || op == GEU || op == GTU || op == LTU || op == LEU)
16085 && CONST_INT_P (y))
16086 return CC_Zmode;
16088 /* A construct for a conditional compare, if the false arm contains
16089 0, then both conditions must be true, otherwise either condition
16090 must be true. Not all conditions are possible, so CCmode is
16091 returned if it can't be done. */
16092 if (GET_CODE (x) == IF_THEN_ELSE
16093 && (XEXP (x, 2) == const0_rtx
16094 || XEXP (x, 2) == const1_rtx)
16095 && COMPARISON_P (XEXP (x, 0))
16096 && COMPARISON_P (XEXP (x, 1)))
16097 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16098 INTVAL (XEXP (x, 2)));
16100 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16101 if (GET_CODE (x) == AND
16102 && (op == EQ || op == NE)
16103 && COMPARISON_P (XEXP (x, 0))
16104 && COMPARISON_P (XEXP (x, 1)))
16105 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16106 DOM_CC_X_AND_Y);
16108 if (GET_CODE (x) == IOR
16109 && (op == EQ || op == NE)
16110 && COMPARISON_P (XEXP (x, 0))
16111 && COMPARISON_P (XEXP (x, 1)))
16112 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16113 DOM_CC_X_OR_Y);
16115 /* An operation (on Thumb) where we want to test for a single bit.
16116 This is done by shifting that bit up into the top bit of a
16117 scratch register; we can then branch on the sign bit. */
16118 if (TARGET_THUMB1
16119 && GET_MODE (x) == SImode
16120 && (op == EQ || op == NE)
16121 && GET_CODE (x) == ZERO_EXTRACT
16122 && XEXP (x, 1) == const1_rtx)
16123 return CC_Nmode;
16125 /* For an operation that sets the condition codes as a side-effect, the
16126 V flag is not set correctly, so we can only use comparisons where
16127 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16128 instead.) */
16129 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16130 if (GET_MODE (x) == SImode
16131 && y == const0_rtx
16132 && (op == EQ || op == NE || op == LT || op == GE)
16133 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16134 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16135 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16136 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16137 || GET_CODE (x) == LSHIFTRT
16138 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16139 || GET_CODE (x) == ROTATERT
16140 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16141 return CC_NZmode;
16143 /* A comparison of ~reg with a const is really a special
16144 canonicalization of compare (~const, reg), which is a reverse
16145 subtract operation. We may not get here if CONST is 0, but that
16146 doesn't matter because ~0 isn't a valid immediate for RSB. */
16147 if (GET_MODE (x) == SImode
16148 && GET_CODE (x) == NOT
16149 && CONST_INT_P (y))
16150 return CC_RSBmode;
16152 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16153 return CC_Zmode;
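  /* The unsigned cases below typically come from overflow checks of the form
     (a + b) < a; only the carry flag is needed for those, hence CC_Cmode.  */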
16155 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16156 && GET_CODE (x) == PLUS
16157 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16158 return CC_Cmode;
16160 if (GET_MODE (x) == DImode
16161 && GET_CODE (x) == PLUS
16162 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16163 && CONST_INT_P (y)
16164 && UINTVAL (y) == 0x800000000
16165 && (op == GEU || op == LTU))
16166 return CC_ADCmode;
16168 if (GET_MODE (x) == DImode
16169 && (op == GE || op == LT)
16170 && GET_CODE (x) == SIGN_EXTEND
16171 && ((GET_CODE (y) == PLUS
16172 && arm_borrow_operation (XEXP (y, 0), DImode))
16173 || arm_borrow_operation (y, DImode)))
16174 return CC_NVmode;
16176 if (GET_MODE (x) == DImode
16177 && (op == GEU || op == LTU)
16178 && GET_CODE (x) == ZERO_EXTEND
16179 && ((GET_CODE (y) == PLUS
16180 && arm_borrow_operation (XEXP (y, 0), DImode))
16181 || arm_borrow_operation (y, DImode)))
16182 return CC_Bmode;
16184 if (GET_MODE (x) == DImode
16185 && (op == EQ || op == NE)
16186 && (GET_CODE (x) == PLUS
16187 || GET_CODE (x) == MINUS)
16188 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16189 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16190 && GET_CODE (y) == SIGN_EXTEND
16191 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16192 return CC_Vmode;
16194 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16195 return GET_MODE (x);
16197 return CCmode;
16200 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16201 the sequence of instructions needed to generate a suitable condition
16202 code register. Return the CC register result. */
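/* E.g. an EQ/NE comparison against zero is handled below by ORing the two
   halves and comparing the result with zero (typically a single ORRS) rather
   than performing a full 64-bit subtraction.  */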
16203 static rtx
16204 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16206 machine_mode mode;
16207 rtx cc_reg;
16209 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16210 gcc_assert (TARGET_32BIT);
16211 gcc_assert (!CONST_INT_P (x));
16213 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16214 subreg_lowpart_offset (SImode, DImode));
16215 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16216 subreg_highpart_offset (SImode, DImode));
16217 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16218 subreg_lowpart_offset (SImode, DImode));
16219 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16220 subreg_highpart_offset (SImode, DImode));
16221 switch (code)
16223 case EQ:
16224 case NE:
16226 if (y_lo == const0_rtx || y_hi == const0_rtx)
16228 if (y_lo != const0_rtx)
16230 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16232 gcc_assert (y_hi == const0_rtx);
16233 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16234 if (!arm_add_operand (y_lo, SImode))
16235 y_lo = force_reg (SImode, y_lo);
16236 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16237 x_lo = scratch2;
16239 else if (y_hi != const0_rtx)
16241 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16243 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16244 if (!arm_add_operand (y_hi, SImode))
16245 y_hi = force_reg (SImode, y_hi);
16246 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16247 x_hi = scratch2;
16250 if (!scratch)
16252 gcc_assert (!reload_completed);
16253 scratch = gen_rtx_SCRATCH (SImode);
16256 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16257 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16259 rtx set
16260 = gen_rtx_SET (cc_reg,
16261 gen_rtx_COMPARE (CC_NZmode,
16262 gen_rtx_IOR (SImode, x_lo, x_hi),
16263 const0_rtx));
16264 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16265 clobber)));
16266 return cc_reg;
16269 if (!arm_add_operand (y_lo, SImode))
16270 y_lo = force_reg (SImode, y_lo);
16272 if (!arm_add_operand (y_hi, SImode))
16273 y_hi = force_reg (SImode, y_hi);
16275 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16276 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16277 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16278 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16279 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16281 emit_insn (gen_rtx_SET (cc_reg,
16282 gen_rtx_COMPARE (mode, conjunction,
16283 const0_rtx)));
16284 return cc_reg;
16287 case LT:
16288 case GE:
16290 if (y_lo == const0_rtx)
16292 /* If the low word of y is 0, then this is simply a normal
16293 compare of the upper words. */
16294 if (!arm_add_operand (y_hi, SImode))
16295 y_hi = force_reg (SImode, y_hi);
16297 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16300 if (!arm_add_operand (y_lo, SImode))
16301 y_lo = force_reg (SImode, y_lo);
16303 rtx cmp1
16304 = gen_rtx_LTU (DImode,
16305 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16306 const0_rtx);
16308 if (!scratch)
16309 scratch = gen_rtx_SCRATCH (SImode);
16311 if (!arm_not_operand (y_hi, SImode))
16312 y_hi = force_reg (SImode, y_hi);
16314 rtx_insn *insn;
16315 if (y_hi == const0_rtx)
16316 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16317 cmp1));
16318 else if (CONST_INT_P (y_hi))
16319 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16320 y_hi, cmp1));
16321 else
16322 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16323 cmp1));
16324 return SET_DEST (single_set (insn));
16327 case LE:
16328 case GT:
16330 /* During expansion, we only expect to get here if y is a
16331 constant that we want to handle, otherwise we should have
16332 swapped the operands already. */
16333 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16335 if (!const_ok_for_arm (INTVAL (y_lo)))
16336 y_lo = force_reg (SImode, y_lo);
16338 /* Perform a reverse subtract and compare. */
16339 rtx cmp1
16340 = gen_rtx_LTU (DImode,
16341 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16342 const0_rtx);
16343 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16344 x_hi, cmp1));
16345 return SET_DEST (single_set (insn));
16348 case LTU:
16349 case GEU:
16351 if (y_lo == const0_rtx)
16353 /* If the low word of y is 0, then this is simply a normal
16354 compare of the upper words. */
16355 if (!arm_add_operand (y_hi, SImode))
16356 y_hi = force_reg (SImode, y_hi);
16358 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16361 if (!arm_add_operand (y_lo, SImode))
16362 y_lo = force_reg (SImode, y_lo);
16364 rtx cmp1
16365 = gen_rtx_LTU (DImode,
16366 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16367 const0_rtx);
16369 if (!scratch)
16370 scratch = gen_rtx_SCRATCH (SImode);
16371 if (!arm_not_operand (y_hi, SImode))
16372 y_hi = force_reg (SImode, y_hi);
16374 rtx_insn *insn;
16375 if (y_hi == const0_rtx)
16376 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16377 cmp1));
16378 else if (CONST_INT_P (y_hi))
16380 /* Constant is viewed as unsigned when zero-extended. */
16381 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16382 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16383 y_hi, cmp1));
16385 else
16386 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16387 cmp1));
16388 return SET_DEST (single_set (insn));
16391 case LEU:
16392 case GTU:
16394 /* During expansion, we only expect to get here if y is a
16395 constant that we want to handle, otherwise we should have
16396 swapped the operands already. */
16397 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16399 if (!const_ok_for_arm (INTVAL (y_lo)))
16400 y_lo = force_reg (SImode, y_lo);
16402 /* Perform a reverse subtract and compare. */
16403 rtx cmp1
16404 = gen_rtx_LTU (DImode,
16405 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16406 const0_rtx);
16407 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16408 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16409 x_hi, cmp1));
16410 return SET_DEST (single_set (insn));
16413 default:
16414 gcc_unreachable ();
16418 /* X and Y are two things to compare using CODE. Emit the compare insn and
16419 return the rtx for register 0 in the proper mode. */
16421 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16423 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16424 return arm_gen_dicompare_reg (code, x, y, scratch);
16426 machine_mode mode = SELECT_CC_MODE (code, x, y);
16427 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16428 if (mode == CC_RSBmode)
16430 if (!scratch)
16431 scratch = gen_rtx_SCRATCH (SImode);
16432 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16433 GEN_INT (~UINTVAL (x)), y));
16435 else
16436 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16438 return cc_reg;
16441 /* Generate a sequence of insns that will generate the correct return
16442 address mask depending on the physical architecture that the program
16443 is running on. */
16445 arm_gen_return_addr_mask (void)
16447 rtx reg = gen_reg_rtx (Pmode);
16449 emit_insn (gen_return_addr_mask (reg));
16450 return reg;
16453 void
16454 arm_reload_in_hi (rtx *operands)
16456 rtx ref = operands[1];
16457 rtx base, scratch;
16458 HOST_WIDE_INT offset = 0;
16460 if (SUBREG_P (ref))
16462 offset = SUBREG_BYTE (ref);
16463 ref = SUBREG_REG (ref);
16466 if (REG_P (ref))
16468 /* We have a pseudo which has been spilt onto the stack; there
16469 are two cases here: the first where there is a simple
16470 stack-slot replacement and a second where the stack-slot is
16471 out of range, or is used as a subreg. */
16472 if (reg_equiv_mem (REGNO (ref)))
16474 ref = reg_equiv_mem (REGNO (ref));
16475 base = find_replacement (&XEXP (ref, 0));
16477 else
16478 /* The slot is out of range, or was dressed up in a SUBREG. */
16479 base = reg_equiv_address (REGNO (ref));
16481 /* PR 62554: If there is no equivalent memory location then just move
16482 the value as an SImode register move. This happens when the target
16483 architecture variant does not have an HImode register move. */
16484 if (base == NULL)
16486 gcc_assert (REG_P (operands[0]));
16487 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16488 gen_rtx_SUBREG (SImode, ref, 0)));
16489 return;
16492 else
16493 base = find_replacement (&XEXP (ref, 0));
16495 /* Handle the case where the address is too complex to be offset by 1. */
16496 if (GET_CODE (base) == MINUS
16497 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16499 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16501 emit_set_insn (base_plus, base);
16502 base = base_plus;
16504 else if (GET_CODE (base) == PLUS)
16506 /* The addend must be CONST_INT, or we would have dealt with it above. */
16507 HOST_WIDE_INT hi, lo;
16509 offset += INTVAL (XEXP (base, 1));
16510 base = XEXP (base, 0);
16512 /* Rework the address into a legal sequence of insns. */
16513 /* Valid range for lo is -4095 -> 4095 */
16514 lo = (offset >= 0
16515 ? (offset & 0xfff)
16516 : -((-offset) & 0xfff));
16518 /* Corner case, if lo is the max offset then we would be out of range
16519 once we have added the additional 1 below, so bump the msb into the
16520 pre-loading insn(s). */
16521 if (lo == 4095)
16522 lo &= 0x7ff;
16524 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16525 ^ (HOST_WIDE_INT) 0x80000000)
16526 - (HOST_WIDE_INT) 0x80000000);
16528 gcc_assert (hi + lo == offset);
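      /* E.g. OFFSET == 4095 hits the corner case above: without the mask LO
	 would be 4095 and the second byte access at LO + 1 would need 4096,
	 which is out of range; with it LO == 2047 and HI == 2048, keeping
	 both accesses within the +/-4095 limit.  */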
16530 if (hi != 0)
16532 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16534 /* Get the base address; addsi3 knows how to handle constants
16535 that require more than one insn. */
16536 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16537 base = base_plus;
16538 offset = lo;
16542 /* Operands[2] may overlap operands[0] (though it won't overlap
16543 operands[1]), that's why we asked for a DImode reg -- so we can
16544 use the bit that does not overlap. */
16545 if (REGNO (operands[2]) == REGNO (operands[0]))
16546 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16547 else
16548 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16550 emit_insn (gen_zero_extendqisi2 (scratch,
16551 gen_rtx_MEM (QImode,
16552 plus_constant (Pmode, base,
16553 offset))));
16554 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16555 gen_rtx_MEM (QImode,
16556 plus_constant (Pmode, base,
16557 offset + 1))));
16558 if (!BYTES_BIG_ENDIAN)
16559 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16560 gen_rtx_IOR (SImode,
16561 gen_rtx_ASHIFT
16562 (SImode,
16563 gen_rtx_SUBREG (SImode, operands[0], 0),
16564 GEN_INT (8)),
16565 scratch));
16566 else
16567 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16568 gen_rtx_IOR (SImode,
16569 gen_rtx_ASHIFT (SImode, scratch,
16570 GEN_INT (8)),
16571 gen_rtx_SUBREG (SImode, operands[0], 0)));
16574 /* Handle storing a half-word to memory during reload by synthesizing as two
16575 byte stores. Take care not to clobber the input values until after we
16576 have moved them somewhere safe. This code assumes that if the DImode
16577 scratch in operands[2] overlaps either the input value or output address
16578 in some way, then that value must die in this insn (we absolutely need
16579 two scratch registers for some corner cases). */
16580 void
16581 arm_reload_out_hi (rtx *operands)
16583 rtx ref = operands[0];
16584 rtx outval = operands[1];
16585 rtx base, scratch;
16586 HOST_WIDE_INT offset = 0;
16588 if (SUBREG_P (ref))
16590 offset = SUBREG_BYTE (ref);
16591 ref = SUBREG_REG (ref);
16594 if (REG_P (ref))
16596 /* We have a pseudo which has been spilt onto the stack; there
16597 are two cases here: the first where there is a simple
16598 stack-slot replacement and a second where the stack-slot is
16599 out of range, or is used as a subreg. */
16600 if (reg_equiv_mem (REGNO (ref)))
16602 ref = reg_equiv_mem (REGNO (ref));
16603 base = find_replacement (&XEXP (ref, 0));
16605 else
16606 /* The slot is out of range, or was dressed up in a SUBREG. */
16607 base = reg_equiv_address (REGNO (ref));
16609 /* PR 62254: If there is no equivalent memory location then just move
16610 the value as an SImode register move. This happens when the target
16611 architecture variant does not have an HImode register move. */
16612 if (base == NULL)
16614 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16616 if (REG_P (outval))
16618 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16619 gen_rtx_SUBREG (SImode, outval, 0)));
16621 else /* SUBREG_P (outval) */
16623 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16624 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16625 SUBREG_REG (outval)));
16626 else
16627 /* FIXME: Handle other cases ? */
16628 gcc_unreachable ();
16630 return;
16633 else
16634 base = find_replacement (&XEXP (ref, 0));
16636 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16638 /* Handle the case where the address is too complex to be offset by 1. */
16639 if (GET_CODE (base) == MINUS
16640 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16642 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16644 /* Be careful not to destroy OUTVAL. */
16645 if (reg_overlap_mentioned_p (base_plus, outval))
16647 /* Updating base_plus might destroy outval, see if we can
16648 swap the scratch and base_plus. */
16649 if (!reg_overlap_mentioned_p (scratch, outval))
16650 std::swap (scratch, base_plus);
16651 else
16653 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16655 /* Be conservative and copy OUTVAL into the scratch now,
16656 this should only be necessary if outval is a subreg
16657 of something larger than a word. */
16658 /* XXX Might this clobber base? I can't see how it can,
16659 since scratch is known to overlap with OUTVAL, and
16660 must be wider than a word. */
16661 emit_insn (gen_movhi (scratch_hi, outval));
16662 outval = scratch_hi;
16666 emit_set_insn (base_plus, base);
16667 base = base_plus;
16669 else if (GET_CODE (base) == PLUS)
16671 /* The addend must be CONST_INT, or we would have dealt with it above. */
16672 HOST_WIDE_INT hi, lo;
16674 offset += INTVAL (XEXP (base, 1));
16675 base = XEXP (base, 0);
16677 /* Rework the address into a legal sequence of insns. */
16678 /* Valid range for lo is -4095 -> 4095 */
16679 lo = (offset >= 0
16680 ? (offset & 0xfff)
16681 : -((-offset) & 0xfff));
16683 /* Corner case, if lo is the max offset then we would be out of range
16684 once we have added the additional 1 below, so bump the msb into the
16685 pre-loading insn(s). */
16686 if (lo == 4095)
16687 lo &= 0x7ff;
16689 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16690 ^ (HOST_WIDE_INT) 0x80000000)
16691 - (HOST_WIDE_INT) 0x80000000);
16693 gcc_assert (hi + lo == offset);
16695 if (hi != 0)
16697 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16699 /* Be careful not to destroy OUTVAL. */
16700 if (reg_overlap_mentioned_p (base_plus, outval))
16702 /* Updating base_plus might destroy outval, see if we
16703 can swap the scratch and base_plus. */
16704 if (!reg_overlap_mentioned_p (scratch, outval))
16705 std::swap (scratch, base_plus);
16706 else
16708 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16710 /* Be conservative and copy outval into scratch now,
16711 this should only be necessary if outval is a
16712 subreg of something larger than a word. */
16713 /* XXX Might this clobber base? I can't see how it
16714 can, since scratch is known to overlap with
16715 outval. */
16716 emit_insn (gen_movhi (scratch_hi, outval));
16717 outval = scratch_hi;
16721 /* Get the base address; addsi3 knows how to handle constants
16722 that require more than one insn. */
16723 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16724 base = base_plus;
16725 offset = lo;
16729 if (BYTES_BIG_ENDIAN)
16731 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16732 plus_constant (Pmode, base,
16733 offset + 1)),
16734 gen_lowpart (QImode, outval)));
16735 emit_insn (gen_lshrsi3 (scratch,
16736 gen_rtx_SUBREG (SImode, outval, 0),
16737 GEN_INT (8)));
16738 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16739 offset)),
16740 gen_lowpart (QImode, scratch)));
16742 else
16744 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16745 offset)),
16746 gen_lowpart (QImode, outval)));
16747 emit_insn (gen_lshrsi3 (scratch,
16748 gen_rtx_SUBREG (SImode, outval, 0),
16749 GEN_INT (8)));
16750 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16751 plus_constant (Pmode, base,
16752 offset + 1)),
16753 gen_lowpart (QImode, scratch)));
16757 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16758 (padded to the size of a word) should be passed in a register. */
16760 static bool
16761 arm_must_pass_in_stack (const function_arg_info &arg)
16763 if (TARGET_AAPCS_BASED)
16764 return must_pass_in_stack_var_size (arg);
16765 else
16766 return must_pass_in_stack_var_size_or_pad (arg);
16770 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16771 byte of a stack argument has useful data. For legacy APCS ABIs we use
16772 the default. For AAPCS based ABIs small aggregate types are placed
16773 in the lowest memory address. */
16775 static pad_direction
16776 arm_function_arg_padding (machine_mode mode, const_tree type)
16778 if (!TARGET_AAPCS_BASED)
16779 return default_function_arg_padding (mode, type);
16781 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16782 return PAD_DOWNWARD;
16784 return PAD_UPWARD;
16788 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16789 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16790 register has useful data, and return the opposite if the most
16791 significant byte does. */
16793 bool
16794 arm_pad_reg_upward (machine_mode mode,
16795 tree type, int first ATTRIBUTE_UNUSED)
16797 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16799 /* For AAPCS, small aggregates, small fixed-point types,
16800 and small complex types are always padded upwards. */
16801 if (type)
16803 if ((AGGREGATE_TYPE_P (type)
16804 || TREE_CODE (type) == COMPLEX_TYPE
16805 || FIXED_POINT_TYPE_P (type))
16806 && int_size_in_bytes (type) <= 4)
16807 return true;
16809 else
16811 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16812 && GET_MODE_SIZE (mode) <= 4)
16813 return true;
16817 /* Otherwise, use default padding. */
16818 return !BYTES_BIG_ENDIAN;
16821 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16822 assuming that the address in the base register is word aligned. */
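/* For example, an offset of 1020 is accepted for Thumb-2 but not for ARM
state (whose limit is 255), while an offset of 6 is rejected for Thumb-2
because it is not a multiple of 4 yet is fine for ARM state. */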
16823 bool
16824 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16826 HOST_WIDE_INT max_offset;
16828 /* Offset must be a multiple of 4 in Thumb mode. */
16829 if (TARGET_THUMB2 && ((offset & 3) != 0))
16830 return false;
16832 if (TARGET_THUMB2)
16833 max_offset = 1020;
16834 else if (TARGET_ARM)
16835 max_offset = 255;
16836 else
16837 return false;
16839 return ((offset <= max_offset) && (offset >= -max_offset));
16842 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16843 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16844 Assumes that the address in the base register RN is word aligned. Pattern
16845 guarantees that both memory accesses use the same base register,
16846 the offsets are constants within the range, and the gap between the offsets is 4.
16847 If reload is complete then check that registers are legal. WBACK indicates whether
16848 address is updated. LOAD indicates whether memory access is load or store. */
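/* For instance, after reload "ldrd r0, r1, [r2, #8]" passes these checks in
ARM state, whereas "ldrd r1, r2, [r3]" fails there because the first
destination register is odd; Thumb-2 has no even/odd pairing requirement
but rejects SP and PC as destination registers. */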
16849 bool
16850 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16851 bool wback, bool load)
16853 unsigned int t, t2, n;
16855 if (!reload_completed)
16856 return true;
16858 if (!offset_ok_for_ldrd_strd (offset))
16859 return false;
16861 t = REGNO (rt);
16862 t2 = REGNO (rt2);
16863 n = REGNO (rn);
16865 if ((TARGET_THUMB2)
16866 && ((wback && (n == t || n == t2))
16867 || (t == SP_REGNUM)
16868 || (t == PC_REGNUM)
16869 || (t2 == SP_REGNUM)
16870 || (t2 == PC_REGNUM)
16871 || (!load && (n == PC_REGNUM))
16872 || (load && (t == t2))
16873 /* Triggers Cortex-M3 LDRD errata. */
16874 || (!wback && load && fix_cm3_ldrd && (n == t))))
16875 return false;
16877 if ((TARGET_ARM)
16878 && ((wback && (n == t || n == t2))
16879 || (t2 == PC_REGNUM)
16880 || (t % 2 != 0) /* First destination register is not even. */
16881 || (t2 != t + 1)
16882 /* PC can be used as base register (for offset addressing only),
16883 but it is deprecated. */
16884 || (n == PC_REGNUM)))
16885 return false;
16887 return true;
16890 /* Return true if a 64-bit access with alignment ALIGN and with a
16891 constant offset OFFSET from the base pointer is permitted on this
16892 architecture. */
16893 static bool
16894 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16896 return (unaligned_access
16897 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16898 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16901 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16902 operand MEM's address contains an immediate offset from the base
16903 register and has no side effects, in which case it sets BASE,
16904 OFFSET and ALIGN accordingly. */
16905 static bool
16906 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16908 rtx addr;
16910 gcc_assert (base != NULL && offset != NULL);
16912 /* TODO: Handle more general memory operand patterns, such as
16913 PRE_DEC and PRE_INC. */
16915 if (side_effects_p (mem))
16916 return false;
16918 /* Can't deal with subregs. */
16919 if (SUBREG_P (mem))
16920 return false;
16922 gcc_assert (MEM_P (mem));
16924 *offset = const0_rtx;
16925 *align = MEM_ALIGN (mem);
16927 addr = XEXP (mem, 0);
16929 /* If addr isn't valid for DImode, then we can't handle it. */
16930 if (!arm_legitimate_address_p (DImode, addr,
16931 reload_in_progress || reload_completed))
16932 return false;
16934 if (REG_P (addr))
16936 *base = addr;
16937 return true;
16939 else if (GET_CODE (addr) == PLUS)
16941 *base = XEXP (addr, 0);
16942 *offset = XEXP (addr, 1);
16943 return (REG_P (*base) && CONST_INT_P (*offset));
16946 return false;
16949 /* Called from a peephole2 to replace two word-size accesses with a
16950 single LDRD/STRD instruction. Returns true iff we can generate a
16951 new instruction sequence. That is, both accesses use the same base
16952 register and the gap between constant offsets is 4. This function
16953 may reorder its operands to match ldrd/strd RTL templates.
16954 OPERANDS are the operands found by the peephole matcher;
16955 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16956 corresponding memory operands. LOAD indicates whether the access
16957 is load or store. CONST_STORE indicates a store of constant
16958 integer values held in OPERANDS[4,5] and assumes that the pattern
16959 is 4 insns long, for the purpose of checking dead registers.
16960 COMMUTE indicates that register operands may be reordered. */
16961 bool
16962 gen_operands_ldrd_strd (rtx *operands, bool load,
16963 bool const_store, bool commute)
16965 int nops = 2;
16966 HOST_WIDE_INT offsets[2], offset, align[2];
16967 rtx base = NULL_RTX;
16968 rtx cur_base, cur_offset, tmp;
16969 int i, gap;
16970 HARD_REG_SET regset;
16972 gcc_assert (!const_store || !load);
16973 /* Check that the memory references are immediate offsets from the
16974 same base register. Extract the base register, the destination
16975 registers, and the corresponding memory offsets. */
16976 for (i = 0; i < nops; i++)
16978 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16979 &align[i]))
16980 return false;
16982 if (i == 0)
16983 base = cur_base;
16984 else if (REGNO (base) != REGNO (cur_base))
16985 return false;
16987 offsets[i] = INTVAL (cur_offset);
16988 if (GET_CODE (operands[i]) == SUBREG)
16990 tmp = SUBREG_REG (operands[i]);
16991 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16992 operands[i] = tmp;
16996 /* Make sure there is no dependency between the individual loads. */
16997 if (load && REGNO (operands[0]) == REGNO (base))
16998 return false; /* RAW */
17000 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17001 return false; /* WAW */
17003 /* If the same input register is used in both stores
17004 when storing different constants, try to find a free register.
17005 For example, the code
17006 mov r0, 0
17007 str r0, [r2]
17008 mov r0, 1
17009 str r0, [r2, #4]
17010 can be transformed into
17011 mov r1, 0
17012 mov r0, 1
17013 strd r1, r0, [r2]
17014 in Thumb mode assuming that r1 is free.
17015 For ARM mode do the same but only if the starting register
17016 can be made to be even. */
17017 if (const_store
17018 && REGNO (operands[0]) == REGNO (operands[1])
17019 && INTVAL (operands[4]) != INTVAL (operands[5]))
17021 if (TARGET_THUMB2)
17023 CLEAR_HARD_REG_SET (regset);
17024 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17025 if (tmp == NULL_RTX)
17026 return false;
17028 /* Use the new register in the first load to ensure that
17029 if the original input register is not dead after peephole,
17030 then it will have the correct constant value. */
17031 operands[0] = tmp;
17033 else if (TARGET_ARM)
17035 int regno = REGNO (operands[0]);
17036 if (!peep2_reg_dead_p (4, operands[0]))
17038 /* When the input register is even and is not dead after the
17039 pattern, it has to hold the second constant but we cannot
17040 form a legal STRD in ARM mode with this register as the second
17041 register. */
17042 if (regno % 2 == 0)
17043 return false;
17045 /* Is regno-1 free? */
17046 SET_HARD_REG_SET (regset);
17047 CLEAR_HARD_REG_BIT(regset, regno - 1);
17048 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17049 if (tmp == NULL_RTX)
17050 return false;
17052 operands[0] = tmp;
17054 else
17056 /* Find a DImode register. */
17057 CLEAR_HARD_REG_SET (regset);
17058 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17059 if (tmp != NULL_RTX)
17061 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17062 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17064 else
17066 /* Can we use the input register to form a DI register? */
17067 SET_HARD_REG_SET (regset);
17068 CLEAR_HARD_REG_BIT(regset,
17069 regno % 2 == 0 ? regno + 1 : regno - 1);
17070 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17071 if (tmp == NULL_RTX)
17072 return false;
17073 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17077 gcc_assert (operands[0] != NULL_RTX);
17078 gcc_assert (operands[1] != NULL_RTX);
17079 gcc_assert (REGNO (operands[0]) % 2 == 0);
17080 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17084 /* Make sure the instructions are ordered with lower memory access first. */
17085 if (offsets[0] > offsets[1])
17087 gap = offsets[0] - offsets[1];
17088 offset = offsets[1];
17090 /* Swap the instructions such that lower memory is accessed first. */
17091 std::swap (operands[0], operands[1]);
17092 std::swap (operands[2], operands[3]);
17093 std::swap (align[0], align[1]);
17094 if (const_store)
17095 std::swap (operands[4], operands[5]);
17097 else
17099 gap = offsets[1] - offsets[0];
17100 offset = offsets[0];
17103 /* Make sure accesses are to consecutive memory locations. */
17104 if (gap != GET_MODE_SIZE (SImode))
17105 return false;
17107 if (!align_ok_ldrd_strd (align[0], offset))
17108 return false;
17110 /* Make sure we generate legal instructions. */
17111 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17112 false, load))
17113 return true;
17115 /* In Thumb state, where registers are almost unconstrained, there
17116 is little hope to fix it. */
17117 if (TARGET_THUMB2)
17118 return false;
17120 if (load && commute)
17122 /* Try reordering registers. */
17123 std::swap (operands[0], operands[1]);
17124 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17125 false, load))
17126 return true;
17129 if (const_store)
17131 /* If input registers are dead after this pattern, they can be
17132 reordered or replaced by other registers that are free in the
17133 current pattern. */
17134 if (!peep2_reg_dead_p (4, operands[0])
17135 || !peep2_reg_dead_p (4, operands[1]))
17136 return false;
17138 /* Try to reorder the input registers. */
17139 /* For example, the code
17140 mov r0, 0
17141 mov r1, 1
17142 str r1, [r2]
17143 str r0, [r2, #4]
17144 can be transformed into
17145 mov r1, 0
17146 mov r0, 1
17147 strd r0, r1, [r2]
17149 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17150 false, false))
17152 std::swap (operands[0], operands[1]);
17153 return true;
17156 /* Try to find a free DI register. */
17157 CLEAR_HARD_REG_SET (regset);
17158 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17159 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17160 while (true)
17162 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17163 if (tmp == NULL_RTX)
17164 return false;
17166 /* DREG must be an even-numbered register in DImode.
17167 Split it into SI registers. */
17168 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17169 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17170 gcc_assert (operands[0] != NULL_RTX);
17171 gcc_assert (operands[1] != NULL_RTX);
17172 gcc_assert (REGNO (operands[0]) % 2 == 0);
17173 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17175 return (operands_ok_ldrd_strd (operands[0], operands[1],
17176 base, offset,
17177 false, load));
17181 return false;
17185 /* Return true if parallel execution of the two word-size accesses provided
17186 could be satisfied with a single LDRD/STRD instruction. Two word-size
17187 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17188 register operands and OPERANDS[2,3] are the corresponding memory operands.
17190 bool
17191 valid_operands_ldrd_strd (rtx *operands, bool load)
17193 int nops = 2;
17194 HOST_WIDE_INT offsets[2], offset, align[2];
17195 rtx base = NULL_RTX;
17196 rtx cur_base, cur_offset;
17197 int i, gap;
17199 /* Check that the memory references are immediate offsets from the
17200 same base register. Extract the base register, the destination
17201 registers, and the corresponding memory offsets. */
17202 for (i = 0; i < nops; i++)
17204 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17205 &align[i]))
17206 return false;
17208 if (i == 0)
17209 base = cur_base;
17210 else if (REGNO (base) != REGNO (cur_base))
17211 return false;
17213 offsets[i] = INTVAL (cur_offset);
17214 if (GET_CODE (operands[i]) == SUBREG)
17215 return false;
17218 if (offsets[0] > offsets[1])
17219 return false;
17221 gap = offsets[1] - offsets[0];
17222 offset = offsets[0];
17224 /* Make sure accesses are to consecutive memory locations. */
17225 if (gap != GET_MODE_SIZE (SImode))
17226 return false;
17228 if (!align_ok_ldrd_strd (align[0], offset))
17229 return false;
17231 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17232 false, load);
17236 /* Print a symbolic form of X to the debug file, F. */
17237 static void
17238 arm_print_value (FILE *f, rtx x)
17240 switch (GET_CODE (x))
17242 case CONST_INT:
17243 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17244 return;
17246 case CONST_DOUBLE:
17248 char fpstr[20];
17249 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17250 sizeof (fpstr), 0, 1);
17251 fputs (fpstr, f);
17253 return;
17255 case CONST_VECTOR:
17257 int i;
17259 fprintf (f, "<");
17260 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17262 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17263 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17264 fputc (',', f);
17266 fprintf (f, ">");
17268 return;
17270 case CONST_STRING:
17271 fprintf (f, "\"%s\"", XSTR (x, 0));
17272 return;
17274 case SYMBOL_REF:
17275 fprintf (f, "`%s'", XSTR (x, 0));
17276 return;
17278 case LABEL_REF:
17279 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17280 return;
17282 case CONST:
17283 arm_print_value (f, XEXP (x, 0));
17284 return;
17286 case PLUS:
17287 arm_print_value (f, XEXP (x, 0));
17288 fprintf (f, "+");
17289 arm_print_value (f, XEXP (x, 1));
17290 return;
17292 case PC:
17293 fprintf (f, "pc");
17294 return;
17296 default:
17297 fprintf (f, "????");
17298 return;
17302 /* Routines for manipulation of the constant pool. */
17304 /* Arm instructions cannot load a large constant directly into a
17305 register; they have to come from a pc relative load. The constant
17306 must therefore be placed in the addressable range of the pc
17307 relative load. Depending on the precise pc relative load
17308 instruction the range is somewhere between 256 bytes and 4k. This
17309 means that we often have to dump a constant inside a function, and
17310 generate code to branch around it.
17312 It is important to minimize this, since the branches will slow
17313 things down and make the code larger.
17315 Normally we can hide the table after an existing unconditional
17316 branch so that there is no interruption of the flow, but in the
17317 worst case the code looks like this:
17319 ldr rn, L1
17321 b L2
17322 align
17323 L1: .long value
17327 ldr rn, L3
17329 b L4
17330 align
17331 L3: .long value
17335 We fix this by performing a scan after scheduling, which notices
17336 which instructions need to have their operands fetched from the
17337 constant table and builds the table.
17339 The algorithm starts by building a table of all the constants that
17340 need fixing up and all the natural barriers in the function (places
17341 where a constant table can be dropped without breaking the flow).
17342 For each fixup we note how far the pc-relative replacement will be
17343 able to reach and the offset of the instruction into the function.
17345 Having built the table we then group the fixes together to form
17346 tables that are as large as possible (subject to addressing
17347 constraints) and emit each table of constants after the last
17348 barrier that is within range of all the instructions in the group.
17349 If a group does not contain a barrier, then we forcibly create one
17350 by inserting a jump instruction into the flow. Once the table has
17351 been inserted, the insns are then modified to reference the
17352 relevant entry in the pool.
17354 Possible enhancements to the algorithm (not implemented) are:
17356 1) For some processors and object formats, there may be benefit in
17357 aligning the pools to the start of cache lines; this alignment
17358 would need to be taken into account when calculating addressability
17359 of a pool. */
17361 /* These typedefs are located at the start of this file, so that
17362 they can be used in the prototypes there. This comment is to
17363 remind readers of that fact so that the following structures
17364 can be understood more easily.
17366 typedef struct minipool_node Mnode;
17367 typedef struct minipool_fixup Mfix; */
17369 struct minipool_node
17371 /* Doubly linked chain of entries. */
17372 Mnode * next;
17373 Mnode * prev;
17374 /* The maximum offset into the code at which this entry can be placed. While
17375 pushing fixes for forward references, all entries are sorted in order
17376 of increasing max_address. */
17377 HOST_WIDE_INT max_address;
17378 /* Similarly for an entry inserted for a backwards ref. */
17379 HOST_WIDE_INT min_address;
17380 /* The number of fixes referencing this entry. This can become zero
17381 if we "unpush" an entry. In this case we ignore the entry when we
17382 come to emit the code. */
17383 int refcount;
17384 /* The offset from the start of the minipool. */
17385 HOST_WIDE_INT offset;
17386 /* The value in the table. */
17387 rtx value;
17388 /* The mode of value. */
17389 machine_mode mode;
17390 /* The size of the value. With iWMMXt enabled
17391 sizes > 4 also imply an alignment of 8 bytes. */
17392 int fix_size;
17395 struct minipool_fixup
17397 Mfix * next;
17398 rtx_insn * insn;
17399 HOST_WIDE_INT address;
17400 rtx * loc;
17401 machine_mode mode;
17402 int fix_size;
17403 rtx value;
17404 Mnode * minipool;
17405 HOST_WIDE_INT forwards;
17406 HOST_WIDE_INT backwards;
17409 /* Fixes less than a word need padding out to a word boundary. */
17410 #define MINIPOOL_FIX_SIZE(mode) \
17411 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
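/* For example, a QImode or HImode constant is padded to 4 bytes, while an
8-byte DImode or DFmode constant keeps its natural size. */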
17413 static Mnode * minipool_vector_head;
17414 static Mnode * minipool_vector_tail;
17415 static rtx_code_label *minipool_vector_label;
17416 static int minipool_pad;
17418 /* The linked list of all minipool fixes required for this function. */
17419 Mfix * minipool_fix_head;
17420 Mfix * minipool_fix_tail;
17421 /* The fix entry for the current minipool, once it has been placed. */
17422 Mfix * minipool_barrier;
17424 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17425 #define JUMP_TABLES_IN_TEXT_SECTION 0
17426 #endif
17428 static HOST_WIDE_INT
17429 get_jump_table_size (rtx_jump_table_data *insn)
17431 /* ADDR_VECs only take room if read-only data goes into the text
17432 section. */
17433 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17435 rtx body = PATTERN (insn);
17436 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17437 HOST_WIDE_INT size;
17438 HOST_WIDE_INT modesize;
17440 modesize = GET_MODE_SIZE (GET_MODE (body));
17441 size = modesize * XVECLEN (body, elt);
17442 switch (modesize)
17444 case 1:
17445 /* Round up size of TBB table to a halfword boundary. */
17446 size = (size + 1) & ~HOST_WIDE_INT_1;
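/* E.g. a 5-entry TBB table occupies 5 bytes and is rounded up to 6
so that the code following it stays halfword aligned. */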
17447 break;
17448 case 2:
17449 /* No padding necessary for TBH. */
17450 break;
17451 case 4:
17452 /* Add two bytes for alignment on Thumb. */
17453 if (TARGET_THUMB)
17454 size += 2;
17455 break;
17456 default:
17457 gcc_unreachable ();
17459 return size;
17462 return 0;
17465 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17466 function descriptor) into a register and the GOT address into the
17467 FDPIC register, returning an rtx for the register holding the
17468 function address. */
17471 arm_load_function_descriptor (rtx funcdesc)
17473 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17474 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17475 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17476 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17478 emit_move_insn (fnaddr_reg, fnaddr);
17480 /* The ABI requires the entry point address to be loaded first, but
17481 since we cannot support lazy binding for lack of atomic load of
17482 two 32-bits values, we do not need to bother to prevent the
17483 previous load from being moved after that of the GOT address. */
17484 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17486 return fnaddr_reg;
17489 /* Return the maximum amount of padding that will be inserted before
17490 label LABEL. */
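/* For instance, with 8-byte alignment at the label and Thumb code (minimum
insn size 2), up to 6 bytes of padding may precede the label. */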
17491 static HOST_WIDE_INT
17492 get_label_padding (rtx label)
17494 HOST_WIDE_INT align, min_insn_size;
17496 align = 1 << label_to_alignment (label).levels[0].log;
17497 min_insn_size = TARGET_THUMB ? 2 : 4;
17498 return align > min_insn_size ? align - min_insn_size : 0;
17501 /* Move a minipool fix MP from its current location to before MAX_MP.
17502 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17503 constraints may need updating. */
17504 static Mnode *
17505 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17506 HOST_WIDE_INT max_address)
17508 /* The code below assumes these are different. */
17509 gcc_assert (mp != max_mp);
17511 if (max_mp == NULL)
17513 if (max_address < mp->max_address)
17514 mp->max_address = max_address;
17516 else
17518 if (max_address > max_mp->max_address - mp->fix_size)
17519 mp->max_address = max_mp->max_address - mp->fix_size;
17520 else
17521 mp->max_address = max_address;
17523 /* Unlink MP from its current position. Since max_mp is non-null,
17524 mp->prev must be non-null. */
17525 mp->prev->next = mp->next;
17526 if (mp->next != NULL)
17527 mp->next->prev = mp->prev;
17528 else
17529 minipool_vector_tail = mp->prev;
17531 /* Re-insert it before MAX_MP. */
17532 mp->next = max_mp;
17533 mp->prev = max_mp->prev;
17534 max_mp->prev = mp;
17536 if (mp->prev != NULL)
17537 mp->prev->next = mp;
17538 else
17539 minipool_vector_head = mp;
17542 /* Save the new entry. */
17543 max_mp = mp;
17545 /* Scan over the preceding entries and adjust their addresses as
17546 required. */
17547 while (mp->prev != NULL
17548 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17550 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17551 mp = mp->prev;
17554 return max_mp;
17557 /* Add a constant to the minipool for a forward reference. Returns the
17558 node added or NULL if the constant will not fit in this pool. */
17559 static Mnode *
17560 add_minipool_forward_ref (Mfix *fix)
17562 /* If set, max_mp is the first pool_entry that has a lower
17563 constraint than the one we are trying to add. */
17564 Mnode * max_mp = NULL;
17565 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17566 Mnode * mp;
17568 /* If the minipool starts before the end of FIX->INSN then this FIX
17569 cannot be placed into the current pool. Furthermore, adding the
17570 new constant pool entry may cause the pool to start FIX_SIZE bytes
17571 earlier. */
17572 if (minipool_vector_head &&
17573 (fix->address + get_attr_length (fix->insn)
17574 >= minipool_vector_head->max_address - fix->fix_size))
17575 return NULL;
17577 /* Scan the pool to see if a constant with the same value has
17578 already been added. While we are doing this, also note the
17579 location where we must insert the constant if it doesn't already
17580 exist. */
17581 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17583 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17584 && fix->mode == mp->mode
17585 && (!LABEL_P (fix->value)
17586 || (CODE_LABEL_NUMBER (fix->value)
17587 == CODE_LABEL_NUMBER (mp->value)))
17588 && rtx_equal_p (fix->value, mp->value))
17590 /* More than one fix references this entry. */
17591 mp->refcount++;
17592 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17595 /* Note the insertion point if necessary. */
17596 if (max_mp == NULL
17597 && mp->max_address > max_address)
17598 max_mp = mp;
17600 /* If we are inserting an 8-byte aligned quantity and
17601 we have not already found an insertion point, then
17602 make sure that all such 8-byte aligned quantities are
17603 placed at the start of the pool. */
17604 if (ARM_DOUBLEWORD_ALIGN
17605 && max_mp == NULL
17606 && fix->fix_size >= 8
17607 && mp->fix_size < 8)
17609 max_mp = mp;
17610 max_address = mp->max_address;
17614 /* The value is not currently in the minipool, so we need to create
17615 a new entry for it. If MAX_MP is NULL, the entry will be put on
17616 the end of the list since the placement is less constrained than
17617 any existing entry. Otherwise, we insert the new fix before
17618 MAX_MP and, if necessary, adjust the constraints on the other
17619 entries. */
17620 mp = XNEW (Mnode);
17621 mp->fix_size = fix->fix_size;
17622 mp->mode = fix->mode;
17623 mp->value = fix->value;
17624 mp->refcount = 1;
17625 /* Not yet required for a backwards ref. */
17626 mp->min_address = -65536;
17628 if (max_mp == NULL)
17630 mp->max_address = max_address;
17631 mp->next = NULL;
17632 mp->prev = minipool_vector_tail;
17634 if (mp->prev == NULL)
17636 minipool_vector_head = mp;
17637 minipool_vector_label = gen_label_rtx ();
17639 else
17640 mp->prev->next = mp;
17642 minipool_vector_tail = mp;
17644 else
17646 if (max_address > max_mp->max_address - mp->fix_size)
17647 mp->max_address = max_mp->max_address - mp->fix_size;
17648 else
17649 mp->max_address = max_address;
17651 mp->next = max_mp;
17652 mp->prev = max_mp->prev;
17653 max_mp->prev = mp;
17654 if (mp->prev != NULL)
17655 mp->prev->next = mp;
17656 else
17657 minipool_vector_head = mp;
17660 /* Save the new entry. */
17661 max_mp = mp;
17663 /* Scan over the preceding entries and adjust their addresses as
17664 required. */
17665 while (mp->prev != NULL
17666 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17668 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17669 mp = mp->prev;
17672 return max_mp;
17675 static Mnode *
17676 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17677 HOST_WIDE_INT min_address)
17679 HOST_WIDE_INT offset;
17681 /* The code below assumes these are different. */
17682 gcc_assert (mp != min_mp);
17684 if (min_mp == NULL)
17686 if (min_address > mp->min_address)
17687 mp->min_address = min_address;
17689 else
17691 /* We will adjust this below if it is too loose. */
17692 mp->min_address = min_address;
17694 /* Unlink MP from its current position. Since min_mp is non-null,
17695 mp->next must be non-null. */
17696 mp->next->prev = mp->prev;
17697 if (mp->prev != NULL)
17698 mp->prev->next = mp->next;
17699 else
17700 minipool_vector_head = mp->next;
17702 /* Reinsert it after MIN_MP. */
17703 mp->prev = min_mp;
17704 mp->next = min_mp->next;
17705 min_mp->next = mp;
17706 if (mp->next != NULL)
17707 mp->next->prev = mp;
17708 else
17709 minipool_vector_tail = mp;
17712 min_mp = mp;
17714 offset = 0;
17715 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17717 mp->offset = offset;
17718 if (mp->refcount > 0)
17719 offset += mp->fix_size;
17721 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17722 mp->next->min_address = mp->min_address + mp->fix_size;
17725 return min_mp;
17728 /* Add a constant to the minipool for a backward reference. Returns the
17729 node added or NULL if the constant will not fit in this pool.
17731 Note that the code for insertion for a backwards reference can be
17732 somewhat confusing because the calculated offsets for each fix do
17733 not take into account the size of the pool (which is still under
17734 construction). */
17735 static Mnode *
17736 add_minipool_backward_ref (Mfix *fix)
17738 /* If set, min_mp is the last pool_entry that has a lower constraint
17739 than the one we are trying to add. */
17740 Mnode *min_mp = NULL;
17741 /* This can be negative, since it is only a constraint. */
17742 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17743 Mnode *mp;
17745 /* If we can't reach the current pool from this insn, or if we can't
17746 insert this entry at the end of the pool without pushing other
17747 fixes out of range, then we don't try. This ensures that we
17748 can't fail later on. */
17749 if (min_address >= minipool_barrier->address
17750 || (minipool_vector_tail->min_address + fix->fix_size
17751 >= minipool_barrier->address))
17752 return NULL;
17754 /* Scan the pool to see if a constant with the same value has
17755 already been added. While we are doing this, also note the
17756 location where we must insert the constant if it doesn't already
17757 exist. */
17758 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17760 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17761 && fix->mode == mp->mode
17762 && (!LABEL_P (fix->value)
17763 || (CODE_LABEL_NUMBER (fix->value)
17764 == CODE_LABEL_NUMBER (mp->value)))
17765 && rtx_equal_p (fix->value, mp->value)
17766 /* Check that there is enough slack to move this entry to the
17767 end of the table (this is conservative). */
17768 && (mp->max_address
17769 > (minipool_barrier->address
17770 + minipool_vector_tail->offset
17771 + minipool_vector_tail->fix_size)))
17773 mp->refcount++;
17774 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17777 if (min_mp != NULL)
17778 mp->min_address += fix->fix_size;
17779 else
17781 /* Note the insertion point if necessary. */
17782 if (mp->min_address < min_address)
17784 /* For now, we do not allow the insertion of nodes requiring
17785 8-byte alignment anywhere but at the start of the pool. */
17786 if (ARM_DOUBLEWORD_ALIGN
17787 && fix->fix_size >= 8 && mp->fix_size < 8)
17788 return NULL;
17789 else
17790 min_mp = mp;
17792 else if (mp->max_address
17793 < minipool_barrier->address + mp->offset + fix->fix_size)
17795 /* Inserting before this entry would push the fix beyond
17796 its maximum address (which can happen if we have
17797 re-located a forwards fix); force the new fix to come
17798 after it. */
17799 if (ARM_DOUBLEWORD_ALIGN
17800 && fix->fix_size >= 8 && mp->fix_size < 8)
17801 return NULL;
17802 else
17804 min_mp = mp;
17805 min_address = mp->min_address + fix->fix_size;
17808 /* Do not insert a non-8-byte aligned quantity before 8-byte
17809 aligned quantities. */
17810 else if (ARM_DOUBLEWORD_ALIGN
17811 && fix->fix_size < 8
17812 && mp->fix_size >= 8)
17814 min_mp = mp;
17815 min_address = mp->min_address + fix->fix_size;
17820 /* We need to create a new entry. */
17821 mp = XNEW (Mnode);
17822 mp->fix_size = fix->fix_size;
17823 mp->mode = fix->mode;
17824 mp->value = fix->value;
17825 mp->refcount = 1;
17826 mp->max_address = minipool_barrier->address + 65536;
17828 mp->min_address = min_address;
17830 if (min_mp == NULL)
17832 mp->prev = NULL;
17833 mp->next = minipool_vector_head;
17835 if (mp->next == NULL)
17837 minipool_vector_tail = mp;
17838 minipool_vector_label = gen_label_rtx ();
17840 else
17841 mp->next->prev = mp;
17843 minipool_vector_head = mp;
17845 else
17847 mp->next = min_mp->next;
17848 mp->prev = min_mp;
17849 min_mp->next = mp;
17851 if (mp->next != NULL)
17852 mp->next->prev = mp;
17853 else
17854 minipool_vector_tail = mp;
17857 /* Save the new entry. */
17858 min_mp = mp;
17860 if (mp->prev)
17861 mp = mp->prev;
17862 else
17863 mp->offset = 0;
17865 /* Scan over the following entries and adjust their offsets. */
17866 while (mp->next != NULL)
17868 if (mp->next->min_address < mp->min_address + mp->fix_size)
17869 mp->next->min_address = mp->min_address + mp->fix_size;
17871 if (mp->refcount)
17872 mp->next->offset = mp->offset + mp->fix_size;
17873 else
17874 mp->next->offset = mp->offset;
17876 mp = mp->next;
17879 return min_mp;
17882 static void
17883 assign_minipool_offsets (Mfix *barrier)
17885 HOST_WIDE_INT offset = 0;
17886 Mnode *mp;
17888 minipool_barrier = barrier;
17890 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17892 mp->offset = offset;
17894 if (mp->refcount > 0)
17895 offset += mp->fix_size;
17899 /* Output the literal table */
17900 static void
17901 dump_minipool (rtx_insn *scan)
17903 Mnode * mp;
17904 Mnode * nmp;
17905 int align64 = 0;
17907 if (ARM_DOUBLEWORD_ALIGN)
17908 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17909 if (mp->refcount > 0 && mp->fix_size >= 8)
17911 align64 = 1;
17912 break;
17915 if (dump_file)
17916 fprintf (dump_file,
17917 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17918 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17920 scan = emit_label_after (gen_label_rtx (), scan);
17921 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17922 scan = emit_label_after (minipool_vector_label, scan);
17924 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17926 if (mp->refcount > 0)
17928 if (dump_file)
17930 fprintf (dump_file,
17931 ";; Offset %u, min %ld, max %ld ",
17932 (unsigned) mp->offset, (unsigned long) mp->min_address,
17933 (unsigned long) mp->max_address);
17934 arm_print_value (dump_file, mp->value);
17935 fputc ('\n', dump_file);
17938 rtx val = copy_rtx (mp->value);
17940 switch (GET_MODE_SIZE (mp->mode))
17942 #ifdef HAVE_consttable_1
17943 case 1:
17944 scan = emit_insn_after (gen_consttable_1 (val), scan);
17945 break;
17947 #endif
17948 #ifdef HAVE_consttable_2
17949 case 2:
17950 scan = emit_insn_after (gen_consttable_2 (val), scan);
17951 break;
17953 #endif
17954 #ifdef HAVE_consttable_4
17955 case 4:
17956 scan = emit_insn_after (gen_consttable_4 (val), scan);
17957 break;
17959 #endif
17960 #ifdef HAVE_consttable_8
17961 case 8:
17962 scan = emit_insn_after (gen_consttable_8 (val), scan);
17963 break;
17965 #endif
17966 #ifdef HAVE_consttable_16
17967 case 16:
17968 scan = emit_insn_after (gen_consttable_16 (val), scan);
17969 break;
17971 #endif
17972 default:
17973 gcc_unreachable ();
17977 nmp = mp->next;
17978 free (mp);
17981 minipool_vector_head = minipool_vector_tail = NULL;
17982 scan = emit_insn_after (gen_consttable_end (), scan);
17983 scan = emit_barrier_after (scan);
17986 /* Return the cost of forcibly inserting a barrier after INSN. */
17987 static int
17988 arm_barrier_cost (rtx_insn *insn)
17990 /* Basing the location of the pool on the loop depth is preferable,
17991 but at the moment, the basic block information seems to be
17992 corrupted by this stage of the compilation. */
17993 int base_cost = 50;
17994 rtx_insn *next = next_nonnote_insn (insn);
17996 if (next != NULL && LABEL_P (next))
17997 base_cost -= 20;
17999 switch (GET_CODE (insn))
18001 case CODE_LABEL:
18002 /* It will always be better to place the table before the label, rather
18003 than after it. */
18004 return 50;
18006 case INSN:
18007 case CALL_INSN:
18008 return base_cost;
18010 case JUMP_INSN:
18011 return base_cost - 10;
18013 default:
18014 return base_cost + 10;
18018 /* Find the best place in the insn stream in the range
18019 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18020 Create the barrier by inserting a jump and add a new fix entry for
18021 it. */
18022 static Mfix *
18023 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18025 HOST_WIDE_INT count = 0;
18026 rtx_barrier *barrier;
18027 rtx_insn *from = fix->insn;
18028 /* The instruction after which we will insert the jump. */
18029 rtx_insn *selected = NULL;
18030 int selected_cost;
18031 /* The address at which the jump instruction will be placed. */
18032 HOST_WIDE_INT selected_address;
18033 Mfix * new_fix;
18034 HOST_WIDE_INT max_count = max_address - fix->address;
18035 rtx_code_label *label = gen_label_rtx ();
18037 selected_cost = arm_barrier_cost (from);
18038 selected_address = fix->address;
18040 while (from && count < max_count)
18042 rtx_jump_table_data *tmp;
18043 int new_cost;
18045 /* This code shouldn't have been called if there was a natural barrier
18046 within range. */
18047 gcc_assert (!BARRIER_P (from));
18049 /* Count the length of this insn. This must stay in sync with the
18050 code that pushes minipool fixes. */
18051 if (LABEL_P (from))
18052 count += get_label_padding (from);
18053 else
18054 count += get_attr_length (from);
18056 /* If there is a jump table, add its length. */
18057 if (tablejump_p (from, NULL, &tmp))
18059 count += get_jump_table_size (tmp);
18061 /* Jump tables aren't in a basic block, so base the cost on
18062 the dispatch insn. If we select this location, we will
18063 still put the pool after the table. */
18064 new_cost = arm_barrier_cost (from);
18066 if (count < max_count
18067 && (!selected || new_cost <= selected_cost))
18069 selected = tmp;
18070 selected_cost = new_cost;
18071 selected_address = fix->address + count;
18074 /* Continue after the dispatch table. */
18075 from = NEXT_INSN (tmp);
18076 continue;
18079 new_cost = arm_barrier_cost (from);
18081 if (count < max_count
18082 && (!selected || new_cost <= selected_cost))
18084 selected = from;
18085 selected_cost = new_cost;
18086 selected_address = fix->address + count;
18089 from = NEXT_INSN (from);
18092 /* Make sure that we found a place to insert the jump. */
18093 gcc_assert (selected);
18095 /* Create a new JUMP_INSN that branches around a barrier. */
18096 from = emit_jump_insn_after (gen_jump (label), selected);
18097 JUMP_LABEL (from) = label;
18098 barrier = emit_barrier_after (from);
18099 emit_label_after (label, barrier);
18101 /* Create a minipool barrier entry for the new barrier. */
18102 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18103 new_fix->insn = barrier;
18104 new_fix->address = selected_address;
18105 new_fix->next = fix->next;
18106 fix->next = new_fix;
18108 return new_fix;
18111 /* Record that there is a natural barrier in the insn stream at
18112 ADDRESS. */
18113 static void
18114 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18116 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18118 fix->insn = insn;
18119 fix->address = address;
18121 fix->next = NULL;
18122 if (minipool_fix_head != NULL)
18123 minipool_fix_tail->next = fix;
18124 else
18125 minipool_fix_head = fix;
18127 minipool_fix_tail = fix;
18130 /* Record INSN, which will need fixing up to load a value from the
18131 minipool. ADDRESS is the offset of the insn from the start of the
18132 function; LOC is a pointer to the part of the insn which requires
18133 fixing; VALUE is the constant that must be loaded, which is of type
18134 MODE. */
18135 static void
18136 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18137 machine_mode mode, rtx value)
18139 gcc_assert (!arm_disable_literal_pool);
18140 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18142 fix->insn = insn;
18143 fix->address = address;
18144 fix->loc = loc;
18145 fix->mode = mode;
18146 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18147 fix->value = value;
18148 fix->forwards = get_attr_pool_range (insn);
18149 fix->backwards = get_attr_neg_pool_range (insn);
18150 fix->minipool = NULL;
18152 /* If an insn doesn't have a range defined for it, then it isn't
18153 expecting to be reworked by this code. Better to stop now than
18154 to generate duff assembly code. */
18155 gcc_assert (fix->forwards || fix->backwards);
18157 /* If an entry requires 8-byte alignment then assume all constant pools
18158 require 4 bytes of padding. Trying to do this later on a per-pool
18159 basis is awkward because existing pool entries have to be modified. */
18160 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18161 minipool_pad = 4;
18163 if (dump_file)
18165 fprintf (dump_file,
18166 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18167 GET_MODE_NAME (mode),
18168 INSN_UID (insn), (unsigned long) address,
18169 -1 * (long)fix->backwards, (long)fix->forwards);
18170 arm_print_value (dump_file, fix->value);
18171 fprintf (dump_file, "\n");
18174 /* Add it to the chain of fixes. */
18175 fix->next = NULL;
18177 if (minipool_fix_head != NULL)
18178 minipool_fix_tail->next = fix;
18179 else
18180 minipool_fix_head = fix;
18182 minipool_fix_tail = fix;
18185 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
18186 constant inline; if synthesis would cost more than this, the value is
18187 normally loaded from the constant pool instead. */
18189 arm_max_const_double_inline_cost ()
18191 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18194 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18195 Returns the number of insns needed, or 99 if we don't know how to
18196 do it. */
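/* For example, 0x0000000100000001 typically costs two insns here (one MOV
per 32-bit half), which is within the limit given by
arm_max_const_double_inline_cost, so such a constant is normally
synthesized inline. */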
18198 arm_const_double_inline_cost (rtx val)
18200 rtx lowpart, highpart;
18201 machine_mode mode;
18203 mode = GET_MODE (val);
18205 if (mode == VOIDmode)
18206 mode = DImode;
18208 gcc_assert (GET_MODE_SIZE (mode) == 8);
18210 lowpart = gen_lowpart (SImode, val);
18211 highpart = gen_highpart_mode (SImode, mode, val);
18213 gcc_assert (CONST_INT_P (lowpart));
18214 gcc_assert (CONST_INT_P (highpart));
18216 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18217 NULL_RTX, NULL_RTX, 0, 0)
18218 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18219 NULL_RTX, NULL_RTX, 0, 0));
18222 /* Cost of loading a SImode constant. */
18223 static inline int
18224 arm_const_inline_cost (enum rtx_code code, rtx val)
18226 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18227 NULL_RTX, NULL_RTX, 1, 0);
18230 /* Return true if it is worthwhile to split a 64-bit constant into two
18231 32-bit operations. This is the case if optimizing for size, or
18232 if we have load delay slots, or if one 32-bit part can be done with
18233 a single data operation. */
18234 bool
18235 arm_const_double_by_parts (rtx val)
18237 machine_mode mode = GET_MODE (val);
18238 rtx part;
18240 if (optimize_size || arm_ld_sched)
18241 return true;
18243 if (mode == VOIDmode)
18244 mode = DImode;
18246 part = gen_highpart_mode (SImode, mode, val);
18248 gcc_assert (CONST_INT_P (part));
18250 if (const_ok_for_arm (INTVAL (part))
18251 || const_ok_for_arm (~INTVAL (part)))
18252 return true;
18254 part = gen_lowpart (SImode, val);
18256 gcc_assert (CONST_INT_P (part));
18258 if (const_ok_for_arm (INTVAL (part))
18259 || const_ok_for_arm (~INTVAL (part)))
18260 return true;
18262 return false;
18265 /* Return true if it is possible to inline both the high and low parts
18266 of a 64-bit constant into 32-bit data processing instructions. */
18267 bool
18268 arm_const_double_by_immediates (rtx val)
18270 machine_mode mode = GET_MODE (val);
18271 rtx part;
18273 if (mode == VOIDmode)
18274 mode = DImode;
18276 part = gen_highpart_mode (SImode, mode, val);
18278 gcc_assert (CONST_INT_P (part));
18280 if (!const_ok_for_arm (INTVAL (part)))
18281 return false;
18283 part = gen_lowpart (SImode, val);
18285 gcc_assert (CONST_INT_P (part));
18287 if (!const_ok_for_arm (INTVAL (part)))
18288 return false;
18290 return true;
18293 /* Scan INSN and note any of its operands that need fixing.
18294 If DO_PUSHES is false we do not actually push any of the fixups
18295 needed. */
18296 static void
18297 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18299 int opno;
18301 extract_constrain_insn (insn);
18303 if (recog_data.n_alternatives == 0)
18304 return;
18306 /* Fill in recog_op_alt with information about the constraints of
18307 this insn. */
18308 preprocess_constraints (insn);
18310 const operand_alternative *op_alt = which_op_alt ();
18311 for (opno = 0; opno < recog_data.n_operands; opno++)
18313 /* Things we need to fix can only occur in inputs. */
18314 if (recog_data.operand_type[opno] != OP_IN)
18315 continue;
18317 /* If this alternative is a memory reference, then any mention
18318 of constants in this alternative is really to fool reload
18319 into allowing us to accept one there. We need to fix them up
18320 now so that we output the right code. */
18321 if (op_alt[opno].memory_ok)
18323 rtx op = recog_data.operand[opno];
18325 if (CONSTANT_P (op))
18327 if (do_pushes)
18328 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18329 recog_data.operand_mode[opno], op);
18331 else if (MEM_P (op)
18332 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18333 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18335 if (do_pushes)
18337 rtx cop = avoid_constant_pool_reference (op);
18339 /* Casting the address of something to a mode narrower
18340 than a word can cause avoid_constant_pool_reference()
18341 to return the pool reference itself. That's no good to
18342 us here. Let's just hope that we can use the
18343 constant pool value directly. */
18344 if (op == cop)
18345 cop = get_pool_constant (XEXP (op, 0));
18347 push_minipool_fix (insn, address,
18348 recog_data.operand_loc[opno],
18349 recog_data.operand_mode[opno], cop);
18356 return;
18359 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18360 and unions in the context of ARMv8-M Security Extensions. It is used as a
18361 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18362 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18363 or four masks, depending on whether it is being computed for a
18364 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18365 respectively. The tree for the type of the argument or a field within an
18366 argument is passed in ARG_TYPE, the current register this argument or field
18367 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18368 argument or field starts at is passed in STARTING_BIT and the last used bit
18369 is kept in LAST_USED_BIT which is also updated accordingly. */
18371 static unsigned HOST_WIDE_INT
18372 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18373 uint32_t * padding_bits_to_clear,
18374 unsigned starting_bit, int * last_used_bit)
18377 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18379 if (TREE_CODE (arg_type) == RECORD_TYPE)
18381 unsigned current_bit = starting_bit;
18382 tree field;
18383 long int offset, size;
18386 field = TYPE_FIELDS (arg_type);
18387 while (field)
18389 /* The offset within a structure is always an offset from
18390 the start of that structure. Make sure we take that into account in the
18391 calculation of the register-based offset that we use here. */
18392 offset = starting_bit;
18393 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18394 offset %= 32;
18396 /* This is the actual size of the field; for bitfields this is the
18397 bitfield width and not the container size. */
18398 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18400 if (*last_used_bit != offset)
18402 if (offset < *last_used_bit)
18404 /* This field's offset is before the 'last_used_bit', which
18405 means this field goes on the next register. So we need to
18406 pad the rest of the current register and increase the
18407 register number. */
18408 uint32_t mask;
18409 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18410 mask++;
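/* E.g. with *last_used_bit == 24 this gives mask == 0xff000000:
bits 24-31 of the register are padding and must be cleared. */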
18412 padding_bits_to_clear[*regno] |= mask;
18413 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18414 (*regno)++;
18416 else
18418 /* Otherwise we pad the bits between the last field's end and
18419 the start of the new field. */
18420 uint32_t mask;
18422 mask = ((uint32_t)-1) >> (32 - offset);
18423 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
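/* E.g. offset == 16 and *last_used_bit == 8 gives mask == 0xff00: only
bits 8-15, between the end of the previous field and the start of this
one, are padding. */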
18424 padding_bits_to_clear[*regno] |= mask;
18426 current_bit = offset;
18429 /* Calculate further padding bits for inner structs/unions too. */
18430 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18432 *last_used_bit = current_bit;
18433 not_to_clear_reg_mask
18434 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18435 padding_bits_to_clear, offset,
18436 last_used_bit);
18438 else
18440 /* Update 'current_bit' with this field's size. If the
18441 'current_bit' lies in a subsequent register, update 'regno' and
18442 reset 'current_bit' to point to the current bit in that new
18443 register. */
18444 current_bit += size;
18445 while (current_bit >= 32)
18447 current_bit-=32;
18448 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18449 (*regno)++;
18451 *last_used_bit = current_bit;
18454 field = TREE_CHAIN (field);
18456 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18458 else if (TREE_CODE (arg_type) == UNION_TYPE)
18460 tree field, field_t;
18461 int i, regno_t, field_size;
18462 int max_reg = -1;
18463 int max_bit = -1;
18464 uint32_t mask;
18465 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18466 = {-1, -1, -1, -1};
18468 /* To compute the padding bits in a union we only consider bits as
18469 padding bits if they are always either a padding bit or fall outside a
18470 field's size for all fields in the union. */
18471 field = TYPE_FIELDS (arg_type);
18472 while (field)
18474 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18475 = {0U, 0U, 0U, 0U};
18476 int last_used_bit_t = *last_used_bit;
18477 regno_t = *regno;
18478 field_t = TREE_TYPE (field);
18480 /* If the field's type is either a record or a union make sure to
18481 compute their padding bits too. */
18482 if (RECORD_OR_UNION_TYPE_P (field_t))
18483 not_to_clear_reg_mask
18484 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18485 &padding_bits_to_clear_t[0],
18486 starting_bit, &last_used_bit_t);
18487 else
18489 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18490 regno_t = (field_size / 32) + *regno;
18491 last_used_bit_t = (starting_bit + field_size) % 32;
18494 for (i = *regno; i < regno_t; i++)
18496 /* For all but the last register used by this field only keep the
18497 padding bits that were padding bits in this field. */
18498 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18501 /* For the last register, keep all padding bits that were padding
18502 bits in this field and any padding bits that are still valid
18503 as padding bits but fall outside of this field's size. */
18504 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18505 padding_bits_to_clear_res[regno_t]
18506 &= padding_bits_to_clear_t[regno_t] | mask;
18508 /* Update the maximum size of the fields in terms of registers used
18509 ('max_reg') and the 'last_used_bit' in said register. */
18510 if (max_reg < regno_t)
18512 max_reg = regno_t;
18513 max_bit = last_used_bit_t;
18515 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18516 max_bit = last_used_bit_t;
18518 field = TREE_CHAIN (field);
18521 /* Update the current padding_bits_to_clear using the intersection of the
18522 padding bits of all the fields. */
18523 for (i=*regno; i < max_reg; i++)
18524 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18526 /* Do not keep trailing padding bits; we do not know yet whether this
18527 is the end of the argument. */
18528 mask = ((uint32_t) 1 << max_bit) - 1;
18529 padding_bits_to_clear[max_reg]
18530 |= padding_bits_to_clear_res[max_reg] & mask;
18532 *regno = max_reg;
18533 *last_used_bit = max_bit;
18535 else
18536 /* This function should only be used for structs and unions. */
18537 gcc_unreachable ();
18539 return not_to_clear_reg_mask;
18542 /* In the context of ARMv8-M Security Extensions, this function is used for both
18543 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18544 registers are used when returning or passing arguments, which is then
18545 returned as a mask. It will also compute a mask to indicate padding/unused
18546 bits for each of these registers, and pass this back through the
18547 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18548 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18549 the starting register used to pass this argument or return value is passed
18550 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18551 for struct and union types. */
18553 static unsigned HOST_WIDE_INT
18554 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18555 uint32_t * padding_bits_to_clear)
18558 int last_used_bit = 0;
18559 unsigned HOST_WIDE_INT not_to_clear_mask;
18561 if (RECORD_OR_UNION_TYPE_P (arg_type))
18563 not_to_clear_mask
18564 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18565 padding_bits_to_clear, 0,
18566 &last_used_bit);
18569 /* If the 'last_used_bit' is not zero, that means we are still using a
18570 part of the last 'regno'. In such cases we must clear the trailing
18571 bits. Otherwise we are not using regno and we should mark it to be
18572 cleared. */
18573 if (last_used_bit != 0)
18574 padding_bits_to_clear[regno]
18575 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18576 else
18577 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18579 else
18581 not_to_clear_mask = 0;
18582 /* We are not dealing with structs or unions, so these arguments may be
18583 passed in floating point registers too. In some cases a BLKmode is
18584 used when returning or passing arguments in multiple VFP registers. */
18585 if (GET_MODE (arg_rtx) == BLKmode)
18587 int i, arg_regs;
18588 rtx reg;
18590 /* This should really only occur when dealing with the hard-float
18591 ABI. */
18592 gcc_assert (TARGET_HARD_FLOAT_ABI);
18594 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18596 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18597 gcc_assert (REG_P (reg));
18599 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18601 /* If we are dealing with DF mode, make sure we don't
18602 clear either of the registers it addresses. */
18603 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18604 if (arg_regs > 1)
18606 unsigned HOST_WIDE_INT mask;
18607 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18608 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18609 not_to_clear_mask |= mask;
18613 else
18615 /* Otherwise we can rely on the MODE to determine how many registers
18616 are being used by this argument. */
18617 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18618 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18619 if (arg_regs > 1)
18621 unsigned HOST_WIDE_INT
18622 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18623 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18624 not_to_clear_mask |= mask;
18629 return not_to_clear_mask;
18632 /* Clear secret registers before doing a cmse_nonsecure_call or returning from
18633 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18634 are to be fully cleared, using the value in register CLEARING_REG if more
18635 efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR gives
18636 the bits that need to be cleared in caller-saved core registers, with
18637 SCRATCH_REG used as a scratch register for that clearing.
18639 NOTE: one of the three following conditions must hold:
18640 - SCRATCH_REG is a low register
18641 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18642 in TO_CLEAR_BITMAP)
18643 - CLEARING_REG is a low register. */
18645 static void
18646 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18647 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18649 bool saved_clearing = false;
18650 rtx saved_clearing_reg = NULL_RTX;
18651 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18653 gcc_assert (arm_arch_cmse);
18655 if (!bitmap_empty_p (to_clear_bitmap))
18657 minregno = bitmap_first_set_bit (to_clear_bitmap);
18658 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18660 clearing_regno = REGNO (clearing_reg);
18662 /* Clear padding bits. */
18663 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18664 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18666 uint64_t mask;
18667 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18669 if (padding_bits_to_clear[i] == 0)
18670 continue;
18672 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18673 CLEARING_REG as scratch. */
18674 if (TARGET_THUMB1
18675 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18677 /* If clearing_reg is not to be cleared, copy its value into scratch_reg
18678 so that we can use clearing_reg to clear the unused bits in the
18679 arguments. */
18680 if ((clearing_regno > maxregno
18681 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18682 && !saved_clearing)
18684 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18685 emit_move_insn (scratch_reg, clearing_reg);
18686 saved_clearing = true;
18687 saved_clearing_reg = scratch_reg;
18689 scratch_reg = clearing_reg;
18692 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18693 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18694 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18696 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18697 mask = (~padding_bits_to_clear[i]) >> 16;
18698 rtx16 = gen_int_mode (16, SImode);
18699 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18700 if (mask)
18701 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18703 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18705 if (saved_clearing)
18706 emit_move_insn (clearing_reg, saved_clearing_reg);
18709 /* Clear full registers. */
18711 if (TARGET_HAVE_FPCXT_CMSE)
18713 rtvec vunspec_vec;
18714 int i, j, k, nb_regs;
18715 rtx use_seq, par, reg, set, vunspec;
18716 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18717 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18718 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18720 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18722 /* Find next register to clear and exit if none. */
18723 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18724 if (i > maxregno)
18725 break;
18727 /* Compute number of consecutive registers to clear. */
18728 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18729 j++);
18730 nb_regs = j - i;
18732 /* Create VSCCLRM RTX pattern. */
18733 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18734 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18735 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18736 VUNSPEC_VSCCLRM_VPR);
18737 XVECEXP (par, 0, 0) = vunspec;
18739 /* Insert VFP register clearing RTX in the pattern. */
18740 start_sequence ();
18741 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18743 if (!bitmap_bit_p (to_clear_bitmap, j))
18744 continue;
18746 reg = gen_rtx_REG (SFmode, j);
18747 set = gen_rtx_SET (reg, const0_rtx);
18748 XVECEXP (par, 0, k++) = set;
18749 emit_use (reg);
18751 use_seq = get_insns ();
18752 end_sequence ();
18754 emit_insn_after (use_seq, emit_insn (par));
18757 /* Get set of core registers to clear. */
18758 bitmap_clear (core_regs_bitmap);
18759 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18760 IP_REGNUM - R0_REGNUM + 1);
18761 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18762 core_regs_bitmap);
18763 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18765 if (bitmap_empty_p (to_clear_core_bitmap))
18766 return;
18768 /* Create clrm RTX pattern. */
18769 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18772 /* Insert core register clearing RTX in the pattern. */
18773 start_sequence ();
18774 for (j = 0, i = minregno; j < nb_regs; i++)
18776 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18777 continue;
18779 reg = gen_rtx_REG (SImode, i);
18780 set = gen_rtx_SET (reg, const0_rtx);
18781 XVECEXP (par, 0, j++) = set;
18782 emit_use (reg);
18785 /* Insert APSR register clearing RTX in the pattern
18786 along with clobbering CC. */
18787 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18788 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18789 VUNSPEC_CLRM_APSR);
18791 XVECEXP (par, 0, j++) = vunspec;
18793 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18794 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18795 XVECEXP (par, 0, j) = clobber;
18797 use_seq = get_insns ();
18798 end_sequence ();
18800 emit_insn_after (use_seq, emit_insn (par));
18802 else
18804 /* If not marked for clearing, clearing_reg already does not contain
18805 any secret. */
18806 if (clearing_regno <= maxregno
18807 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18809 emit_move_insn (clearing_reg, const0_rtx);
18810 emit_use (clearing_reg);
18811 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18814 for (regno = minregno; regno <= maxregno; regno++)
18816 if (!bitmap_bit_p (to_clear_bitmap, regno))
18817 continue;
18819 if (IS_VFP_REGNUM (regno))
18821 /* If regno is an even vfp register and its successor is also to
18822 be cleared, use vmov. */
18823 if (TARGET_VFP_DOUBLE
18824 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18825 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18827 emit_move_insn (gen_rtx_REG (DFmode, regno),
18828 CONST1_RTX (DFmode));
18829 emit_use (gen_rtx_REG (DFmode, regno));
18830 regno++;
18832 else
18834 emit_move_insn (gen_rtx_REG (SFmode, regno),
18835 CONST1_RTX (SFmode));
18836 emit_use (gen_rtx_REG (SFmode, regno));
18839 else
18841 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18842 emit_use (gen_rtx_REG (SImode, regno));
18848 /* Clear core and caller-saved VFP registers not used to pass arguments before
18849 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18850 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18851 libgcc/config/arm/cmse_nonsecure_call.S. */
18853 static void
18854 cmse_nonsecure_call_inline_register_clear (void)
18856 basic_block bb;
18858 FOR_EACH_BB_FN (bb, cfun)
18860 rtx_insn *insn;
18862 FOR_BB_INSNS (bb, insn)
18864 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18865 /* frame = VFP regs + FPSCR + VPR. */
18866 unsigned lazy_store_stack_frame_size
18867 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18868 unsigned long callee_saved_mask
18869 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18870 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
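/* That is, the core registers from LAST_ARG_REGNUM + 1 up to LAST_HI_REGNUM,
which are pushed before the nonsecure call and popped after it when
callee-saved registers must also be cleared.  */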
18871 unsigned address_regnum, regno;
18872 unsigned max_int_regno
18873 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18874 unsigned max_fp_regno
18875 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18876 unsigned maxregno
18877 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18878 auto_sbitmap to_clear_bitmap (maxregno + 1);
18879 rtx_insn *seq;
18880 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18881 rtx address;
18882 CUMULATIVE_ARGS args_so_far_v;
18883 cumulative_args_t args_so_far;
18884 tree arg_type, fntype;
18885 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18886 function_args_iterator args_iter;
18887 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18889 if (!NONDEBUG_INSN_P (insn))
18890 continue;
18892 if (!CALL_P (insn))
18893 continue;
18895 pat = PATTERN (insn);
18896 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18897 call = XVECEXP (pat, 0, 0);
18899 /* Get the real call RTX if the insn sets a value, i.e. returns. */
18900 if (GET_CODE (call) == SET)
18901 call = SET_SRC (call);
18903 /* Check if it is a cmse_nonsecure_call. */
18904 unspec = XEXP (call, 0);
18905 if (GET_CODE (unspec) != UNSPEC
18906 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18907 continue;
18909 /* Mark registers that need to be cleared. Those that hold a
18910 parameter are removed from the set further below. */
18911 bitmap_clear (to_clear_bitmap);
18912 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18913 max_int_regno - R0_REGNUM + 1);
18915 /* Only look at the caller-saved floating point registers in case of
18916 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18917 lazy store and loads which clear both caller- and callee-saved
18918 registers. */
18919 if (!lazy_fpclear)
18921 auto_sbitmap float_bitmap (maxregno + 1);
18923 bitmap_clear (float_bitmap);
18924 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18925 max_fp_regno - FIRST_VFP_REGNUM + 1);
18926 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18929 /* Make sure the register used to hold the function address is not
18930 cleared. */
18931 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18932 gcc_assert (MEM_P (address));
18933 gcc_assert (REG_P (XEXP (address, 0)));
18934 address_regnum = REGNO (XEXP (address, 0));
18935 if (address_regnum <= max_int_regno)
18936 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18938 /* Set basic block of call insn so that df rescan is performed on
18939 insns inserted here. */
18940 set_block_for_insn (insn, bb);
18941 df_set_flags (DF_DEFER_INSN_RESCAN);
18942 start_sequence ();
18944 /* Make sure the scheduler doesn't schedule other insns beyond
18945 here. */
18946 emit_insn (gen_blockage ());
18948 /* Walk through all arguments and clear registers appropriately. */
18950 fntype = TREE_TYPE (MEM_EXPR (address));
18951 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18952 NULL_TREE);
18953 args_so_far = pack_cumulative_args (&args_so_far_v);
18954 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18956 rtx arg_rtx;
18957 uint64_t to_clear_args_mask;
18959 if (VOID_TYPE_P (arg_type))
18960 continue;
18962 function_arg_info arg (arg_type, /*named=*/true);
18963 if (!first_param)
18964 /* ??? We should advance after processing the argument and pass
18965 the argument we're advancing past. */
18966 arm_function_arg_advance (args_so_far, arg);
18968 arg_rtx = arm_function_arg (args_so_far, arg);
18969 gcc_assert (REG_P (arg_rtx));
18970 to_clear_args_mask
18971 = compute_not_to_clear_mask (arg_type, arg_rtx,
18972 REGNO (arg_rtx),
18973 &padding_bits_to_clear[0]);
18974 if (to_clear_args_mask)
18976 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18978 if (to_clear_args_mask & (1ULL << regno))
18979 bitmap_clear_bit (to_clear_bitmap, regno);
18983 first_param = false;
18986 /* We use right shift and left shift to clear the LSB of the address
18987 we jump to instead of using bic, to avoid having to use an extra
18988 register on Thumb-1. */
18989 clearing_reg = XEXP (address, 0);
18990 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18991 emit_insn (gen_rtx_SET (clearing_reg, shift));
18992 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18993 emit_insn (gen_rtx_SET (clearing_reg, shift));
18995 if (clear_callee_saved)
18997 rtx push_insn =
18998 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18999 /* Disable frame debug info in push because it needs to be
19000 disabled for pop (see below). */
19001 RTX_FRAME_RELATED_P (push_insn) = 0;
19003 /* Lazy store multiple. */
19004 if (lazy_fpclear)
19006 rtx imm;
19007 rtx_insn *add_insn;
19009 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19010 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19011 stack_pointer_rtx, imm));
19012 /* If we have the frame pointer, then it will be the
19013 CFA reg. Otherwise, the stack pointer is the CFA
19014 reg, so we need to emit a CFA adjust. */
19015 if (!frame_pointer_needed)
19016 arm_add_cfa_adjust_cfa_note (add_insn,
19017 - lazy_store_stack_frame_size,
19018 stack_pointer_rtx,
19019 stack_pointer_rtx);
19020 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19022 /* Save VFP callee-saved registers. */
19023 else
19025 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19026 (max_fp_regno - D7_VFP_REGNUM) / 2);
19027 /* Disable frame debug info in push because it needs to be
19028 disabled for vpop (see below). */
19029 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19033 /* Clear caller-saved registers that leak before doing a non-secure
19034 call. */
19035 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19036 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19037 NUM_ARG_REGS, ip_reg, clearing_reg);
19039 seq = get_insns ();
19040 end_sequence ();
19041 emit_insn_before (seq, insn);
19043 if (TARGET_HAVE_FPCXT_CMSE)
19045 rtx_insn *last, *pop_insn, *after = insn;
19047 start_sequence ();
19049 /* Lazy load multiple done as part of libcall in Armv8-M. */
19050 if (lazy_fpclear)
19052 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19053 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19054 rtx_insn *add_insn =
19055 emit_insn (gen_addsi3 (stack_pointer_rtx,
19056 stack_pointer_rtx, imm));
19057 if (!frame_pointer_needed)
19058 arm_add_cfa_adjust_cfa_note (add_insn,
19059 lazy_store_stack_frame_size,
19060 stack_pointer_rtx,
19061 stack_pointer_rtx);
19063 /* Restore VFP callee-saved registers. */
19064 else
19066 int nb_callee_saved_vfp_regs =
19067 (max_fp_regno - D7_VFP_REGNUM) / 2;
19068 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19069 nb_callee_saved_vfp_regs,
19070 stack_pointer_rtx);
19071 /* Disable frame debug info in vpop because the SP adjustment
19072 is made using a CFA adjustment note while the CFA register used
19073 is sometimes R7. This then causes an assert failure in the
19074 CFI note creation code. */
19075 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19078 arm_emit_multi_reg_pop (callee_saved_mask);
19079 pop_insn = get_last_insn ();
19081 /* Disable frame debug info in the pop because its notes reset the state
19082 of popped registers to what it was at the beginning of the
19083 function, before the prologue. This leads to incorrect state
19084 when doing the pop after the nonsecure call for registers that
19085 are pushed both in the prologue and before the nonsecure call.
19087 It also occasionally triggers an assert failure in the CFI note
19088 creation code when there are two codepaths to the epilogue,
19089 one of which does not go through the nonsecure call.
19090 Obviously this means that debugging between the push and pop is
19091 not reliable. */
19092 RTX_FRAME_RELATED_P (pop_insn) = 0;
19094 seq = get_insns ();
19095 last = get_last_insn ();
19096 end_sequence ();
19098 emit_insn_after (seq, after);
19100 /* Skip the pop we have just inserted after the nonsecure call; we know
19101 it does not contain a nonsecure call. */
19102 insn = last;
19108 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19109 be useful in the next conditional jump insn. */
19111 static void
19112 thumb1_reorg (void)
19114 basic_block bb;
19116 FOR_EACH_BB_FN (bb, cfun)
19118 rtx dest, src;
19119 rtx cmp, op0, op1, set = NULL;
19120 rtx_insn *prev, *insn = BB_END (bb);
19121 bool insn_clobbered = false;
19123 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19124 insn = PREV_INSN (insn);
19126 /* Find the last cbranchsi4_insn in basic block BB. */
19127 if (insn == BB_HEAD (bb)
19128 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19129 continue;
19131 /* Get the register with which we are comparing. */
19132 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19133 op0 = XEXP (cmp, 0);
19134 op1 = XEXP (cmp, 1);
19136 /* Check that comparison is against ZERO. */
19137 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19138 continue;
19140 /* Find the first flag setting insn before INSN in basic block BB. */
19141 gcc_assert (insn != BB_HEAD (bb));
19142 for (prev = PREV_INSN (insn);
19143 (!insn_clobbered
19144 && prev != BB_HEAD (bb)
19145 && (NOTE_P (prev)
19146 || DEBUG_INSN_P (prev)
19147 || ((set = single_set (prev)) != NULL
19148 && get_attr_conds (prev) == CONDS_NOCOND)));
19149 prev = PREV_INSN (prev))
19151 if (reg_set_p (op0, prev))
19152 insn_clobbered = true;
19155 /* Skip if op0 is clobbered by an insn other than prev. */
19156 if (insn_clobbered)
19157 continue;
19159 if (!set)
19160 continue;
19162 dest = SET_DEST (set);
19163 src = SET_SRC (set);
19164 if (!low_register_operand (dest, SImode)
19165 || !low_register_operand (src, SImode))
19166 continue;
19168 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19169 in INSN. Both src and dest of the move insn are checked. */
19170 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19172 dest = copy_rtx (dest);
19173 src = copy_rtx (src);
19174 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19175 PATTERN (prev) = gen_rtx_SET (dest, src);
19176 INSN_CODE (prev) = -1;
19177 /* Set test register in INSN to dest. */
19178 XEXP (cmp, 0) = copy_rtx (dest);
19179 INSN_CODE (insn) = -1;
19184 /* Convert instructions to their cc-clobbering variant if possible, since
19185 that allows us to use smaller encodings. */
19187 static void
19188 thumb2_reorg (void)
19190 basic_block bb;
19191 regset_head live;
19193 INIT_REG_SET (&live);
19195 /* We are freeing block_for_insn in the toplev to keep compatibility
19196 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19197 compute_bb_for_insn ();
19198 df_analyze ();
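/* SKIP leaves an insn alone, CONV adds a CC clobber so that the 16-bit
flag-setting encoding can be used, and SWAP_CONV additionally swaps the
operands of a commutative operation before converting.  */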
19200 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19202 FOR_EACH_BB_FN (bb, cfun)
19204 if ((current_tune->disparage_flag_setting_t16_encodings
19205 == tune_params::DISPARAGE_FLAGS_ALL)
19206 && optimize_bb_for_speed_p (bb))
19207 continue;
19209 rtx_insn *insn;
19210 Convert_Action action = SKIP;
19211 Convert_Action action_for_partial_flag_setting
19212 = ((current_tune->disparage_flag_setting_t16_encodings
19213 != tune_params::DISPARAGE_FLAGS_NEITHER)
19214 && optimize_bb_for_speed_p (bb))
19215 ? SKIP : CONV;
19217 COPY_REG_SET (&live, DF_LR_OUT (bb));
19218 df_simulate_initialize_backwards (bb, &live);
19219 FOR_BB_INSNS_REVERSE (bb, insn)
19221 if (NONJUMP_INSN_P (insn)
19222 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19223 && GET_CODE (PATTERN (insn)) == SET)
19225 action = SKIP;
19226 rtx pat = PATTERN (insn);
19227 rtx dst = XEXP (pat, 0);
19228 rtx src = XEXP (pat, 1);
19229 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19231 if (UNARY_P (src) || BINARY_P (src))
19232 op0 = XEXP (src, 0);
19234 if (BINARY_P (src))
19235 op1 = XEXP (src, 1);
19237 if (low_register_operand (dst, SImode))
19239 switch (GET_CODE (src))
19241 case PLUS:
19242 /* Adding two registers and storing the result
19243 in the first source is already a 16-bit
19244 operation. */
19245 if (rtx_equal_p (dst, op0)
19246 && register_operand (op1, SImode))
19247 break;
19249 if (low_register_operand (op0, SImode))
19251 /* ADDS <Rd>,<Rn>,<Rm> */
19252 if (low_register_operand (op1, SImode))
19253 action = CONV;
19254 /* ADDS <Rdn>,#<imm8> */
19255 /* SUBS <Rdn>,#<imm8> */
19256 else if (rtx_equal_p (dst, op0)
19257 && CONST_INT_P (op1)
19258 && IN_RANGE (INTVAL (op1), -255, 255))
19259 action = CONV;
19260 /* ADDS <Rd>,<Rn>,#<imm3> */
19261 /* SUBS <Rd>,<Rn>,#<imm3> */
19262 else if (CONST_INT_P (op1)
19263 && IN_RANGE (INTVAL (op1), -7, 7))
19264 action = CONV;
19266 /* ADCS <Rd>, <Rn> */
19267 else if (GET_CODE (XEXP (src, 0)) == PLUS
19268 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19269 && low_register_operand (XEXP (XEXP (src, 0), 1),
19270 SImode)
19271 && COMPARISON_P (op1)
19272 && cc_register (XEXP (op1, 0), VOIDmode)
19273 && maybe_get_arm_condition_code (op1) == ARM_CS
19274 && XEXP (op1, 1) == const0_rtx)
19275 action = CONV;
19276 break;
19278 case MINUS:
19279 /* RSBS <Rd>,<Rn>,#0
19280 Not handled here: see NEG below. */
19281 /* SUBS <Rd>,<Rn>,#<imm3>
19282 SUBS <Rdn>,#<imm8>
19283 Not handled here: see PLUS above. */
19284 /* SUBS <Rd>,<Rn>,<Rm> */
19285 if (low_register_operand (op0, SImode)
19286 && low_register_operand (op1, SImode))
19287 action = CONV;
19288 break;
19290 case MULT:
19291 /* MULS <Rdm>,<Rn>,<Rdm>
19292 As an exception to the rule, this is only used
19293 when optimizing for size since MULS is slow on all
19294 known implementations. We do not even want to use
19295 MULS in cold code, if optimizing for speed, so we
19296 test the global flag here. */
19297 if (!optimize_size)
19298 break;
19299 /* Fall through. */
19300 case AND:
19301 case IOR:
19302 case XOR:
19303 /* ANDS <Rdn>,<Rm> */
19304 if (rtx_equal_p (dst, op0)
19305 && low_register_operand (op1, SImode))
19306 action = action_for_partial_flag_setting;
19307 else if (rtx_equal_p (dst, op1)
19308 && low_register_operand (op0, SImode))
19309 action = action_for_partial_flag_setting == SKIP
19310 ? SKIP : SWAP_CONV;
19311 break;
19313 case ASHIFTRT:
19314 case ASHIFT:
19315 case LSHIFTRT:
19316 /* ASRS <Rdn>,<Rm> */
19317 /* LSRS <Rdn>,<Rm> */
19318 /* LSLS <Rdn>,<Rm> */
19319 if (rtx_equal_p (dst, op0)
19320 && low_register_operand (op1, SImode))
19321 action = action_for_partial_flag_setting;
19322 /* ASRS <Rd>,<Rm>,#<imm5> */
19323 /* LSRS <Rd>,<Rm>,#<imm5> */
19324 /* LSLS <Rd>,<Rm>,#<imm5> */
19325 else if (low_register_operand (op0, SImode)
19326 && CONST_INT_P (op1)
19327 && IN_RANGE (INTVAL (op1), 0, 31))
19328 action = action_for_partial_flag_setting;
19329 break;
19331 case ROTATERT:
19332 /* RORS <Rdn>,<Rm> */
19333 if (rtx_equal_p (dst, op0)
19334 && low_register_operand (op1, SImode))
19335 action = action_for_partial_flag_setting;
19336 break;
19338 case NOT:
19339 /* MVNS <Rd>,<Rm> */
19340 if (low_register_operand (op0, SImode))
19341 action = action_for_partial_flag_setting;
19342 break;
19344 case NEG:
19345 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19346 if (low_register_operand (op0, SImode))
19347 action = CONV;
19348 break;
19350 case CONST_INT:
19351 /* MOVS <Rd>,#<imm8> */
19352 if (CONST_INT_P (src)
19353 && IN_RANGE (INTVAL (src), 0, 255))
19354 action = action_for_partial_flag_setting;
19355 break;
19357 case REG:
19358 /* MOVS and MOV<c> with registers have different
19359 encodings, so are not relevant here. */
19360 break;
19362 default:
19363 break;
19367 if (action != SKIP)
19369 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19370 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19371 rtvec vec;
19373 if (action == SWAP_CONV)
19375 src = copy_rtx (src);
19376 XEXP (src, 0) = op1;
19377 XEXP (src, 1) = op0;
19378 pat = gen_rtx_SET (dst, src);
19379 vec = gen_rtvec (2, pat, clobber);
19381 else /* action == CONV */
19382 vec = gen_rtvec (2, pat, clobber);
19384 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19385 INSN_CODE (insn) = -1;
19389 if (NONDEBUG_INSN_P (insn))
19390 df_simulate_one_insn_backwards (bb, insn, &live);
19394 CLEAR_REG_SET (&live);
19397 /* GCC puts the pool in the wrong place for ARM, since we can only
19398 load addresses a limited distance around the pc. We do some
19399 special munging to move the constant pool values to the correct
19400 point in the code. */
19401 static void
19402 arm_reorg (void)
19404 rtx_insn *insn;
19405 HOST_WIDE_INT address = 0;
19406 Mfix * fix;
19408 if (use_cmse)
19409 cmse_nonsecure_call_inline_register_clear ();
19411 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19412 if (cfun->is_thunk)
19414 else if (TARGET_THUMB1)
19415 thumb1_reorg ();
19416 else if (TARGET_THUMB2)
19417 thumb2_reorg ();
19419 /* Ensure all insns that must be split have been split at this point.
19420 Otherwise, the pool placement code below may compute incorrect
19421 insn lengths. Note that when optimizing, all insns have already
19422 been split at this point. */
19423 if (!optimize)
19424 split_all_insns_noflow ();
19426 /* Make sure we do not attempt to create a literal pool, even though
19427 creating one should no longer be necessary. */
19428 if (arm_disable_literal_pool)
19429 return;
19431 minipool_fix_head = minipool_fix_tail = NULL;
19433 /* The first insn must always be a note, or the code below won't
19434 scan it properly. */
19435 insn = get_insns ();
19436 gcc_assert (NOTE_P (insn));
19437 minipool_pad = 0;
19439 /* Scan all the insns and record the operands that will need fixing. */
19440 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19442 if (BARRIER_P (insn))
19443 push_minipool_barrier (insn, address);
19444 else if (INSN_P (insn))
19446 rtx_jump_table_data *table;
19448 note_invalid_constants (insn, address, true);
19449 address += get_attr_length (insn);
19451 /* If the insn is a vector jump, add the size of the table
19452 and skip the table. */
19453 if (tablejump_p (insn, NULL, &table))
19455 address += get_jump_table_size (table);
19456 insn = table;
19459 else if (LABEL_P (insn))
19460 /* Add the worst-case padding due to alignment. We don't add
19461 the _current_ padding because the minipool insertions
19462 themselves might change it. */
19463 address += get_label_padding (insn);
19466 fix = minipool_fix_head;
19468 /* Now scan the fixups and perform the required changes. */
19469 while (fix)
19471 Mfix * ftmp;
19472 Mfix * fdel;
19473 Mfix * last_added_fix;
19474 Mfix * last_barrier = NULL;
19475 Mfix * this_fix;
19477 /* Skip any further barriers before the next fix. */
19478 while (fix && BARRIER_P (fix->insn))
19479 fix = fix->next;
19481 /* No more fixes. */
19482 if (fix == NULL)
19483 break;
19485 last_added_fix = NULL;
19487 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19489 if (BARRIER_P (ftmp->insn))
19491 if (ftmp->address >= minipool_vector_head->max_address)
19492 break;
19494 last_barrier = ftmp;
19496 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19497 break;
19499 last_added_fix = ftmp; /* Keep track of the last fix added. */
19502 /* If we found a barrier, drop back to that; any fixes that we
19503 could have reached but come after the barrier will now go in
19504 the next mini-pool. */
19505 if (last_barrier != NULL)
19507 /* Reduce the refcount for those fixes that won't go into this
19508 pool after all. */
19509 for (fdel = last_barrier->next;
19510 fdel && fdel != ftmp;
19511 fdel = fdel->next)
19513 fdel->minipool->refcount--;
19514 fdel->minipool = NULL;
19517 ftmp = last_barrier;
19519 else
19521 /* ftmp is the first fix that we can't fit into this pool and
19522 there are no natural barriers that we could use. Insert a
19523 new barrier in the code somewhere between the previous
19524 fix and this one, and arrange to jump around it. */
19525 HOST_WIDE_INT max_address;
19527 /* The last item on the list of fixes must be a barrier, so
19528 we can never run off the end of the list of fixes without
19529 last_barrier being set. */
19530 gcc_assert (ftmp);
19532 max_address = minipool_vector_head->max_address;
19533 /* Check that there isn't another fix that is in range that
19534 we couldn't fit into this pool because the pool was
19535 already too large: we need to put the pool before such an
19536 instruction. The pool itself may come just after the
19537 fix because create_fix_barrier also allows space for a
19538 jump instruction. */
19539 if (ftmp->address < max_address)
19540 max_address = ftmp->address + 1;
19542 last_barrier = create_fix_barrier (last_added_fix, max_address);
19545 assign_minipool_offsets (last_barrier);
19547 while (ftmp)
19549 if (!BARRIER_P (ftmp->insn)
19550 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19551 == NULL))
19552 break;
19554 ftmp = ftmp->next;
19557 /* Scan over the fixes we have identified for this pool, fixing them
19558 up and adding the constants to the pool itself. */
19559 for (this_fix = fix; this_fix && ftmp != this_fix;
19560 this_fix = this_fix->next)
19561 if (!BARRIER_P (this_fix->insn))
19563 rtx addr
19564 = plus_constant (Pmode,
19565 gen_rtx_LABEL_REF (VOIDmode,
19566 minipool_vector_label),
19567 this_fix->minipool->offset);
19568 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19571 dump_minipool (last_barrier->insn);
19572 fix = ftmp;
19575 /* From now on we must synthesize any constants that we can't handle
19576 directly. This can happen if the RTL gets split during final
19577 instruction generation. */
19578 cfun->machine->after_arm_reorg = 1;
19580 /* Free the minipool memory. */
19581 obstack_free (&minipool_obstack, minipool_startobj);
19584 /* Routines to output assembly language. */
19586 /* Return the string representation of the passed-in real value. */
19587 static const char *
19588 fp_const_from_val (REAL_VALUE_TYPE *r)
19590 if (!fp_consts_inited)
19591 init_fp_table ();
19593 gcc_assert (real_equal (r, &value_fp0));
19594 return "0";
19597 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19598 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19599 is in the list, UPDATE is true iff the list contains an explicit
19600 update of the base register. */
19601 void
19602 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19603 bool update)
19605 int i;
19606 char pattern[100];
19607 int offset;
19608 const char *conditional;
19609 int num_saves = XVECLEN (operands[0], 0);
19610 unsigned int regno;
19611 unsigned int regno_base = REGNO (operands[1]);
19612 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19614 offset = 0;
19615 offset += update ? 1 : 0;
19616 offset += return_pc ? 1 : 0;
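/* OFFSET is the number of leading elements of the PARALLEL that are not
register loads (the optional base-register update and/or the return);
the destination registers start at index OFFSET.  */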
19618 /* Is the base register in the list? */
19619 for (i = offset; i < num_saves; i++)
19621 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19622 /* If SP is in the list, then the base register must be SP. */
19623 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19624 /* If base register is in the list, there must be no explicit update. */
19625 if (regno == regno_base)
19626 gcc_assert (!update);
19629 conditional = reverse ? "%?%D0" : "%?%d0";
19630 /* Can't use POP if returning from an interrupt. */
19631 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19632 sprintf (pattern, "pop%s\t{", conditional);
19633 else
19635 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19636 It's just a convention, their semantics are identical. */
19637 if (regno_base == SP_REGNUM)
19638 sprintf (pattern, "ldmfd%s\t", conditional);
19639 else if (update)
19640 sprintf (pattern, "ldmia%s\t", conditional);
19641 else
19642 sprintf (pattern, "ldm%s\t", conditional);
19644 strcat (pattern, reg_names[regno_base]);
19645 if (update)
19646 strcat (pattern, "!, {");
19647 else
19648 strcat (pattern, ", {");
19651 /* Output the first destination register. */
19652 strcat (pattern,
19653 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19655 /* Output the rest of the destination registers. */
19656 for (i = offset + 1; i < num_saves; i++)
19658 strcat (pattern, ", ");
19659 strcat (pattern,
19660 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19663 strcat (pattern, "}");
19665 if (interrupt_p && return_pc)
19666 strcat (pattern, "^");
19668 output_asm_insn (pattern, &cond);
19672 /* Output the assembly for a store multiple. */
19674 const char *
19675 vfp_output_vstmd (rtx * operands)
19677 char pattern[100];
19678 int p;
19679 int base;
19680 int i;
19681 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19682 ? XEXP (operands[0], 0)
19683 : XEXP (XEXP (operands[0], 0), 0);
19684 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19686 if (push_p)
19687 strcpy (pattern, "vpush%?.64\t{%P1");
19688 else
19689 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19691 p = strlen (pattern);
19693 gcc_assert (REG_P (operands[1]));
19695 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
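/* Internal VFP register numbers count single-precision registers, so halving
the offset from FIRST_VFP_REGNUM gives the index of the first D register.  */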
19696 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19698 p += sprintf (&pattern[p], ", d%d", base + i);
19700 strcpy (&pattern[p], "}");
19702 output_asm_insn (pattern, operands);
19703 return "";
19707 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19708 number of bytes pushed. */
19710 static int
19711 vfp_emit_fstmd (int base_reg, int count)
19713 rtx par;
19714 rtx dwarf;
19715 rtx tmp, reg;
19716 int i;
19718 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19719 register pairs are stored by a store multiple insn. We avoid this
19720 by pushing an extra pair. */
19721 if (count == 2 && !arm_arch6)
19723 if (base_reg == LAST_VFP_REGNUM - 3)
19724 base_reg -= 2;
19725 count++;
19728 /* FSTMD may not store more than 16 doubleword registers at once. Split
19729 larger stores into multiple parts (up to a maximum of two, in
19730 practice). */
19731 if (count > 16)
19733 int saved;
19734 /* NOTE: base_reg is an internal register number, so each D register
19735 counts as 2. */
19736 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19737 saved += vfp_emit_fstmd (base_reg, 16);
19738 return saved;
19741 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19742 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
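/* PAR becomes the store-multiple insn itself, while DWARF describes the
equivalent SP adjustment followed by the individual register stores; DWARF
is attached below as a REG_FRAME_RELATED_EXPR note so that the unwinder
sees the effect of the push.  */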
19744 reg = gen_rtx_REG (DFmode, base_reg);
19745 base_reg += 2;
19747 XVECEXP (par, 0, 0)
19748 = gen_rtx_SET (gen_frame_mem
19749 (BLKmode,
19750 gen_rtx_PRE_MODIFY (Pmode,
19751 stack_pointer_rtx,
19752 plus_constant
19753 (Pmode, stack_pointer_rtx,
19754 - (count * 8)))
19756 gen_rtx_UNSPEC (BLKmode,
19757 gen_rtvec (1, reg),
19758 UNSPEC_PUSH_MULT));
19760 tmp = gen_rtx_SET (stack_pointer_rtx,
19761 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 XVECEXP (dwarf, 0, 0) = tmp;
19765 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19766 RTX_FRAME_RELATED_P (tmp) = 1;
19767 XVECEXP (dwarf, 0, 1) = tmp;
19769 for (i = 1; i < count; i++)
19771 reg = gen_rtx_REG (DFmode, base_reg);
19772 base_reg += 2;
19773 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19775 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19776 plus_constant (Pmode,
19777 stack_pointer_rtx,
19778 i * 8)),
19779 reg);
19780 RTX_FRAME_RELATED_P (tmp) = 1;
19781 XVECEXP (dwarf, 0, i + 1) = tmp;
19784 par = emit_insn (par);
19785 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19786 RTX_FRAME_RELATED_P (par) = 1;
19788 return count * 8;
19791 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
19792 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
19794 bool
19795 detect_cmse_nonsecure_call (tree addr)
19797 if (!addr)
19798 return FALSE;
19800 tree fntype = TREE_TYPE (addr);
19801 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19802 TYPE_ATTRIBUTES (fntype)))
19803 return TRUE;
19804 return FALSE;
19808 /* Emit a call instruction with pattern PAT. ADDR is the address of
19809 the call target. */
19811 void
19812 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19814 rtx insn;
19816 insn = emit_call_insn (pat);
19818 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19819 If the call might use such an entry, add a use of the PIC register
19820 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19821 if (TARGET_VXWORKS_RTP
19822 && flag_pic
19823 && !sibcall
19824 && SYMBOL_REF_P (addr)
19825 && (SYMBOL_REF_DECL (addr)
19826 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19827 : !SYMBOL_REF_LOCAL_P (addr)))
19829 require_pic_register (NULL_RTX, false /*compute_now*/);
19830 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19833 if (TARGET_FDPIC)
19835 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19836 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19839 if (TARGET_AAPCS_BASED)
19841 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19842 linker. We need to add an IP clobber to allow setting
19843 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19844 is not needed since it's a fixed register. */
19845 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19846 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19850 /* Output a 'call' insn. */
19851 const char *
19852 output_call (rtx *operands)
19854 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19856 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19857 if (REGNO (operands[0]) == LR_REGNUM)
19859 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19860 output_asm_insn ("mov%?\t%0, %|lr", operands);
19863 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19865 if (TARGET_INTERWORK || arm_arch4t)
19866 output_asm_insn ("bx%?\t%0", operands);
19867 else
19868 output_asm_insn ("mov%?\t%|pc, %0", operands);
19870 return "";
19873 /* Output a move from arm registers to arm registers of a long double.
19874 OPERANDS[0] is the destination.
19875 OPERANDS[1] is the source. */
19876 const char *
19877 output_mov_long_double_arm_from_arm (rtx *operands)
19879 /* We have to be careful here because the two might overlap. */
19880 int dest_start = REGNO (operands[0]);
19881 int src_start = REGNO (operands[1]);
19882 rtx ops[2];
19883 int i;
19885 if (dest_start < src_start)
19887 for (i = 0; i < 3; i++)
19889 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19890 ops[1] = gen_rtx_REG (SImode, src_start + i);
19891 output_asm_insn ("mov%?\t%0, %1", ops);
19894 else
19896 for (i = 2; i >= 0; i--)
19898 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19899 ops[1] = gen_rtx_REG (SImode, src_start + i);
19900 output_asm_insn ("mov%?\t%0, %1", ops);
19904 return "";
19907 void
19908 arm_emit_movpair (rtx dest, rtx src)
19910 /* If the src is an immediate, simplify it. */
19911 if (CONST_INT_P (src))
19913 HOST_WIDE_INT val = INTVAL (src);
19914 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
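/* On targets with MOVW/MOVT the set above typically becomes a movw of the
low 16 bits, and the ZERO_EXTRACT set below becomes a movt filling in the
high 16 bits when they are nonzero.  */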
19915 if ((val >> 16) & 0x0000ffff)
19917 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19918 GEN_INT (16)),
19919 GEN_INT ((val >> 16) & 0x0000ffff));
19920 rtx_insn *insn = get_last_insn ();
19921 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19923 return;
19925 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19926 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19927 rtx_insn *insn = get_last_insn ();
19928 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19931 /* Output a move between double words. It must be REG<-MEM
19932 or MEM<-REG. */
19933 const char *
19934 output_move_double (rtx *operands, bool emit, int *count)
19936 enum rtx_code code0 = GET_CODE (operands[0]);
19937 enum rtx_code code1 = GET_CODE (operands[1]);
19938 rtx otherops[3];
19939 if (count)
19940 *count = 1;
19942 /* The only case when this might happen is when
19943 you are looking at the length of a DImode instruction
19944 that has an invalid constant in it. */
19945 if (code0 == REG && code1 != MEM)
19947 gcc_assert (!emit);
19948 *count = 2;
19949 return "";
19952 if (code0 == REG)
19954 unsigned int reg0 = REGNO (operands[0]);
19955 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19957 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19959 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19961 switch (GET_CODE (XEXP (operands[1], 0)))
19963 case REG:
19965 if (emit)
19967 if (can_ldrd
19968 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19969 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19970 else
19971 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19973 break;
19975 case PRE_INC:
19976 gcc_assert (can_ldrd);
19977 if (emit)
19978 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19979 break;
19981 case PRE_DEC:
19982 if (emit)
19984 if (can_ldrd)
19985 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19986 else
19987 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19989 break;
19991 case POST_INC:
19992 if (emit)
19994 if (can_ldrd)
19995 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19996 else
19997 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19999 break;
20001 case POST_DEC:
20002 gcc_assert (can_ldrd);
20003 if (emit)
20004 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20005 break;
20007 case PRE_MODIFY:
20008 case POST_MODIFY:
20009 /* Auto-increment addressing modes should never have overlapping
20010 base and destination registers, and overlapping index registers
20011 are already prohibited, so this doesn't need to worry about
20012 fix_cm3_ldrd. */
20013 otherops[0] = operands[0];
20014 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20015 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20017 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20019 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20021 /* Registers overlap so split out the increment. */
20022 if (emit)
20024 gcc_assert (can_ldrd);
20025 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20026 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20028 if (count)
20029 *count = 2;
20031 else
20033 /* Use a single insn if we can.
20034 FIXME: IWMMXT allows offsets larger than ldrd can
20035 handle, fix these up with a pair of ldr. */
20036 if (can_ldrd
20037 && (TARGET_THUMB2
20038 || !CONST_INT_P (otherops[2])
20039 || (INTVAL (otherops[2]) > -256
20040 && INTVAL (otherops[2]) < 256)))
20042 if (emit)
20043 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20045 else
20047 if (emit)
20049 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20050 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20052 if (count)
20053 *count = 2;
20058 else
20060 /* Use a single insn if we can.
20061 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20062 fix these up with a pair of ldr. */
20063 if (can_ldrd
20064 && (TARGET_THUMB2
20065 || !CONST_INT_P (otherops[2])
20066 || (INTVAL (otherops[2]) > -256
20067 && INTVAL (otherops[2]) < 256)))
20069 if (emit)
20070 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20072 else
20074 if (emit)
20076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20079 if (count)
20080 *count = 2;
20083 break;
20085 case LABEL_REF:
20086 case CONST:
20087 /* We might be able to use ldrd %0, %1 here. However the range is
20088 different to ldr/adr, and it is broken on some ARMv7-M
20089 implementations. */
20090 /* Use the second register of the pair to avoid problematic
20091 overlap. */
20092 otherops[1] = operands[1];
20093 if (emit)
20094 output_asm_insn ("adr%?\t%0, %1", otherops);
20095 operands[1] = otherops[0];
20096 if (emit)
20098 if (can_ldrd)
20099 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20100 else
20101 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20104 if (count)
20105 *count = 2;
20106 break;
20108 /* ??? This needs checking for thumb2. */
20109 default:
20110 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20111 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20113 otherops[0] = operands[0];
20114 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20115 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20117 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20119 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20121 switch ((int) INTVAL (otherops[2]))
20123 case -8:
20124 if (emit)
20125 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20126 return "";
20127 case -4:
20128 if (TARGET_THUMB2)
20129 break;
20130 if (emit)
20131 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20132 return "";
20133 case 4:
20134 if (TARGET_THUMB2)
20135 break;
20136 if (emit)
20137 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20138 return "";
20141 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20142 operands[1] = otherops[0];
20143 if (can_ldrd
20144 && (REG_P (otherops[2])
20145 || TARGET_THUMB2
20146 || (CONST_INT_P (otherops[2])
20147 && INTVAL (otherops[2]) > -256
20148 && INTVAL (otherops[2]) < 256)))
20150 if (reg_overlap_mentioned_p (operands[0],
20151 otherops[2]))
20153 /* Swap base and index registers over to
20154 avoid a conflict. */
20155 std::swap (otherops[1], otherops[2]);
20157 /* If both registers conflict, it will usually
20158 have been fixed by a splitter. */
20159 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20160 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20162 if (emit)
20164 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20165 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20167 if (count)
20168 *count = 2;
20170 else
20172 otherops[0] = operands[0];
20173 if (emit)
20174 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20176 return "";
20179 if (CONST_INT_P (otherops[2]))
20181 if (emit)
20183 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20184 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20185 else
20186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20189 else
20191 if (emit)
20192 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20195 else
20197 if (emit)
20198 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20201 if (count)
20202 *count = 2;
20204 if (can_ldrd)
20205 return "ldrd%?\t%0, [%1]";
20207 return "ldmia%?\t%1, %M0";
20209 else
20211 otherops[1] = adjust_address (operands[1], SImode, 4);
20212 /* Take care of overlapping base/data reg. */
20213 if (reg_mentioned_p (operands[0], operands[1]))
20215 if (emit)
20217 output_asm_insn ("ldr%?\t%0, %1", otherops);
20218 output_asm_insn ("ldr%?\t%0, %1", operands);
20220 if (count)
20221 *count = 2;
20224 else
20226 if (emit)
20228 output_asm_insn ("ldr%?\t%0, %1", operands);
20229 output_asm_insn ("ldr%?\t%0, %1", otherops);
20231 if (count)
20232 *count = 2;
20237 else
20239 /* Constraints should ensure this. */
20240 gcc_assert (code0 == MEM && code1 == REG);
20241 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20242 || (TARGET_ARM && TARGET_LDRD));
20244 /* For TARGET_ARM the first source register of an STRD
20245 must be even. This is usually the case for double-word
20246 values but user assembly constraints can force an odd
20247 starting register. */
20248 bool allow_strd = TARGET_LDRD
20249 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20250 switch (GET_CODE (XEXP (operands[0], 0)))
20252 case REG:
20253 if (emit)
20255 if (allow_strd)
20256 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20257 else
20258 output_asm_insn ("stm%?\t%m0, %M1", operands);
20260 break;
20262 case PRE_INC:
20263 gcc_assert (allow_strd);
20264 if (emit)
20265 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20266 break;
20268 case PRE_DEC:
20269 if (emit)
20271 if (allow_strd)
20272 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20273 else
20274 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20276 break;
20278 case POST_INC:
20279 if (emit)
20281 if (allow_strd)
20282 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20283 else
20284 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20286 break;
20288 case POST_DEC:
20289 gcc_assert (allow_strd);
20290 if (emit)
20291 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20292 break;
20294 case PRE_MODIFY:
20295 case POST_MODIFY:
20296 otherops[0] = operands[1];
20297 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20298 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20300 /* IWMMXT allows offsets larger than strd can handle,
20301 fix these up with a pair of str. */
20302 if (!TARGET_THUMB2
20303 && CONST_INT_P (otherops[2])
20304 && (INTVAL(otherops[2]) <= -256
20305 || INTVAL(otherops[2]) >= 256))
20307 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20309 if (emit)
20311 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20312 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20314 if (count)
20315 *count = 2;
20317 else
20319 if (emit)
20321 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20322 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20324 if (count)
20325 *count = 2;
20328 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20330 if (emit)
20331 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20333 else
20335 if (emit)
20336 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20338 break;
20340 case PLUS:
20341 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20342 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20344 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20346 case -8:
20347 if (emit)
20348 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20349 return "";
20351 case -4:
20352 if (TARGET_THUMB2)
20353 break;
20354 if (emit)
20355 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20356 return "";
20358 case 4:
20359 if (TARGET_THUMB2)
20360 break;
20361 if (emit)
20362 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20363 return "";
20366 if (allow_strd
20367 && (REG_P (otherops[2])
20368 || TARGET_THUMB2
20369 || (CONST_INT_P (otherops[2])
20370 && INTVAL (otherops[2]) > -256
20371 && INTVAL (otherops[2]) < 256)))
20373 otherops[0] = operands[1];
20374 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20375 if (emit)
20376 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20377 return "";
20379 /* Fall through */
20381 default:
20382 otherops[0] = adjust_address (operands[0], SImode, 4);
20383 otherops[1] = operands[1];
20384 if (emit)
20386 output_asm_insn ("str%?\t%1, %0", operands);
20387 output_asm_insn ("str%?\t%H1, %0", otherops);
20389 if (count)
20390 *count = 2;
20394 return "";
20397 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20398 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20400 const char *
20401 output_move_quad (rtx *operands)
20403 if (REG_P (operands[0]))
20405 /* Load, or reg->reg move. */
20407 if (MEM_P (operands[1]))
20409 switch (GET_CODE (XEXP (operands[1], 0)))
20411 case REG:
20412 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20413 break;
20415 case LABEL_REF:
20416 case CONST:
20417 output_asm_insn ("adr%?\t%0, %1", operands);
20418 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20419 break;
20421 default:
20422 gcc_unreachable ();
20425 else
20427 rtx ops[2];
20428 int dest, src, i;
20430 gcc_assert (REG_P (operands[1]));
20432 dest = REGNO (operands[0]);
20433 src = REGNO (operands[1]);
20435 /* This seems pretty dumb, but hopefully GCC won't try to do it
20436 very often. */
20437 if (dest < src)
20438 for (i = 0; i < 4; i++)
20440 ops[0] = gen_rtx_REG (SImode, dest + i);
20441 ops[1] = gen_rtx_REG (SImode, src + i);
20442 output_asm_insn ("mov%?\t%0, %1", ops);
20444 else
20445 for (i = 3; i >= 0; i--)
20447 ops[0] = gen_rtx_REG (SImode, dest + i);
20448 ops[1] = gen_rtx_REG (SImode, src + i);
20449 output_asm_insn ("mov%?\t%0, %1", ops);
20453 else
20455 gcc_assert (MEM_P (operands[0]));
20456 gcc_assert (REG_P (operands[1]));
20457 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20459 switch (GET_CODE (XEXP (operands[0], 0)))
20461 case REG:
20462 output_asm_insn ("stm%?\t%m0, %M1", operands);
20463 break;
20465 default:
20466 gcc_unreachable ();
20470 return "";
20473 /* Output a VFP load or store instruction. */
20475 const char *
20476 output_move_vfp (rtx *operands)
20478 rtx reg, mem, addr, ops[2];
20479 int load = REG_P (operands[0]);
20480 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20481 int sp = (!TARGET_VFP_FP16INST
20482 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20483 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20484 const char *templ;
20485 char buff[50];
20486 machine_mode mode;
20488 reg = operands[!load];
20489 mem = operands[load];
20491 mode = GET_MODE (reg);
20493 gcc_assert (REG_P (reg));
20494 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20495 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20496 || mode == SFmode
20497 || mode == DFmode
20498 || mode == HImode
20499 || mode == SImode
20500 || mode == DImode
20501 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20502 gcc_assert (MEM_P (mem));
20504 addr = XEXP (mem, 0);
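/* TEMPL is filled in below (see the sprintf) with the operation (ld or st),
the access size (16, 32 or 64), an optional "P" operand modifier for
D registers, and an optional "@ int" assembler comment for integer modes.  */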
20506 switch (GET_CODE (addr))
20508 case PRE_DEC:
20509 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20510 ops[0] = XEXP (addr, 0);
20511 ops[1] = reg;
20512 break;
20514 case POST_INC:
20515 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20516 ops[0] = XEXP (addr, 0);
20517 ops[1] = reg;
20518 break;
20520 default:
20521 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20522 ops[0] = reg;
20523 ops[1] = mem;
20524 break;
20527 sprintf (buff, templ,
20528 load ? "ld" : "st",
20529 dp ? "64" : sp ? "32" : "16",
20530 dp ? "P" : "",
20531 integer_p ? "\t%@ int" : "");
20532 output_asm_insn (buff, ops);
20534 return "";
20537 /* Output a Neon double-word or quad-word load or store, or a load
20538 or store for larger structure modes.
20540 WARNING: The ordering of elements is weird in big-endian mode,
20541 because the EABI requires that vectors stored in memory appear
20542 as though they were stored by a VSTM instruction.
20543 GCC RTL defines element ordering based on in-memory order.
20544 This can be different from the architectural ordering of elements
20545 within a NEON register. The intrinsics defined in arm_neon.h use the
20546 NEON register element ordering, not the GCC RTL element ordering.
20548 For example, the in-memory ordering of a big-endian quadword
20549 vector with 16-bit elements when stored from register pair {d0,d1}
20550 will be (lowest address first, d0[N] is NEON register element N):
20552 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20554 When necessary, quadword registers (dN, dN+1) are moved to ARM
20555 registers from rN in the order:
20557 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20559 So that STM/LDM can be used on vectors in ARM registers, and the
20560 same memory layout will result as if VSTM/VLDM were used.
20562 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20563 possible, which allows use of appropriate alignment tags.
20564 Note that the choice of "64" is independent of the actual vector
20565 element size; this size simply ensures that the behavior is
20566 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20568 Due to limitations of those instructions, use of VST1.64/VLD1.64
20569 is not possible if:
20570 - the address contains PRE_DEC, or
20571 - the mode refers to more than 4 double-word registers
20573 In those cases, it would be possible to replace VSTM/VLDM by a
20574 sequence of instructions; this is not currently implemented since
20575 this is not certain to actually improve performance. */
20577 const char *
20578 output_move_neon (rtx *operands)
20580 rtx reg, mem, addr, ops[2];
20581 int regno, nregs, load = REG_P (operands[0]);
20582 const char *templ;
20583 char buff[50];
20584 machine_mode mode;
20586 reg = operands[!load];
20587 mem = operands[load];
20589 mode = GET_MODE (reg);
20591 gcc_assert (REG_P (reg));
20592 regno = REGNO (reg);
20593 nregs = REG_NREGS (reg) / 2;
20594 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20595 || NEON_REGNO_OK_FOR_QUAD (regno));
20596 gcc_assert (VALID_NEON_DREG_MODE (mode)
20597 || VALID_NEON_QREG_MODE (mode)
20598 || VALID_NEON_STRUCT_MODE (mode));
20599 gcc_assert (MEM_P (mem));
20601 addr = XEXP (mem, 0);
20603 /* Strip off const from addresses like (const (plus (...))). */
20604 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20605 addr = XEXP (addr, 0);
20607 switch (GET_CODE (addr))
20609 case POST_INC:
20610 /* We have to use vldm / vstm for too-large modes. */
20611 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20613 templ = "v%smia%%?\t%%0!, %%h1";
20614 ops[0] = XEXP (addr, 0);
20616 else
20618 templ = "v%s1.64\t%%h1, %%A0";
20619 ops[0] = mem;
20621 ops[1] = reg;
20622 break;
20624 case PRE_DEC:
20625 /* We have to use vldm / vstm in this case, since there is no
20626 pre-decrement form of the vld1 / vst1 instructions. */
20627 templ = "v%smdb%%?\t%%0!, %%h1";
20628 ops[0] = XEXP (addr, 0);
20629 ops[1] = reg;
20630 break;
20632 case POST_MODIFY:
20633 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20634 gcc_unreachable ();
20636 case REG:
20637 /* We have to use vldm / vstm for too-large modes. */
20638 if (nregs > 1)
20640 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20641 templ = "v%smia%%?\t%%m0, %%h1";
20642 else
20643 templ = "v%s1.64\t%%h1, %%A0";
20645 ops[0] = mem;
20646 ops[1] = reg;
20647 break;
20649 /* Fall through. */
20650 case PLUS:
20651 if (GET_CODE (addr) == PLUS)
20652 addr = XEXP (addr, 0);
20653 /* Fall through. */
20654 case LABEL_REF:
20656 int i;
20657 int overlap = -1;
20658 for (i = 0; i < nregs; i++)
20660 /* We're only using DImode here because it's a convenient
20661 size. */
20662 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20663 ops[1] = adjust_address (mem, DImode, 8 * i);
20664 if (reg_overlap_mentioned_p (ops[0], mem))
20666 gcc_assert (overlap == -1);
20667 overlap = i;
20669 else
20671 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20672 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20673 else
20674 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20675 output_asm_insn (buff, ops);
20678 if (overlap != -1)
20680 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20681 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20682 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20683 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20684 else
20685 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20686 output_asm_insn (buff, ops);
20689 return "";
20692 default:
20693 gcc_unreachable ();
20696 sprintf (buff, templ, load ? "ld" : "st");
20697 output_asm_insn (buff, ops);
20699 return "";
20702 /* Compute and return the length of neon_mov<mode>, where <mode> is
20703 one of VSTRUCT modes: EI, OI, CI or XI. */
20705 arm_attr_length_move_neon (rtx_insn *insn)
20707 rtx reg, mem, addr;
20708 int load;
20709 machine_mode mode;
20711 extract_insn_cached (insn);
20713 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20715 mode = GET_MODE (recog_data.operand[0]);
20716 switch (mode)
20718 case E_EImode:
20719 case E_OImode:
20720 return 8;
20721 case E_CImode:
20722 return 12;
20723 case E_XImode:
20724 return 16;
20725 default:
20726 gcc_unreachable ();
20730 load = REG_P (recog_data.operand[0]);
20731 reg = recog_data.operand[!load];
20732 mem = recog_data.operand[load];
20734 gcc_assert (MEM_P (mem));
20736 addr = XEXP (mem, 0);
20738 /* Strip off const from addresses like (const (plus (...))). */
20739 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20740 addr = XEXP (addr, 0);
20742 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20744 int insns = REG_NREGS (reg) / 2;
20745 return insns * 4;
20747 else
20748 return 4;
20751 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20752 return zero. */
20755 arm_address_offset_is_imm (rtx_insn *insn)
20757 rtx mem, addr;
20759 extract_insn_cached (insn);
20761 if (REG_P (recog_data.operand[0]))
20762 return 0;
20764 mem = recog_data.operand[0];
20766 gcc_assert (MEM_P (mem));
20768 addr = XEXP (mem, 0);
20770 if (REG_P (addr)
20771 || (GET_CODE (addr) == PLUS
20772 && REG_P (XEXP (addr, 0))
20773 && CONST_INT_P (XEXP (addr, 1))))
20774 return 1;
20775 else
20776 return 0;
20779 /* Output an ADD r, s, #n where n may be too big for one instruction.
20780 If n is zero and the destination register is the same as the source, output nothing. */
20781 const char *
20782 output_add_immediate (rtx *operands)
20784 HOST_WIDE_INT n = INTVAL (operands[2]);
20786 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20788 if (n < 0)
20789 output_multi_immediate (operands,
20790 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20791 -n);
20792 else
20793 output_multi_immediate (operands,
20794 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20798 return "";
20801 /* Output a multiple immediate operation.
20802 OPERANDS is the vector of operands referred to in the output patterns.
20803 INSTR1 is the output pattern to use for the first constant.
20804 INSTR2 is the output pattern to use for subsequent constants.
20805 IMMED_OP is the index of the constant slot in OPERANDS.
20806 N is the constant value. */
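/* A worked example (derived from the loop below): N = 0x10001 is emitted as
   two instructions using the immediates #1 and #0x10000.  Each pass extracts
   an 8-bit chunk starting at an even bit position, so every chunk is a valid
   ARM data-processing immediate.  */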
20807 static const char *
20808 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20809 int immed_op, HOST_WIDE_INT n)
20811 #if HOST_BITS_PER_WIDE_INT > 32
20812 n &= 0xffffffff;
20813 #endif
20815 if (n == 0)
20817 /* Quick and easy output. */
20818 operands[immed_op] = const0_rtx;
20819 output_asm_insn (instr1, operands);
20821 else
20823 int i;
20824 const char * instr = instr1;
20826 /* Note that n is never zero here (which would give no output). */
20827 for (i = 0; i < 32; i += 2)
20829 if (n & (3 << i))
20831 operands[immed_op] = GEN_INT (n & (255 << i));
20832 output_asm_insn (instr, operands);
20833 instr = instr2;
20834 i += 6;
20839 return "";
20842 /* Return the name of a shifter operation. */
20843 static const char *
20844 arm_shift_nmem(enum rtx_code code)
20846 switch (code)
20848 case ASHIFT:
20849 return ARM_LSL_NAME;
20851 case ASHIFTRT:
20852 return "asr";
20854 case LSHIFTRT:
20855 return "lsr";
20857 case ROTATERT:
20858 return "ror";
20860 default:
20861 abort();
20865 /* Return the appropriate ARM instruction for the operation code.
20866 The returned result should not be overwritten. OP is the rtx of the
20867 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20868 was shifted. */
20869 const char *
20870 arithmetic_instr (rtx op, int shift_first_arg)
20872 switch (GET_CODE (op))
20874 case PLUS:
20875 return "add";
20877 case MINUS:
20878 return shift_first_arg ? "rsb" : "sub";
20880 case IOR:
20881 return "orr";
20883 case XOR:
20884 return "eor";
20886 case AND:
20887 return "and";
20889 case ASHIFT:
20890 case ASHIFTRT:
20891 case LSHIFTRT:
20892 case ROTATERT:
20893 return arm_shift_nmem(GET_CODE(op));
20895 default:
20896 gcc_unreachable ();
20900 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20901 for the operation code. The returned result should not be overwritten.
20902 OP is the rtx code of the shift.
20903 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20904 constant shift amount otherwise. */
20905 static const char *
20906 shift_op (rtx op, HOST_WIDE_INT *amountp)
20908 const char * mnem;
20909 enum rtx_code code = GET_CODE (op);
20911 switch (code)
20913 case ROTATE:
20914 if (!CONST_INT_P (XEXP (op, 1)))
20916 output_operand_lossage ("invalid shift operand");
20917 return NULL;
20920 code = ROTATERT;
20921 *amountp = 32 - INTVAL (XEXP (op, 1));
20922 mnem = "ror";
20923 break;
20925 case ASHIFT:
20926 case ASHIFTRT:
20927 case LSHIFTRT:
20928 case ROTATERT:
20929 mnem = arm_shift_nmem(code);
20930 if (CONST_INT_P (XEXP (op, 1)))
20932 *amountp = INTVAL (XEXP (op, 1));
20934 else if (REG_P (XEXP (op, 1)))
20936 *amountp = -1;
20937 return mnem;
20939 else
20941 output_operand_lossage ("invalid shift operand");
20942 return NULL;
20944 break;
20946 case MULT:
20947 /* We never have to worry about the amount being other than a
20948 power of 2, since this case can never be reloaded from a reg. */
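/* For example, (mult x 8) is output as LSL with *AMOUNTP set to 3.  */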
20949 if (!CONST_INT_P (XEXP (op, 1)))
20951 output_operand_lossage ("invalid shift operand");
20952 return NULL;
20955 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20957 /* Amount must be a power of two. */
20958 if (*amountp & (*amountp - 1))
20960 output_operand_lossage ("invalid shift operand");
20961 return NULL;
20964 *amountp = exact_log2 (*amountp);
20965 gcc_assert (IN_RANGE (*amountp, 0, 31));
20966 return ARM_LSL_NAME;
20968 default:
20969 output_operand_lossage ("invalid shift operand");
20970 return NULL;
20973 /* This is not 100% correct, but follows from the desire to merge
20974 multiplication by a power of 2 with the recognizer for a
20975 shift. >=32 is not a valid shift for "lsl", so we must try and
20976 output a shift that produces the correct arithmetical result.
20977 Using lsr #32 is identical except for the fact that the carry bit
20978 is not set correctly if we set the flags; but we never use the
20979 carry bit from such an operation, so we can ignore that. */
20980 if (code == ROTATERT)
20981 /* Rotate is just modulo 32. */
20982 *amountp &= 31;
20983 else if (*amountp != (*amountp & 31))
20985 if (code == ASHIFT)
20986 mnem = "lsr";
20987 *amountp = 32;
20990 /* Shifts of 0 are no-ops. */
20991 if (*amountp == 0)
20992 return NULL;
20994 return mnem;
20997 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20998 because /bin/as is horribly restrictive. The judgement about
20999 whether or not each character is 'printable' (and can be output as
21000 is) or not (and must be printed with an octal escape) must be made
21001 with reference to the *host* character set -- the situation is
21002 similar to that discussed in the comments above pp_c_char in
21003 c-pretty-print.cc. */
21005 #define MAX_ASCII_LEN 51
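/* Note: the count below is of output characters, and an escaped character
   contributes four of them, so each .ascii directive carries at most about
   54 characters of string payload.  */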
21007 void
21008 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21010 int i;
21011 int len_so_far = 0;
21013 fputs ("\t.ascii\t\"", stream);
21015 for (i = 0; i < len; i++)
21017 int c = p[i];
21019 if (len_so_far >= MAX_ASCII_LEN)
21021 fputs ("\"\n\t.ascii\t\"", stream);
21022 len_so_far = 0;
21025 if (ISPRINT (c))
21027 if (c == '\\' || c == '\"')
21029 putc ('\\', stream);
21030 len_so_far++;
21032 putc (c, stream);
21033 len_so_far++;
21035 else
21037 fprintf (stream, "\\%03o", c);
21038 len_so_far += 4;
21042 fputs ("\"\n", stream);
21046 /* Compute the register save mask for registers 0 through 12
21047 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21049 static unsigned long
21050 arm_compute_save_reg0_reg12_mask (void)
21052 unsigned long func_type = arm_current_func_type ();
21053 unsigned long save_reg_mask = 0;
21054 unsigned int reg;
21056 if (IS_INTERRUPT (func_type))
21058 unsigned int max_reg;
21059 /* Interrupt functions must not corrupt any registers,
21060 even call clobbered ones. If this is a leaf function
21061 we can just examine the registers used by the RTL, but
21062 otherwise we have to assume that whatever function is
21063 called might clobber anything, and so we have to save
21064 all the call-clobbered registers as well. */
21065 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21066 /* FIQ handlers have registers r8 - r12 banked, so
21067 we only need to check r0 - r7.  Normal ISRs only
21068 bank r14 and r15, so we must check up to r12.
21069 r13 is the stack pointer which is always preserved,
21070 so we do not need to consider it here. */
21071 max_reg = 7;
21072 else
21073 max_reg = 12;
21075 for (reg = 0; reg <= max_reg; reg++)
21076 if (reg_needs_saving_p (reg))
21077 save_reg_mask |= (1 << reg);
21079 /* Also save the pic base register if necessary. */
21080 if (PIC_REGISTER_MAY_NEED_SAVING
21081 && crtl->uses_pic_offset_table)
21082 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21084 else if (IS_VOLATILE(func_type))
21086 /* For noreturn functions we historically omitted register saves
21087 altogether. However this really messes up debugging. As a
21088 compromise save just the frame pointers. Combined with the link
21089 register saved elsewhere this should be sufficient to get
21090 a backtrace. */
21091 if (frame_pointer_needed)
21092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21093 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21094 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21095 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21096 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21098 else
21100 /* In the normal case we only need to save those registers
21101 which are call saved and which are used by this function. */
21102 for (reg = 0; reg <= 11; reg++)
21103 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21104 save_reg_mask |= (1 << reg);
21106 /* Handle the frame pointer as a special case. */
21107 if (frame_pointer_needed)
21108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21110 /* If we aren't loading the PIC register,
21111 don't stack it even though it may be live. */
21112 if (PIC_REGISTER_MAY_NEED_SAVING
21113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21114 || crtl->uses_pic_offset_table))
21115 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21117 /* The prologue will copy SP into R0, so save it. */
21118 if (IS_STACKALIGN (func_type))
21119 save_reg_mask |= 1;
21122 /* Save registers so the exception handler can modify them. */
21123 if (crtl->calls_eh_return)
21125 unsigned int i;
21127 for (i = 0; ; i++)
21129 reg = EH_RETURN_DATA_REGNO (i);
21130 if (reg == INVALID_REGNUM)
21131 break;
21132 save_reg_mask |= 1 << reg;
21136 return save_reg_mask;
21139 /* Return true if r3 is live at the start of the function. */
21141 static bool
21142 arm_r3_live_at_start_p (void)
21144 /* Just look at cfg info, which is still close enough to correct at this
21145 point. This gives false positives for broken functions that might use
21146 uninitialized data that happens to be allocated in r3, but who cares? */
21147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21150 /* Compute the number of bytes used to store the static chain register on the
21151 stack, above the stack frame. We need to know this accurately to get the
21152 alignment of the rest of the stack frame correct. */
21154 static int
21155 arm_compute_static_chain_stack_bytes (void)
21157 /* Once the value is updated from the init value of -1, do not
21158 re-compute. */
21159 if (cfun->machine->static_chain_stack_bytes != -1)
21160 return cfun->machine->static_chain_stack_bytes;
21162 /* See the defining assertion in arm_expand_prologue. */
21163 if (IS_NESTED (arm_current_func_type ())
21164 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21165 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21166 || flag_stack_clash_protection)
21167 && !df_regs_ever_live_p (LR_REGNUM)))
21168 && arm_r3_live_at_start_p ()
21169 && crtl->args.pretend_args_size == 0)
21170 return 4;
21172 return 0;
21175 /* Compute a bit mask of which core registers need to be
21176 saved on the stack for the current function.
21177 This is used by arm_compute_frame_layout, which may add extra registers. */
21179 static unsigned long
21180 arm_compute_save_core_reg_mask (void)
21182 unsigned int save_reg_mask = 0;
21183 unsigned long func_type = arm_current_func_type ();
21184 unsigned int reg;
21186 if (IS_NAKED (func_type))
21187 /* This should never really happen. */
21188 return 0;
21190 /* If we are creating a stack frame, then we must save the frame pointer,
21191 IP (which will hold the old stack pointer), LR and the PC. */
21192 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21193 save_reg_mask |=
21194 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21195 | (1 << IP_REGNUM)
21196 | (1 << LR_REGNUM)
21197 | (1 << PC_REGNUM);
21199 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21201 /* Decide if we need to save the link register.
21202 Interrupt routines have their own banked link register,
21203 so they never need to save it.
21204 Otherwise if we do not use the link register we do not need to save
21205 it. If we are pushing other registers onto the stack however, we
21206 can save an instruction in the epilogue by pushing the link register
21207 now and then popping it back into the PC. This incurs extra memory
21208 accesses though, so we only do it when optimizing for size, and only
21209 if we know that we will not need a fancy return sequence. */
21210 if (df_regs_ever_live_p (LR_REGNUM)
21211 || (save_reg_mask
21212 && optimize_size
21213 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21214 && !crtl->tail_call_emit
21215 && !crtl->calls_eh_return))
21216 save_reg_mask |= 1 << LR_REGNUM;
21218 if (cfun->machine->lr_save_eliminated)
21219 save_reg_mask &= ~ (1 << LR_REGNUM);
21221 if (TARGET_REALLY_IWMMXT
21222 && ((bit_count (save_reg_mask)
21223 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21224 arm_compute_static_chain_stack_bytes())
21225 ) % 2) != 0)
21227 /* The total number of registers that are going to be pushed
21228 onto the stack is odd. We need to ensure that the stack
21229 is 64-bit aligned before we start to save iWMMXt registers,
21230 and also before we start to create locals. (A local variable
21231 might be a double or long long which we will load/store using
21232 an iWMMXt instruction). Therefore we need to push another
21233 ARM register, so that the stack will be 64-bit aligned. We
21234 try to avoid using the arg registers (r0 - r3) as they might be
21235 used to pass values in a tail call. */
21236 for (reg = 4; reg <= 12; reg++)
21237 if ((save_reg_mask & (1 << reg)) == 0)
21238 break;
21240 if (reg <= 12)
21241 save_reg_mask |= (1 << reg);
21242 else
21244 cfun->machine->sibcall_blocked = 1;
21245 save_reg_mask |= (1 << 3);
21249 /* We may need to push an additional register for use initializing the
21250 PIC base register. */
21251 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21252 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21254 reg = thumb_find_work_register (1 << 4);
21255 if (!call_used_or_fixed_reg_p (reg))
21256 save_reg_mask |= (1 << reg);
21259 return save_reg_mask;
21262 /* Compute a bit mask of which core registers need to be
21263 saved on the stack for the current function. */
21264 static unsigned long
21265 thumb1_compute_save_core_reg_mask (void)
21267 unsigned long mask;
21268 unsigned reg;
21270 mask = 0;
21271 for (reg = 0; reg < 12; reg ++)
21272 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21273 mask |= 1 << reg;
21275 /* Handle the frame pointer as a special case. */
21276 if (frame_pointer_needed)
21277 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21279 if (flag_pic
21280 && !TARGET_SINGLE_PIC_BASE
21281 && arm_pic_register != INVALID_REGNUM
21282 && crtl->uses_pic_offset_table)
21283 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21285 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21286 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21287 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21289 /* LR will also be pushed if any lo regs are pushed. */
21290 if (mask & 0xff || thumb_force_lr_save ())
21291 mask |= (1 << LR_REGNUM);
21293 bool call_clobbered_scratch
21294 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21295 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21297 /* Make sure we have a low work register if we need one. We will
21298 need one if we are going to push a high register, but we are not
21299 currently intending to push a low register. However if both the
21300 prologue and epilogue have a spare call-clobbered low register,
21301 then we won't need to find an additional work register. It does
21302 not need to be the same register in the prologue and
21303 epilogue. */
21304 if ((mask & 0xff) == 0
21305 && !call_clobbered_scratch
21306 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21308 /* Use thumb_find_work_register to choose which register
21309 we will use. If the register is live then we will
21310 have to push it. Use LAST_LO_REGNUM as our fallback
21311 choice for the register to select. */
21312 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21313 /* Make sure the register returned by thumb_find_work_register is
21314 not part of the return value. */
21315 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21316 reg = LAST_LO_REGNUM;
21318 if (callee_saved_reg_p (reg))
21319 mask |= 1 << reg;
21322 /* The 504 below is 8 bytes less than 512 because there are two possible
21323 alignment words. We can't tell here if they will be present or not so we
21324 have to play it safe and assume that they are. */
21325 if ((CALLER_INTERWORKING_SLOT_SIZE +
21326 ROUND_UP_WORD (get_frame_size ()) +
21327 crtl->outgoing_args_size) >= 504)
21329 /* This is the same as the code in thumb1_expand_prologue() which
21330 determines which register to use for stack decrement. */
21331 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21332 if (mask & (1 << reg))
21333 break;
21335 if (reg > LAST_LO_REGNUM)
21337 /* Make sure we have a register available for stack decrement. */
21338 mask |= 1 << LAST_LO_REGNUM;
21342 return mask;
21345 /* Return the number of bytes required to save VFP registers. */
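/* The loop below walks the S registers two at a time, i.e. one D register
   per step; each run of consecutive D registers that must be saved
   contributes 8 bytes per D register.  */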
21346 static int
21347 arm_get_vfp_saved_size (void)
21349 unsigned int regno;
21350 int count;
21351 int saved;
21353 saved = 0;
21354 /* Space for saved VFP registers. */
21355 if (TARGET_VFP_BASE)
21357 count = 0;
21358 for (regno = FIRST_VFP_REGNUM;
21359 regno < LAST_VFP_REGNUM;
21360 regno += 2)
21362 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21364 if (count > 0)
21366 /* Workaround ARM10 VFPr1 bug. */
21367 if (count == 2 && !arm_arch6)
21368 count++;
21369 saved += count * 8;
21371 count = 0;
21373 else
21374 count++;
21376 if (count > 0)
21378 if (count == 2 && !arm_arch6)
21379 count++;
21380 saved += count * 8;
21383 return saved;
21387 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21388 everything bar the final return instruction. If simple_return is true,
21389 then do not output epilogue, because it has already been emitted in RTL.
21391 Note: do not forget to update length attribute of corresponding insn pattern
21392 when changing assembly output (eg. length attribute of
21393 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21394 register clearing sequences). */
21395 const char *
21396 output_return_instruction (rtx operand, bool really_return, bool reverse,
21397 bool simple_return)
21399 char conditional[10];
21400 char instr[100];
21401 unsigned reg;
21402 unsigned long live_regs_mask;
21403 unsigned long func_type;
21404 arm_stack_offsets *offsets;
21406 func_type = arm_current_func_type ();
21408 if (IS_NAKED (func_type))
21409 return "";
21411 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21413 /* If this function was declared non-returning, and we have
21414 found a tail call, then we have to trust that the called
21415 function won't return. */
21416 if (really_return)
21418 rtx ops[2];
21420 /* Otherwise, trap an attempted return by aborting. */
21421 ops[0] = operand;
21422 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21423 : "abort");
21424 assemble_external_libcall (ops[1]);
21425 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21428 return "";
21431 gcc_assert (!cfun->calls_alloca || really_return);
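/* This builds the string "%?%d0" (or "%?%D0" when the sense of the condition
   is reversed), which the output routines expand to the instruction's
   condition-code suffix.  */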
21433 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21435 cfun->machine->return_used_this_function = 1;
21437 offsets = arm_get_frame_offsets ();
21438 live_regs_mask = offsets->saved_regs_mask;
21440 if (!simple_return && live_regs_mask)
21442 const char * return_reg;
21444 /* If we do not have any special requirements for function exit
21445 (e.g. interworking) then we can load the return address
21446 directly into the PC. Otherwise we must load it into LR. */
21447 if (really_return
21448 && !IS_CMSE_ENTRY (func_type)
21449 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21450 return_reg = reg_names[PC_REGNUM];
21451 else
21452 return_reg = reg_names[LR_REGNUM];
21454 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21456 /* There are three possible reasons for the IP register
21457 being saved. 1) a stack frame was created, in which case
21458 IP contains the old stack pointer, or 2) an ISR routine
21459 corrupted it, or 3) it was saved to align the stack on
21460 iWMMXt. In case 1, restore IP into SP, otherwise just
21461 restore IP. */
21462 if (frame_pointer_needed)
21464 live_regs_mask &= ~ (1 << IP_REGNUM);
21465 live_regs_mask |= (1 << SP_REGNUM);
21467 else
21468 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21471 /* On some ARM architectures it is faster to use LDR rather than
21472 LDM to load a single register. On other architectures, the
21473 cost is the same. In 26 bit mode, or for exception handlers,
21474 we have to use LDM to load the PC so that the CPSR is also
21475 restored. */
21476 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21477 if (live_regs_mask == (1U << reg))
21478 break;
21480 if (reg <= LAST_ARM_REGNUM
21481 && (reg != LR_REGNUM
21482 || ! really_return
21483 || ! IS_INTERRUPT (func_type)))
21485 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21486 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21488 else
21490 char *p;
21491 int first = 1;
21493 /* Generate the load multiple instruction to restore the
21494 registers. Note we can get here, even if
21495 frame_pointer_needed is true, but only if sp already
21496 points to the base of the saved core registers. */
21497 if (live_regs_mask & (1 << SP_REGNUM))
21499 unsigned HOST_WIDE_INT stack_adjust;
21501 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21502 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21504 if (stack_adjust && arm_arch5t && TARGET_ARM)
21505 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21506 else
21508 /* If we can't use ldmib (SA110 bug),
21509 then try to pop r3 instead. */
21510 if (stack_adjust)
21511 live_regs_mask |= 1 << 3;
21513 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21516 /* For interrupt returns we have to use an LDM rather than
21517 a POP so that we can use the exception return variant. */
21518 else if (IS_INTERRUPT (func_type))
21519 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21520 else
21521 sprintf (instr, "pop%s\t{", conditional);
21523 p = instr + strlen (instr);
21525 for (reg = 0; reg <= SP_REGNUM; reg++)
21526 if (live_regs_mask & (1 << reg))
21528 int l = strlen (reg_names[reg]);
21530 if (first)
21531 first = 0;
21532 else
21534 memcpy (p, ", ", 2);
21535 p += 2;
21538 memcpy (p, "%|", 2);
21539 memcpy (p + 2, reg_names[reg], l);
21540 p += l + 2;
21543 if (live_regs_mask & (1 << LR_REGNUM))
21545 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21546 /* If returning from an interrupt, restore the CPSR. */
21547 if (IS_INTERRUPT (func_type))
21548 strcat (p, "^");
21550 else
21551 strcpy (p, "}");
21554 output_asm_insn (instr, & operand);
21556 /* See if we need to generate an extra instruction to
21557 perform the actual function return. */
21558 if (really_return
21559 && func_type != ARM_FT_INTERWORKED
21560 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21562 /* The return has already been handled
21563 by loading the LR into the PC. */
21564 return "";
21568 if (really_return)
21570 switch ((int) ARM_FUNC_TYPE (func_type))
21572 case ARM_FT_ISR:
21573 case ARM_FT_FIQ:
21574 /* ??? This is wrong for unified assembly syntax. */
21575 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21576 break;
21578 case ARM_FT_INTERWORKED:
21579 gcc_assert (arm_arch5t || arm_arch4t);
21580 sprintf (instr, "bx%s\t%%|lr", conditional);
21581 break;
21583 case ARM_FT_EXCEPTION:
21584 /* ??? This is wrong for unified assembly syntax. */
21585 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21586 break;
21588 default:
21589 if (IS_CMSE_ENTRY (func_type))
21591 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21592 emitted by cmse_nonsecure_entry_clear_before_return () and the
21593 VSTR/VLDR instructions in the prologue and epilogue. */
21594 if (!TARGET_HAVE_FPCXT_CMSE)
21596 /* Check if we have to clear the 'GE bits' which are only used if
21597 parallel add and subtraction instructions are available. */
21598 if (TARGET_INT_SIMD)
21599 snprintf (instr, sizeof (instr),
21600 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21601 else
21602 snprintf (instr, sizeof (instr),
21603 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21605 output_asm_insn (instr, & operand);
21606 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21607 care of it. */
21608 if (TARGET_HARD_FLOAT)
21610 /* Clear the cumulative exception-status bits (0-4,7) and
21611 the condition code bits (28-31) of the FPSCR. We need
21612 to remember to clear the first scratch register used
21613 (IP) and save and restore the second (r4).
21615 Important note: the length of the
21616 thumb2_cmse_entry_return insn pattern must account for
21617 the size of the below instructions. */
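/* 65376 is 0xff60 and 4095 is 0x0fff, so r4 holds the mask 0x0fffff60:
   ANDing the FPSCR value with it clears bits 0-4, 7 and 28-31 while leaving
   the remaining bits untouched.  */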
21618 output_asm_insn ("push\t{%|r4}", & operand);
21619 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21620 output_asm_insn ("movw\t%|r4, #65376", & operand);
21621 output_asm_insn ("movt\t%|r4, #4095", & operand);
21622 output_asm_insn ("and\t%|ip, %|r4", & operand);
21623 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21624 output_asm_insn ("pop\t{%|r4}", & operand);
21625 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21628 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21630 /* Use bx if it's available. */
21631 else if (arm_arch5t || arm_arch4t)
21632 sprintf (instr, "bx%s\t%%|lr", conditional);
21633 else
21634 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21635 break;
21638 output_asm_insn (instr, & operand);
21641 return "";
21644 /* Output in FILE asm statements needed to declare the NAME of the function
21645 defined by its DECL node. */
21647 void
21648 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21650 size_t cmse_name_len;
21651 char *cmse_name = 0;
21652 char cmse_prefix[] = "__acle_se_";
21654 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21655 extra function label for each function with the 'cmse_nonsecure_entry'
21656 attribute. This extra function label should be prepended with
21657 '__acle_se_', telling the linker that it needs to create secure gateway
21658 veneers for this function. */
21659 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21660 DECL_ATTRIBUTES (decl)))
21662 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21663 cmse_name = XALLOCAVEC (char, cmse_name_len);
21664 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21665 targetm.asm_out.globalize_label (file, cmse_name);
21667 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21668 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21671 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21672 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21673 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21674 ASM_OUTPUT_LABEL (file, name);
21676 if (cmse_name)
21677 ASM_OUTPUT_LABEL (file, cmse_name);
21679 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21682 /* Write the function name into the code section, directly preceding
21683 the function prologue.
21685 Code will be output similar to this:
21687 .ascii "arm_poke_function_name", 0
21688 .align
21690 .word 0xff000000 + (t1 - t0)
21691 arm_poke_function_name
21692 mov ip, sp
21693 stmfd sp!, {fp, ip, lr, pc}
21694 sub fp, ip, #4
21696 When performing a stack backtrace, code can inspect the value
21697 of 'pc' stored at 'fp' + 0. If the trace function then looks
21698 at location pc - 12 and the top 8 bits are set, then we know
21699 that there is a function name embedded immediately preceding this
21700 location, whose length is (pc[-3] & 0x00ffffff).
21702 We assume that pc is declared as a pointer to an unsigned long.
21704 It is of no benefit to output the function name if we are assembling
21705 a leaf function. These function types will not contain a stack
21706 backtrace structure, therefore it is not possible to determine the
21707 function name. */
21708 void
21709 arm_poke_function_name (FILE *stream, const char *name)
21711 unsigned long alignlength;
21712 unsigned long length;
21713 rtx x;
21715 length = strlen (name) + 1;
21716 alignlength = ROUND_UP_WORD (length);
21718 ASM_OUTPUT_ASCII (stream, name, length);
21719 ASM_OUTPUT_ALIGN (stream, 2);
21720 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21721 assemble_aligned_integer (UNITS_PER_WORD, x);
21724 /* Place some comments into the assembler stream
21725 describing the current function. */
21726 static void
21727 arm_output_function_prologue (FILE *f)
21729 unsigned long func_type;
21731 /* Sanity check. */
21732 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21734 func_type = arm_current_func_type ();
21736 switch ((int) ARM_FUNC_TYPE (func_type))
21738 default:
21739 case ARM_FT_NORMAL:
21740 break;
21741 case ARM_FT_INTERWORKED:
21742 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21743 break;
21744 case ARM_FT_ISR:
21745 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21746 break;
21747 case ARM_FT_FIQ:
21748 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21749 break;
21750 case ARM_FT_EXCEPTION:
21751 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21752 break;
21755 if (IS_NAKED (func_type))
21756 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21758 if (IS_VOLATILE (func_type))
21759 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21761 if (IS_NESTED (func_type))
21762 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21763 if (IS_STACKALIGN (func_type))
21764 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21765 if (IS_CMSE_ENTRY (func_type))
21766 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21768 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21769 (HOST_WIDE_INT) crtl->args.size,
21770 crtl->args.pretend_args_size,
21771 (HOST_WIDE_INT) get_frame_size ());
21773 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21774 frame_pointer_needed,
21775 cfun->machine->uses_anonymous_args);
21777 if (cfun->machine->lr_save_eliminated)
21778 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21780 if (crtl->calls_eh_return)
21781 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21785 static void
21786 arm_output_function_epilogue (FILE *)
21788 arm_stack_offsets *offsets;
21790 if (TARGET_THUMB1)
21792 int regno;
21794 /* Emit any call-via-reg trampolines that are needed for v4t support
21795 of call_reg and call_value_reg type insns. */
21796 for (regno = 0; regno < LR_REGNUM; regno++)
21798 rtx label = cfun->machine->call_via[regno];
21800 if (label != NULL)
21802 switch_to_section (function_section (current_function_decl));
21803 targetm.asm_out.internal_label (asm_out_file, "L",
21804 CODE_LABEL_NUMBER (label));
21805 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21809 /* ??? Probably not safe to set this here, since it assumes that a
21810 function will be emitted as assembly immediately after we generate
21811 RTL for it. This does not happen for inline functions. */
21812 cfun->machine->return_used_this_function = 0;
21814 else /* TARGET_32BIT */
21816 /* We need to take into account any stack-frame rounding. */
21817 offsets = arm_get_frame_offsets ();
21819 gcc_assert (!use_return_insn (FALSE, NULL)
21820 || (cfun->machine->return_used_this_function != 0)
21821 || offsets->saved_regs == offsets->outgoing_args
21822 || frame_pointer_needed);
21826 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21827 STR and STRD. If an even number of registers are being pushed, one
21828 or more STRD patterns are created for each register pair. If an
21829 odd number of registers are pushed, emit an initial STR followed by
21830 as many STRD instructions as are needed. This works best when the
21831 stack is initially 64-bit aligned (the normal case), since it
21832 ensures that each STRD is also 64-bit aligned. */
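/* For example, pushing {r4, r5, r6} emits roughly
   str r4, [sp, #-12]! followed by strd r5, r6, [sp, #4].  */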
21833 static void
21834 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21836 int num_regs = 0;
21837 int i;
21838 int regno;
21839 rtx par = NULL_RTX;
21840 rtx dwarf = NULL_RTX;
21841 rtx tmp;
21842 bool first = true;
21844 num_regs = bit_count (saved_regs_mask);
21846 /* Must be at least one register to save, and can't save SP or PC. */
21847 gcc_assert (num_regs > 0 && num_regs <= 14);
21848 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21849 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21851 /* Create sequence for DWARF info. All the frame-related data for
21852 debugging is held in this wrapper. */
21853 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21855 /* Describe the stack adjustment. */
21856 tmp = gen_rtx_SET (stack_pointer_rtx,
21857 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21858 RTX_FRAME_RELATED_P (tmp) = 1;
21859 XVECEXP (dwarf, 0, 0) = tmp;
21861 /* Find the first register. */
21862 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21865 i = 0;
21867 /* If there's an odd number of registers to push, start off by
21868 pushing a single register. This ensures that subsequent strd
21869 operations are dword aligned (assuming that SP was originally
21870 64-bit aligned). */
21871 if ((num_regs & 1) != 0)
21873 rtx reg, mem, insn;
21875 reg = gen_rtx_REG (SImode, regno);
21876 if (num_regs == 1)
21877 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21878 stack_pointer_rtx));
21879 else
21880 mem = gen_frame_mem (Pmode,
21881 gen_rtx_PRE_MODIFY
21882 (Pmode, stack_pointer_rtx,
21883 plus_constant (Pmode, stack_pointer_rtx,
21884 -4 * num_regs)));
21886 tmp = gen_rtx_SET (mem, reg);
21887 RTX_FRAME_RELATED_P (tmp) = 1;
21888 insn = emit_insn (tmp);
21889 RTX_FRAME_RELATED_P (insn) = 1;
21890 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21891 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21892 RTX_FRAME_RELATED_P (tmp) = 1;
21893 i++;
21894 regno++;
21895 XVECEXP (dwarf, 0, i) = tmp;
21896 first = false;
21899 while (i < num_regs)
21900 if (saved_regs_mask & (1 << regno))
21902 rtx reg1, reg2, mem1, mem2;
21903 rtx tmp0, tmp1, tmp2;
21904 int regno2;
21906 /* Find the register to pair with this one. */
21907 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21908 regno2++)
21911 reg1 = gen_rtx_REG (SImode, regno);
21912 reg2 = gen_rtx_REG (SImode, regno2);
21914 if (first)
21916 rtx insn;
21918 first = false;
21919 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21920 stack_pointer_rtx,
21921 -4 * num_regs));
21922 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21923 stack_pointer_rtx,
21924 -4 * (num_regs - 1)));
21925 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21926 plus_constant (Pmode, stack_pointer_rtx,
21927 -4 * (num_regs)));
21928 tmp1 = gen_rtx_SET (mem1, reg1);
21929 tmp2 = gen_rtx_SET (mem2, reg2);
21930 RTX_FRAME_RELATED_P (tmp0) = 1;
21931 RTX_FRAME_RELATED_P (tmp1) = 1;
21932 RTX_FRAME_RELATED_P (tmp2) = 1;
21933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21934 XVECEXP (par, 0, 0) = tmp0;
21935 XVECEXP (par, 0, 1) = tmp1;
21936 XVECEXP (par, 0, 2) = tmp2;
21937 insn = emit_insn (par);
21938 RTX_FRAME_RELATED_P (insn) = 1;
21939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21941 else
21943 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21944 stack_pointer_rtx,
21945 4 * i));
21946 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21947 stack_pointer_rtx,
21948 4 * (i + 1)));
21949 tmp1 = gen_rtx_SET (mem1, reg1);
21950 tmp2 = gen_rtx_SET (mem2, reg2);
21951 RTX_FRAME_RELATED_P (tmp1) = 1;
21952 RTX_FRAME_RELATED_P (tmp2) = 1;
21953 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21954 XVECEXP (par, 0, 0) = tmp1;
21955 XVECEXP (par, 0, 1) = tmp2;
21956 emit_insn (par);
21959 /* Create unwind information. This is an approximation. */
21960 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21961 plus_constant (Pmode,
21962 stack_pointer_rtx,
21963 4 * i)),
21964 reg1);
21965 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21966 plus_constant (Pmode,
21967 stack_pointer_rtx,
21968 4 * (i + 1))),
21969 reg2);
21971 RTX_FRAME_RELATED_P (tmp1) = 1;
21972 RTX_FRAME_RELATED_P (tmp2) = 1;
21973 XVECEXP (dwarf, 0, i + 1) = tmp1;
21974 XVECEXP (dwarf, 0, i + 2) = tmp2;
21975 i += 2;
21976 regno = regno2 + 1;
21978 else
21979 regno++;
21981 return;
21984 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21985 whenever possible, otherwise it emits single-word stores. The first store
21986 also allocates stack space for all saved registers, using writeback with
21987 post-addressing mode. All other stores use offset addressing. If no STRD
21988 can be emitted, this function emits a sequence of single-word stores,
21989 and not an STM as before, because single-word stores provide more
21990 scheduling freedom and can be turned into an STM by peephole optimizations. */
21991 static void
21992 arm_emit_strd_push (unsigned long saved_regs_mask)
21994 int num_regs = 0;
21995 int i, j, dwarf_index = 0;
21996 int offset = 0;
21997 rtx dwarf = NULL_RTX;
21998 rtx insn = NULL_RTX;
21999 rtx tmp, mem;
22001 /* TODO: More efficient code can be emitted by changing the
22002 layout, e.g., first push all pairs that can use STRD to keep the
22003 stack aligned, and then push all other registers. */
22004 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22005 if (saved_regs_mask & (1 << i))
22006 num_regs++;
22008 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22009 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22010 gcc_assert (num_regs > 0);
22012 /* Create sequence for DWARF info. */
22013 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22015 /* For dwarf info, we generate explicit stack update. */
22016 tmp = gen_rtx_SET (stack_pointer_rtx,
22017 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22018 RTX_FRAME_RELATED_P (tmp) = 1;
22019 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22021 /* Save registers. */
22022 offset = - 4 * num_regs;
22023 j = 0;
22024 while (j <= LAST_ARM_REGNUM)
22025 if (saved_regs_mask & (1 << j))
22027 if ((j % 2 == 0)
22028 && (saved_regs_mask & (1 << (j + 1))))
22030 /* Current register and next register form a register pair for
22031 which STRD can be generated. */
22032 if (offset < 0)
22034 /* Allocate stack space for all saved registers. */
22035 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22036 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22037 mem = gen_frame_mem (DImode, tmp);
22038 offset = 0;
22040 else if (offset > 0)
22041 mem = gen_frame_mem (DImode,
22042 plus_constant (Pmode,
22043 stack_pointer_rtx,
22044 offset));
22045 else
22046 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22048 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22049 RTX_FRAME_RELATED_P (tmp) = 1;
22050 tmp = emit_insn (tmp);
22052 /* Record the first store insn. */
22053 if (dwarf_index == 1)
22054 insn = tmp;
22056 /* Generate dwarf info. */
22057 mem = gen_frame_mem (SImode,
22058 plus_constant (Pmode,
22059 stack_pointer_rtx,
22060 offset));
22061 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22062 RTX_FRAME_RELATED_P (tmp) = 1;
22063 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22065 mem = gen_frame_mem (SImode,
22066 plus_constant (Pmode,
22067 stack_pointer_rtx,
22068 offset + 4));
22069 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22070 RTX_FRAME_RELATED_P (tmp) = 1;
22071 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22073 offset += 8;
22074 j += 2;
22076 else
22078 /* Emit a single word store. */
22079 if (offset < 0)
22081 /* Allocate stack space for all saved registers. */
22082 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22083 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22084 mem = gen_frame_mem (SImode, tmp);
22085 offset = 0;
22087 else if (offset > 0)
22088 mem = gen_frame_mem (SImode,
22089 plus_constant (Pmode,
22090 stack_pointer_rtx,
22091 offset));
22092 else
22093 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22095 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22096 RTX_FRAME_RELATED_P (tmp) = 1;
22097 tmp = emit_insn (tmp);
22099 /* Record the first store insn. */
22100 if (dwarf_index == 1)
22101 insn = tmp;
22103 /* Generate dwarf info. */
22104 mem = gen_frame_mem (SImode,
22105 plus_constant(Pmode,
22106 stack_pointer_rtx,
22107 offset));
22108 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22109 RTX_FRAME_RELATED_P (tmp) = 1;
22110 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22112 offset += 4;
22113 j += 1;
22116 else
22117 j++;
22119 /* Attach dwarf info to the first insn we generate. */
22120 gcc_assert (insn != NULL_RTX);
22121 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22122 RTX_FRAME_RELATED_P (insn) = 1;
22125 /* Generate and emit an insn that we will recognize as a push_multi.
22126 Unfortunately, since this insn does not reflect very well the actual
22127 semantics of the operation, we need to annotate the insn for the benefit
22128 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22129 MASK for registers that should be annotated for DWARF2 frame unwind
22130 information. */
22131 static rtx
22132 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22134 int num_regs = 0;
22135 int num_dwarf_regs = 0;
22136 int i, j;
22137 rtx par;
22138 rtx dwarf;
22139 int dwarf_par_index;
22140 rtx tmp, reg;
22142 /* We don't record the PC in the dwarf frame information. */
22143 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22145 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22147 if (mask & (1 << i))
22148 num_regs++;
22149 if (dwarf_regs_mask & (1 << i))
22150 num_dwarf_regs++;
22153 gcc_assert (num_regs && num_regs <= 16);
22154 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22156 /* For the body of the insn we are going to generate an UNSPEC in
22157 parallel with several USEs. This allows the insn to be recognized
22158 by the push_multi pattern in the arm.md file.
22160 The body of the insn looks something like this:
22162 (parallel [
22163 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22164 (const_int:SI <num>)))
22165 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22166 (use (reg:SI XX))
22167 (use (reg:SI YY))
22171 For the frame note however, we try to be more explicit and actually
22172 show each register being stored into the stack frame, plus a (single)
22173 decrement of the stack pointer. We do it this way in order to be
22174 friendly to the stack unwinding code, which only wants to see a single
22175 stack decrement per instruction. The RTL we generate for the note looks
22176 something like this:
22178 (sequence [
22179 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22180 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22181 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22182 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22186 FIXME: In an ideal world the PRE_MODIFY would not exist and
22187 instead we'd have a parallel expression detailing all
22188 the stores to the various memory addresses so that debug
22189 information is more up-to-date. Remember however while writing
22190 this to take care of the constraints with the push instruction.
22192 Note also that this has to be taken care of for the VFP registers.
22194 For more see PR43399. */
22196 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22197 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22198 dwarf_par_index = 1;
22200 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22202 if (mask & (1 << i))
22204 reg = gen_rtx_REG (SImode, i);
22206 XVECEXP (par, 0, 0)
22207 = gen_rtx_SET (gen_frame_mem
22208 (BLKmode,
22209 gen_rtx_PRE_MODIFY (Pmode,
22210 stack_pointer_rtx,
22211 plus_constant
22212 (Pmode, stack_pointer_rtx,
22213 -4 * num_regs))
22215 gen_rtx_UNSPEC (BLKmode,
22216 gen_rtvec (1, reg),
22217 UNSPEC_PUSH_MULT));
22219 if (dwarf_regs_mask & (1 << i))
22221 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22222 reg);
22223 RTX_FRAME_RELATED_P (tmp) = 1;
22224 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22227 break;
22231 for (j = 1, i++; j < num_regs; i++)
22233 if (mask & (1 << i))
22235 reg = gen_rtx_REG (SImode, i);
22237 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22239 if (dwarf_regs_mask & (1 << i))
22242 = gen_rtx_SET (gen_frame_mem
22243 (SImode,
22244 plus_constant (Pmode, stack_pointer_rtx,
22245 4 * j)),
22246 reg);
22247 RTX_FRAME_RELATED_P (tmp) = 1;
22248 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22251 j++;
22255 par = emit_insn (par);
22257 tmp = gen_rtx_SET (stack_pointer_rtx,
22258 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22259 RTX_FRAME_RELATED_P (tmp) = 1;
22260 XVECEXP (dwarf, 0, 0) = tmp;
22262 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22264 return par;
22267 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22268 SIZE is the offset to be adjusted.
22269 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22270 static void
22271 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22273 rtx dwarf;
22275 RTX_FRAME_RELATED_P (insn) = 1;
22276 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22277 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22280 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22281 SAVED_REGS_MASK shows which registers need to be restored.
22283 Unfortunately, since this insn does not reflect very well the actual
22284 semantics of the operation, we need to annotate the insn for the benefit
22285 of DWARF2 frame unwind information. */
22286 static void
22287 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22289 int num_regs = 0;
22290 int i, j;
22291 rtx par;
22292 rtx dwarf = NULL_RTX;
22293 rtx tmp, reg;
22294 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22295 int offset_adj;
22296 int emit_update;
22298 offset_adj = return_in_pc ? 1 : 0;
22299 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22300 if (saved_regs_mask & (1 << i))
22301 num_regs++;
22303 gcc_assert (num_regs && num_regs <= 16);
22305 /* If SP is in reglist, then we don't emit SP update insn. */
22306 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22308 /* The parallel needs to hold num_regs SETs
22309 and one SET for the stack update. */
22310 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22312 if (return_in_pc)
22313 XVECEXP (par, 0, 0) = ret_rtx;
22315 if (emit_update)
22317 /* Increment the stack pointer, based on there being
22318 num_regs 4-byte registers to restore. */
22319 tmp = gen_rtx_SET (stack_pointer_rtx,
22320 plus_constant (Pmode,
22321 stack_pointer_rtx,
22322 4 * num_regs));
22323 RTX_FRAME_RELATED_P (tmp) = 1;
22324 XVECEXP (par, 0, offset_adj) = tmp;
22327 /* Now restore every reg, which may include PC. */
22328 for (j = 0, i = 0; j < num_regs; i++)
22329 if (saved_regs_mask & (1 << i))
22331 reg = gen_rtx_REG (SImode, i);
22332 if ((num_regs == 1) && emit_update && !return_in_pc)
22334 /* Emit single load with writeback. */
22335 tmp = gen_frame_mem (SImode,
22336 gen_rtx_POST_INC (Pmode,
22337 stack_pointer_rtx));
22338 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22339 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22340 return;
22343 tmp = gen_rtx_SET (reg,
22344 gen_frame_mem
22345 (SImode,
22346 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22347 RTX_FRAME_RELATED_P (tmp) = 1;
22348 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22350 /* We need to maintain a sequence for DWARF info too. As dwarf info
22351 should not have PC, skip PC. */
22352 if (i != PC_REGNUM)
22353 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22355 j++;
22358 if (return_in_pc)
22359 par = emit_jump_insn (par);
22360 else
22361 par = emit_insn (par);
22363 REG_NOTES (par) = dwarf;
22364 if (!return_in_pc)
22365 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22366 stack_pointer_rtx, stack_pointer_rtx);
22369 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22370 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22372 Unfortunately, since this insn does not reflect very well the actual
22373 semantics of the operation, we need to annotate the insn for the benefit
22374 of DWARF2 frame unwind information. */
22375 static void
22376 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22378 int i, j;
22379 rtx par;
22380 rtx dwarf = NULL_RTX;
22381 rtx tmp, reg;
22383 gcc_assert (num_regs && num_regs <= 32);
22385 /* Workaround ARM10 VFPr1 bug. */
22386 if (num_regs == 2 && !arm_arch6)
22388 if (first_reg == 15)
22389 first_reg--;
22391 num_regs++;
22394 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22395 there could be up to 32 D-registers to restore.
22396 If there are more than 16 D-registers, make two recursive calls,
22397 each of which emits one pop_multi instruction. */
22398 if (num_regs > 16)
22400 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22401 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22402 return;
22405 /* The parallel needs to hold num_regs SETs
22406 and one SET for the stack update. */
22407 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22409 /* Increment the stack pointer, based on there being
22410 num_regs 8-byte registers to restore. */
22411 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22412 RTX_FRAME_RELATED_P (tmp) = 1;
22413 XVECEXP (par, 0, 0) = tmp;
22415 /* Now show every reg that will be restored, using a SET for each. */
22416 for (j = 0, i=first_reg; j < num_regs; i += 2)
22418 reg = gen_rtx_REG (DFmode, i);
22420 tmp = gen_rtx_SET (reg,
22421 gen_frame_mem
22422 (DFmode,
22423 plus_constant (Pmode, base_reg, 8 * j)));
22424 RTX_FRAME_RELATED_P (tmp) = 1;
22425 XVECEXP (par, 0, j + 1) = tmp;
22427 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22429 j++;
22432 par = emit_insn (par);
22433 REG_NOTES (par) = dwarf;
22435 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22436 if (REGNO (base_reg) == IP_REGNUM)
22438 RTX_FRAME_RELATED_P (par) = 1;
22439 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22441 else
22442 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22443 base_reg, base_reg);
22446 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22447 an even number of registers is being popped, multiple LDRD patterns are
22448 created for all register pairs. If an odd number of registers is popped,
22449 the last register is loaded using an LDR pattern. */
22450 static void
22451 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22453 int num_regs = 0;
22454 int i, j;
22455 rtx par = NULL_RTX;
22456 rtx dwarf = NULL_RTX;
22457 rtx tmp, reg, tmp1;
22458 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22460 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22461 if (saved_regs_mask & (1 << i))
22462 num_regs++;
22464 gcc_assert (num_regs && num_regs <= 16);
22466 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22467 to be popped. So, if num_regs is even, now it will become odd,
22468 and we can generate pop with PC. If num_regs is odd, it will be
22469 even now, and ldr with return can be generated for PC. */
22470 if (return_in_pc)
22471 num_regs--;
22473 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22475 /* Var j iterates over all the registers to gather all the registers in
22476 saved_regs_mask. Var i gives index of saved registers in stack frame.
22477 A PARALLEL RTX of register-pair is created here, so that pattern for
22478 LDRD can be matched. As PC is always last register to be popped, and
22479 we have already decremented num_regs if PC, we don't have to worry
22480 about PC in this loop. */
22481 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22482 if (saved_regs_mask & (1 << j))
22484 /* Create RTX for memory load. */
22485 reg = gen_rtx_REG (SImode, j);
22486 tmp = gen_rtx_SET (reg,
22487 gen_frame_mem (SImode,
22488 plus_constant (Pmode,
22489 stack_pointer_rtx, 4 * i)));
22490 RTX_FRAME_RELATED_P (tmp) = 1;
22492 if (i % 2 == 0)
22494 /* When saved-register index (i) is even, the RTX to be emitted is
22495 yet to be created. Hence create it first. The LDRD pattern we
22496 are generating is :
22497 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22498 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22499 where target registers need not be consecutive. */
22500 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22501 dwarf = NULL_RTX;
22504 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22505 added as 0th element and if i is odd, reg_i is added as 1st element
22506 of LDRD pattern shown above. */
22507 XVECEXP (par, 0, (i % 2)) = tmp;
22508 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22510 if ((i % 2) == 1)
22512 /* When saved-register index (i) is odd, RTXs for both the registers
22513 to be loaded are generated in above given LDRD pattern, and the
22514 pattern can be emitted now. */
22515 par = emit_insn (par);
22516 REG_NOTES (par) = dwarf;
22517 RTX_FRAME_RELATED_P (par) = 1;
22520 i++;
22523 /* If the number of registers pushed is odd and return_in_pc is false, or the
22524 number of registers is even and return_in_pc is true, the last register is
22525 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22526 then use LDR with post-increment.
22528 /* Increment the stack pointer, based on there being
22529 num_regs 4-byte registers to restore. */
22530 tmp = gen_rtx_SET (stack_pointer_rtx,
22531 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22532 RTX_FRAME_RELATED_P (tmp) = 1;
22533 tmp = emit_insn (tmp);
22534 if (!return_in_pc)
22536 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22537 stack_pointer_rtx, stack_pointer_rtx);
22540 dwarf = NULL_RTX;
22542 if (((num_regs % 2) == 1 && !return_in_pc)
22543 || ((num_regs % 2) == 0 && return_in_pc))
22545 /* Scan for the single register to be popped. Skip until the saved
22546 register is found. */
22547 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22549 /* Gen LDR with post increment here. */
22550 tmp1 = gen_rtx_MEM (SImode,
22551 gen_rtx_POST_INC (SImode,
22552 stack_pointer_rtx));
22553 set_mem_alias_set (tmp1, get_frame_alias_set ());
22555 reg = gen_rtx_REG (SImode, j);
22556 tmp = gen_rtx_SET (reg, tmp1);
22557 RTX_FRAME_RELATED_P (tmp) = 1;
22558 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22560 if (return_in_pc)
22562 /* If return_in_pc, j must be PC_REGNUM. */
22563 gcc_assert (j == PC_REGNUM);
22564 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22565 XVECEXP (par, 0, 0) = ret_rtx;
22566 XVECEXP (par, 0, 1) = tmp;
22567 par = emit_jump_insn (par);
22569 else
22571 par = emit_insn (tmp);
22572 REG_NOTES (par) = dwarf;
22573 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22574 stack_pointer_rtx, stack_pointer_rtx);
22578 else if ((num_regs % 2) == 1 && return_in_pc)
22580 /* There are 2 registers to be popped. So, generate the pattern
22581 pop_multiple_with_stack_update_and_return to pop in PC. */
22582 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22585 return;
22588 /* LDRD in ARM mode needs consecutive registers as operands. This function
22589 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22590 offset addressing and then generates one separate stack update. This provides
22591 more scheduling freedom, compared to writeback on every load. However,
22592 if the function returns using load into PC directly
22593 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22594 before the last load. TODO: Add a peephole optimization to recognize
22595 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22596 peephole optimization to merge the load at stack-offset zero
22597 with the stack update instruction using load with writeback
22598 in post-index addressing mode. */
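/* As an illustrative sketch (actual output depends on SAVED_REGS_MASK), an
   epilogue restoring r4-r7 and returning through PC corresponds to:

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
	ldr	pc, [sp], #4  */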
22599 static void
22600 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22602 int j = 0;
22603 int offset = 0;
22604 rtx par = NULL_RTX;
22605 rtx dwarf = NULL_RTX;
22606 rtx tmp, mem;
22608 /* Restore saved registers. */
22609 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22610 j = 0;
22611 while (j <= LAST_ARM_REGNUM)
22612 if (saved_regs_mask & (1 << j))
22614 if ((j % 2) == 0
22615 && (saved_regs_mask & (1 << (j + 1)))
22616 && (j + 1) != PC_REGNUM)
22618 /* Current register and next register form register pair for which
22619 LDRD can be generated. PC is always the last register popped, and
22620 we handle it separately. */
22621 if (offset > 0)
22622 mem = gen_frame_mem (DImode,
22623 plus_constant (Pmode,
22624 stack_pointer_rtx,
22625 offset));
22626 else
22627 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22629 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22630 tmp = emit_insn (tmp);
22631 RTX_FRAME_RELATED_P (tmp) = 1;
22633 /* Generate dwarf info. */
22635 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22636 gen_rtx_REG (SImode, j),
22637 NULL_RTX);
22638 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22639 gen_rtx_REG (SImode, j + 1),
22640 dwarf);
22642 REG_NOTES (tmp) = dwarf;
22644 offset += 8;
22645 j += 2;
22647 else if (j != PC_REGNUM)
22649 /* Emit a single word load. */
22650 if (offset > 0)
22651 mem = gen_frame_mem (SImode,
22652 plus_constant (Pmode,
22653 stack_pointer_rtx,
22654 offset));
22655 else
22656 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22658 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22659 tmp = emit_insn (tmp);
22660 RTX_FRAME_RELATED_P (tmp) = 1;
22662 /* Generate dwarf info. */
22663 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22664 gen_rtx_REG (SImode, j),
22665 NULL_RTX);
22667 offset += 4;
22668 j += 1;
22670 else /* j == PC_REGNUM */
22671 j++;
22673 else
22674 j++;
22676 /* Update the stack. */
22677 if (offset > 0)
22679 tmp = gen_rtx_SET (stack_pointer_rtx,
22680 plus_constant (Pmode,
22681 stack_pointer_rtx,
22682 offset));
22683 tmp = emit_insn (tmp);
22684 arm_add_cfa_adjust_cfa_note (tmp, offset,
22685 stack_pointer_rtx, stack_pointer_rtx);
22686 offset = 0;
22689 if (saved_regs_mask & (1 << PC_REGNUM))
22691 /* Only PC is to be popped. */
22692 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22693 XVECEXP (par, 0, 0) = ret_rtx;
22694 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22695 gen_frame_mem (SImode,
22696 gen_rtx_POST_INC (SImode,
22697 stack_pointer_rtx)));
22698 RTX_FRAME_RELATED_P (tmp) = 1;
22699 XVECEXP (par, 0, 1) = tmp;
22700 par = emit_jump_insn (par);
22702 /* Generate dwarf info. */
22703 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22704 gen_rtx_REG (SImode, PC_REGNUM),
22705 NULL_RTX);
22706 REG_NOTES (par) = dwarf;
22707 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22708 stack_pointer_rtx, stack_pointer_rtx);
22712 /* Calculate the size of the return value that is passed in registers. */
22713 static unsigned
22714 arm_size_return_regs (void)
22716 machine_mode mode;
22718 if (crtl->return_rtx != 0)
22719 mode = GET_MODE (crtl->return_rtx);
22720 else
22721 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22723 return GET_MODE_SIZE (mode);
22726 /* Return true if the current function needs to save/restore LR. */
22727 static bool
22728 thumb_force_lr_save (void)
22730 return !cfun->machine->lr_save_eliminated
22731 && (!crtl->is_leaf
22732 || thumb_far_jump_used_p ()
22733 || df_regs_ever_live_p (LR_REGNUM));
22736 /* We do not know whether r3 will be available, because there is
22737 an indirect tail call happening in this
22738 particular case. */
22739 static bool
22740 is_indirect_tailcall_p (rtx call)
22742 rtx pat = PATTERN (call);
22744 /* Indirect tail call. */
22745 pat = XVECEXP (pat, 0, 0);
22746 if (GET_CODE (pat) == SET)
22747 pat = SET_SRC (pat);
22749 pat = XEXP (XEXP (pat, 0), 0);
22750 return REG_P (pat);
22753 /* Return true if r3 is used by any of the tail call insns in the
22754 current function. */
22755 static bool
22756 any_sibcall_could_use_r3 (void)
22758 edge_iterator ei;
22759 edge e;
22761 if (!crtl->tail_call_emit)
22762 return false;
22763 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22764 if (e->flags & EDGE_SIBCALL)
22766 rtx_insn *call = BB_END (e->src);
22767 if (!CALL_P (call))
22768 call = prev_nonnote_nondebug_insn (call);
22769 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22770 if (find_regno_fusage (call, USE, 3)
22771 || is_indirect_tailcall_p (call))
22772 return true;
22774 return false;
22778 /* Compute the distance from register FROM to register TO.
22779 These can be the arg pointer (26), the soft frame pointer (25),
22780 the stack pointer (13) or the hard frame pointer (11).
22781 In thumb mode r7 is used as the soft frame pointer, if needed.
22782 Typical stack layout looks like this:
22784 old stack pointer -> | |
22785 ----
22786 | | \
22787 | | saved arguments for
22788 | | vararg functions
22789 | | /
22791 hard FP & arg pointer -> | | \
22792 | | stack
22793 | | frame
22794 | | /
22796 | | \
22797 | | call saved
22798 | | registers
22799 soft frame pointer -> | | /
22801 | | \
22802 | | local
22803 | | variables
22804 locals base pointer -> | | /
22806 | | \
22807 | | outgoing
22808 | | arguments
22809 current stack pointer -> | | /
22812 For a given function some or all of these stack components
22813 may not be needed, giving rise to the possibility of
22814 eliminating some of the registers.
22816 The values returned by this function must reflect the behavior
22817 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22819 The sign of the number returned reflects the direction of stack
22820 growth, so the values are positive for all eliminations except
22821 from the soft frame pointer to the hard frame pointer.
22823 SFP may point just inside the local variables block to ensure correct
22824 alignment. */
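/* As a purely hypothetical example (no varargs, no static chain, no
   interworking slot, no padding): with r4-r8 and LR saved (24 bytes),
   16 bytes of locals and 8 bytes of outgoing arguments, the offsets are
   saved_args = 0, saved_regs = 24, soft_frame = 24, locals_base = 40 and
   outgoing_args = 48, so eliminating ARG_POINTER into STACK_POINTER
   yields 48 - (0 + 4) = 44, and FRAME_POINTER into STACK_POINTER yields
   48 - 24 = 24.  */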
22827 /* Return cached stack offsets. */
22829 static arm_stack_offsets *
22830 arm_get_frame_offsets (void)
22832 struct arm_stack_offsets *offsets;
22834 offsets = &cfun->machine->stack_offsets;
22836 return offsets;
22840 /* Calculate stack offsets. These are used to calculate register elimination
22841 offsets and in prologue/epilogue code. Also calculates which registers
22842 should be saved. */
22844 static void
22845 arm_compute_frame_layout (void)
22847 struct arm_stack_offsets *offsets;
22848 unsigned long func_type;
22849 int saved;
22850 int core_saved;
22851 HOST_WIDE_INT frame_size;
22852 int i;
22854 offsets = &cfun->machine->stack_offsets;
22856 /* Initially this is the size of the local variables. It will be translated
22857 into an offset once we have determined the size of preceding data. */
22858 frame_size = ROUND_UP_WORD (get_frame_size ());
22860 /* Space for variadic functions. */
22861 offsets->saved_args = crtl->args.pretend_args_size;
22863 /* In Thumb mode this is incorrect, but never used. */
22864 offsets->frame
22865 = (offsets->saved_args
22866 + arm_compute_static_chain_stack_bytes ()
22867 + (frame_pointer_needed ? 4 : 0));
22869 if (TARGET_32BIT)
22871 unsigned int regno;
22873 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22874 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22875 saved = core_saved;
22877 /* We know that SP will be doubleword aligned on entry, and we must
22878 preserve that condition at any subroutine call. We also require the
22879 soft frame pointer to be doubleword aligned. */
22881 if (TARGET_REALLY_IWMMXT)
22883 /* Check for the call-saved iWMMXt registers. */
22884 for (regno = FIRST_IWMMXT_REGNUM;
22885 regno <= LAST_IWMMXT_REGNUM;
22886 regno++)
22887 if (reg_needs_saving_p (regno))
22888 saved += 8;
22891 func_type = arm_current_func_type ();
22892 /* Space for saved VFP registers. */
22893 if (! IS_VOLATILE (func_type)
22894 && TARGET_VFP_BASE)
22895 saved += arm_get_vfp_saved_size ();
22897 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22898 nonsecure entry functions with VSTR/VLDR. */
22899 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22900 saved += 4;
22902 else /* TARGET_THUMB1 */
22904 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22905 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22906 saved = core_saved;
22907 if (TARGET_BACKTRACE)
22908 saved += 16;
22911 /* Saved registers include the stack frame. */
22912 offsets->saved_regs
22913 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22914 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22916 /* A leaf function does not need any stack alignment if it has nothing
22917 on the stack. */
22918 if (crtl->is_leaf && frame_size == 0
22919 /* However if it calls alloca(), we have a dynamically allocated
22920 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22921 && ! cfun->calls_alloca)
22923 offsets->outgoing_args = offsets->soft_frame;
22924 offsets->locals_base = offsets->soft_frame;
22925 return;
22928 /* Ensure SFP has the correct alignment. */
22929 if (ARM_DOUBLEWORD_ALIGN
22930 && (offsets->soft_frame & 7))
22932 offsets->soft_frame += 4;
22933 /* Try to align stack by pushing an extra reg. Don't bother doing this
22934 when there is a stack frame as the alignment will be rolled into
22935 the normal stack adjustment. */
22936 if (frame_size + crtl->outgoing_args_size == 0)
22938 int reg = -1;
22940 /* Register r3 is caller-saved. Normally it does not need to be
22941 saved on entry by the prologue. However if we choose to save
22942 it for padding then we may confuse the compiler into thinking
22943 a prologue sequence is required when in fact it is not. This
22944 will occur when shrink-wrapping if r3 is used as a scratch
22945 register and there are no other callee-saved writes.
22947 This situation can be avoided when other callee-saved registers
22948 are available and r3 is not mandatory if we choose a callee-saved
22949 register for padding. */
22950 bool prefer_callee_reg_p = false;
22952 /* If it is safe to use r3, then do so. This sometimes
22953 generates better code on Thumb-2 by avoiding the need to
22954 use 32-bit push/pop instructions. */
22955 if (! any_sibcall_could_use_r3 ()
22956 && arm_size_return_regs () <= 12
22957 && (offsets->saved_regs_mask & (1 << 3)) == 0
22958 && (TARGET_THUMB2
22959 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22961 reg = 3;
22962 if (!TARGET_THUMB2)
22963 prefer_callee_reg_p = true;
22965 if (reg == -1
22966 || prefer_callee_reg_p)
22968 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22970 /* Avoid fixed registers; they may be changed at
22971 arbitrary times so it's unsafe to restore them
22972 during the epilogue. */
22973 if (!fixed_regs[i]
22974 && (offsets->saved_regs_mask & (1 << i)) == 0)
22976 reg = i;
22977 break;
22982 if (reg != -1)
22984 offsets->saved_regs += 4;
22985 offsets->saved_regs_mask |= (1 << reg);
22990 offsets->locals_base = offsets->soft_frame + frame_size;
22991 offsets->outgoing_args = (offsets->locals_base
22992 + crtl->outgoing_args_size);
22994 if (ARM_DOUBLEWORD_ALIGN)
22996 /* Ensure SP remains doubleword aligned. */
22997 if (offsets->outgoing_args & 7)
22998 offsets->outgoing_args += 4;
22999 gcc_assert (!(offsets->outgoing_args & 7));
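/* A minimal, guarded-out sketch of the offset arithmetic performed above,
   ignoring alignment padding, the static chain and the interworking slot;
   the function and parameter names are illustrative only and the example
   values match the hypothetical layout described before
   arm_get_frame_offsets.  */
#if 0
static void
sketch_arm_frame_offsets (int pretend_args, int core_saved_bytes,
			  int frame_size, int outgoing_size)
{
  int saved_args = pretend_args;		    /* e.g. 0  */
  int saved_regs = saved_args + core_saved_bytes;   /* e.g. 24 */
  int soft_frame = saved_regs;			    /* e.g. 24 */
  int locals_base = soft_frame + frame_size;	    /* e.g. 40 */
  int outgoing_args = locals_base + outgoing_size;  /* e.g. 48 */
  (void) outgoing_args;
}
#endif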
23004 /* Calculate the relative offsets for the different stack pointers. Positive
23005 offsets are in the direction of stack growth. */
23007 HOST_WIDE_INT
23008 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23010 arm_stack_offsets *offsets;
23012 offsets = arm_get_frame_offsets ();
23014 /* OK, now we have enough information to compute the distances.
23015 There must be an entry in these switch tables for each pair
23016 of registers in ELIMINABLE_REGS, even if some of the entries
23017 seem to be redundant or useless. */
23018 switch (from)
23020 case ARG_POINTER_REGNUM:
23021 switch (to)
23023 case THUMB_HARD_FRAME_POINTER_REGNUM:
23024 return 0;
23026 case FRAME_POINTER_REGNUM:
23027 /* This is the reverse of the soft frame pointer
23028 to hard frame pointer elimination below. */
23029 return offsets->soft_frame - offsets->saved_args;
23031 case ARM_HARD_FRAME_POINTER_REGNUM:
23032 /* This is only non-zero in the case where the static chain register
23033 is stored above the frame. */
23034 return offsets->frame - offsets->saved_args - 4;
23036 case STACK_POINTER_REGNUM:
23037 /* If nothing has been pushed on the stack at all
23038 then this will return -4. This *is* correct! */
23039 return offsets->outgoing_args - (offsets->saved_args + 4);
23041 default:
23042 gcc_unreachable ();
23044 gcc_unreachable ();
23046 case FRAME_POINTER_REGNUM:
23047 switch (to)
23049 case THUMB_HARD_FRAME_POINTER_REGNUM:
23050 return 0;
23052 case ARM_HARD_FRAME_POINTER_REGNUM:
23053 /* The hard frame pointer points to the top entry in the
23054 stack frame. The soft frame pointer to the bottom entry
23055 in the stack frame. If there is no stack frame at all,
23056 then they are identical. */
23058 return offsets->frame - offsets->soft_frame;
23060 case STACK_POINTER_REGNUM:
23061 return offsets->outgoing_args - offsets->soft_frame;
23063 default:
23064 gcc_unreachable ();
23066 gcc_unreachable ();
23068 default:
23069 /* You cannot eliminate from the stack pointer.
23070 In theory you could eliminate from the hard frame
23071 pointer to the stack pointer, but this will never
23072 happen, since if a stack frame is not needed the
23073 hard frame pointer will never be used. */
23074 gcc_unreachable ();
23078 /* Given FROM and TO register numbers, say whether this elimination is
23079 allowed. Frame pointer elimination is automatically handled.
23081 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23082 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23083 pointer, we must eliminate FRAME_POINTER_REGNUM into
23084 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23085 ARG_POINTER_REGNUM. */
23087 bool
23088 arm_can_eliminate (const int from, const int to)
23090 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23091 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23092 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23093 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23094 true);
23097 /* Emit RTL to save coprocessor registers on function entry. Returns the
23098 number of bytes pushed. */
23100 static int
23101 arm_save_coproc_regs(void)
23103 int saved_size = 0;
23104 unsigned reg;
23105 unsigned start_reg;
23106 rtx insn;
23108 if (TARGET_REALLY_IWMMXT)
23109 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23110 if (reg_needs_saving_p (reg))
23112 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23113 insn = gen_rtx_MEM (V2SImode, insn);
23114 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23115 RTX_FRAME_RELATED_P (insn) = 1;
23116 saved_size += 8;
23119 if (TARGET_VFP_BASE)
23121 start_reg = FIRST_VFP_REGNUM;
23123 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23125 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23127 if (start_reg != reg)
23128 saved_size += vfp_emit_fstmd (start_reg,
23129 (reg - start_reg) / 2);
23130 start_reg = reg + 2;
23133 if (start_reg != reg)
23134 saved_size += vfp_emit_fstmd (start_reg,
23135 (reg - start_reg) / 2);
23137 return saved_size;
23141 /* Set the Thumb frame pointer from the stack pointer. */
23143 static void
23144 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23146 HOST_WIDE_INT amount;
23147 rtx insn, dwarf;
23149 amount = offsets->outgoing_args - offsets->locals_base;
23150 if (amount < 1024)
23151 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23152 stack_pointer_rtx, GEN_INT (amount)));
23153 else
23155 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23156 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23157 expects the first two operands to be the same. */
23158 if (TARGET_THUMB2)
23160 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23161 stack_pointer_rtx,
23162 hard_frame_pointer_rtx));
23164 else
23166 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23167 hard_frame_pointer_rtx,
23168 stack_pointer_rtx));
23170 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23171 plus_constant (Pmode, stack_pointer_rtx, amount));
23172 RTX_FRAME_RELATED_P (dwarf) = 1;
23173 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23176 RTX_FRAME_RELATED_P (insn) = 1;
23179 struct scratch_reg {
23180 rtx reg;
23181 bool saved;
23184 /* Return a short-lived scratch register for use as a 2nd scratch register on
23185 function entry after the registers are saved in the prologue. This register
23186 must be released by means of release_scratch_register_on_entry. IP is not
23187 considered since it is always used as the 1st scratch register if available.
23189 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23190 mask of live registers. */
23192 static void
23193 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23194 unsigned long live_regs)
23196 int regno = -1;
23198 sr->saved = false;
23200 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23201 regno = LR_REGNUM;
23202 else
23204 unsigned int i;
23206 for (i = 4; i < 11; i++)
23207 if (regno1 != i && (live_regs & (1 << i)) != 0)
23209 regno = i;
23210 break;
23213 if (regno < 0)
23215 /* If IP is used as the 1st scratch register for a nested function,
23216 then either r3 wasn't available or is used to preserve IP. */
23217 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23218 regno1 = 3;
23219 regno = (regno1 == 3 ? 2 : 3);
23220 sr->saved
23221 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23222 regno);
23226 sr->reg = gen_rtx_REG (SImode, regno);
23227 if (sr->saved)
23229 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23230 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23231 rtx x = gen_rtx_SET (stack_pointer_rtx,
23232 plus_constant (Pmode, stack_pointer_rtx, -4));
23233 RTX_FRAME_RELATED_P (insn) = 1;
23234 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23238 /* Release a scratch register obtained from the preceding function. */
23240 static void
23241 release_scratch_register_on_entry (struct scratch_reg *sr)
23243 if (sr->saved)
23245 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23246 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23247 rtx x = gen_rtx_SET (stack_pointer_rtx,
23248 plus_constant (Pmode, stack_pointer_rtx, 4));
23249 RTX_FRAME_RELATED_P (insn) = 1;
23250 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23254 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23256 #if PROBE_INTERVAL > 4096
23257 #error Cannot use indexed addressing mode for stack probing
23258 #endif
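/* STACK_CHECK_PROBE_INTERVAL_EXP defaults to 12, so PROBE_INTERVAL is
   normally 4096 bytes (one page).  */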
23260 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23261 inclusive. These are offsets from the current stack pointer. REGNO1
23262 is the index number of the 1st scratch register and LIVE_REGS is the
23263 mask of live registers. */
23265 static void
23266 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23267 unsigned int regno1, unsigned long live_regs)
23269 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23271 /* See if we have a constant small number of probes to generate. If so,
23272 that's the easy case. */
23273 if (size <= PROBE_INTERVAL)
23275 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23276 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23277 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23280 /* The run-time loop is made up of 10 insns in the generic case while the
23281 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23282 else if (size <= 5 * PROBE_INTERVAL)
23284 HOST_WIDE_INT i, rem;
23286 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23287 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23288 emit_stack_probe (reg1);
23290 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23291 it exceeds SIZE. If only two probes are needed, this will not
23292 generate any code. Then probe at FIRST + SIZE. */
23293 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23295 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23296 emit_stack_probe (reg1);
23299 rem = size - (i - PROBE_INTERVAL);
23300 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23302 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23303 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23305 else
23306 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23309 /* Otherwise, do the same as above, but in a loop. Note that we must be
23310 extra careful with variables wrapping around because we might be at
23311 the very top (or the very bottom) of the address space and we have
23312 to be able to handle this case properly; in particular, we use an
23313 equality test for the loop condition. */
23314 else
23316 HOST_WIDE_INT rounded_size;
23317 struct scratch_reg sr;
23319 get_scratch_register_on_entry (&sr, regno1, live_regs);
23321 emit_move_insn (reg1, GEN_INT (first));
23324 /* Step 1: round SIZE to the previous multiple of the interval. */
23326 rounded_size = size & -PROBE_INTERVAL;
23327 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23330 /* Step 2: compute initial and final value of the loop counter. */
23332 /* TEST_ADDR = SP + FIRST. */
23333 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23335 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23336 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23339 /* Step 3: the loop
23343 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23344 probe at TEST_ADDR
23346 while (TEST_ADDR != LAST_ADDR)
23348 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23349 until it is equal to ROUNDED_SIZE. */
23351 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23354 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23355 that SIZE is equal to ROUNDED_SIZE. */
23357 if (size != rounded_size)
23359 HOST_WIDE_INT rem = size - rounded_size;
23361 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23363 emit_set_insn (sr.reg,
23364 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23365 emit_stack_probe (plus_constant (Pmode, sr.reg,
23366 PROBE_INTERVAL - rem));
23368 else
23369 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23372 release_scratch_register_on_entry (&sr);
23375 /* Make sure nothing is scheduled before we are done. */
23376 emit_insn (gen_blockage ());
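/* As an illustrative example for ARM state with PROBE_INTERVAL == 4096:
   probing FIRST == 4096, SIZE == 10240 takes the second branch above and
   touches SP - 8192, SP - 12288 and finally SP - 14336 (i.e. FIRST + SIZE
   below the incoming stack pointer).  */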
23379 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23380 absolute addresses. */
23382 const char *
23383 output_probe_stack_range (rtx reg1, rtx reg2)
23385 static int labelno = 0;
23386 char loop_lab[32];
23387 rtx xops[2];
23389 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23391 /* Loop. */
23392 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23394 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23395 xops[0] = reg1;
23396 xops[1] = GEN_INT (PROBE_INTERVAL);
23397 output_asm_insn ("sub\t%0, %0, %1", xops);
23399 /* Probe at TEST_ADDR. */
23400 output_asm_insn ("str\tr0, [%0, #0]", xops);
23402 /* Test if TEST_ADDR == LAST_ADDR. */
23403 xops[1] = reg2;
23404 output_asm_insn ("cmp\t%0, %1", xops);
23406 /* Branch. */
23407 fputs ("\tbne\t", asm_out_file);
23408 assemble_name_raw (asm_out_file, loop_lab);
23409 fputc ('\n', asm_out_file);
23411 return "";
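/* With PROBE_INTERVAL == 4096 and, say, r4/r5 as REG1/REG2, the emitted
   loop looks roughly like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */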
23414 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23415 function. */
23416 void
23417 arm_expand_prologue (void)
23419 rtx amount;
23420 rtx insn;
23421 rtx ip_rtx;
23422 unsigned long live_regs_mask;
23423 unsigned long func_type;
23424 int fp_offset = 0;
23425 int saved_pretend_args = 0;
23426 int saved_regs = 0;
23427 unsigned HOST_WIDE_INT args_to_push;
23428 HOST_WIDE_INT size;
23429 arm_stack_offsets *offsets;
23430 bool clobber_ip;
23432 func_type = arm_current_func_type ();
23434 /* Naked functions don't have prologues. */
23435 if (IS_NAKED (func_type))
23437 if (flag_stack_usage_info)
23438 current_function_static_stack_size = 0;
23439 return;
23442 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23443 args_to_push = crtl->args.pretend_args_size;
23445 /* Compute which register we will have to save onto the stack. */
23446 offsets = arm_get_frame_offsets ();
23447 live_regs_mask = offsets->saved_regs_mask;
23449 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23451 if (IS_STACKALIGN (func_type))
23453 rtx r0, r1;
23455 /* Handle a word-aligned stack pointer. We generate the following:
23457 mov r0, sp
23458 bic r1, r0, #7
23459 mov sp, r1
23460 <save and restore r0 in normal prologue/epilogue>
23461 mov sp, r0
23462 bx lr
23464 The unwinder doesn't need to know about the stack realignment.
23465 Just tell it we saved SP in r0. */
23466 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23468 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23469 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23471 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23472 RTX_FRAME_RELATED_P (insn) = 1;
23473 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23475 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23477 /* ??? The CFA changes here, which may cause GDB to conclude that it
23478 has entered a different function. That said, the unwind info is
23479 correct, individually, before and after this instruction because
23480 we've described the save of SP, which will override the default
23481 handling of SP as restoring from the CFA. */
23482 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23485 /* Let's compute the static_chain_stack_bytes required and store it. Right
23486 now the value must be -1 as stored by arm_init_machine_status (). */
23487 cfun->machine->static_chain_stack_bytes
23488 = arm_compute_static_chain_stack_bytes ();
23490 /* The static chain register is the same as the IP register. If it is
23491 clobbered when creating the frame, we need to save and restore it. */
23492 clobber_ip = IS_NESTED (func_type)
23493 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23494 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23495 || flag_stack_clash_protection)
23496 && !df_regs_ever_live_p (LR_REGNUM)
23497 && arm_r3_live_at_start_p ()));
23499 /* Find somewhere to store IP whilst the frame is being created.
23500 We try the following places in order:
23502 1. The last argument register r3 if it is available.
23503 2. A slot on the stack above the frame if there are no
23504 arguments to push onto the stack.
23505 3. Register r3 again, after pushing the argument registers
23506 onto the stack, if this is a varargs function.
23507 4. The last slot on the stack created for the arguments to
23508 push, if this isn't a varargs function.
23510 Note - we only need to tell the dwarf2 backend about the SP
23511 adjustment in the second variant; the static chain register
23512 doesn't need to be unwound, as it doesn't contain a value
23513 inherited from the caller. */
23514 if (clobber_ip)
23516 if (!arm_r3_live_at_start_p ())
23517 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23518 else if (args_to_push == 0)
23520 rtx addr, dwarf;
23522 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23523 saved_regs += 4;
23525 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23526 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23527 fp_offset = 4;
23529 /* Just tell the dwarf backend that we adjusted SP. */
23530 dwarf = gen_rtx_SET (stack_pointer_rtx,
23531 plus_constant (Pmode, stack_pointer_rtx,
23532 -fp_offset));
23533 RTX_FRAME_RELATED_P (insn) = 1;
23534 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23536 else
23538 /* Store the args on the stack. */
23539 if (cfun->machine->uses_anonymous_args)
23541 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23542 (0xf0 >> (args_to_push / 4)) & 0xf);
23543 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23544 saved_pretend_args = 1;
23546 else
23548 rtx addr, dwarf;
23550 if (args_to_push == 4)
23551 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23552 else
23553 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23554 plus_constant (Pmode,
23555 stack_pointer_rtx,
23556 -args_to_push));
23558 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23560 /* Just tell the dwarf backend that we adjusted SP. */
23561 dwarf = gen_rtx_SET (stack_pointer_rtx,
23562 plus_constant (Pmode, stack_pointer_rtx,
23563 -args_to_push));
23564 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23567 RTX_FRAME_RELATED_P (insn) = 1;
23568 fp_offset = args_to_push;
23569 args_to_push = 0;
23573 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23575 if (IS_INTERRUPT (func_type))
23577 /* Interrupt functions must not corrupt any registers.
23578 Creating a frame pointer, however, corrupts the IP
23579 register, so we must push it first. */
23580 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23582 /* Do not set RTX_FRAME_RELATED_P on this insn.
23583 The dwarf stack unwinding code only wants to see one
23584 stack decrement per function, and this is not it. If
23585 this instruction is labeled as being part of the frame
23586 creation sequence then dwarf2out_frame_debug_expr will
23587 die when it encounters the assignment of IP to FP
23588 later on, since the use of SP here establishes SP as
23589 the CFA register and not IP.
23591 Anyway this instruction is not really part of the stack
23592 frame creation although it is part of the prologue. */
23595 insn = emit_set_insn (ip_rtx,
23596 plus_constant (Pmode, stack_pointer_rtx,
23597 fp_offset));
23598 RTX_FRAME_RELATED_P (insn) = 1;
23601 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23602 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23604 saved_regs += 4;
23605 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23606 GEN_INT (FPCXTNS_ENUM)));
23607 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23608 plus_constant (Pmode, stack_pointer_rtx, -4));
23609 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23610 RTX_FRAME_RELATED_P (insn) = 1;
23613 if (args_to_push)
23615 /* Push the argument registers, or reserve space for them. */
23616 if (cfun->machine->uses_anonymous_args)
23617 insn = emit_multi_reg_push
23618 ((0xf0 >> (args_to_push / 4)) & 0xf,
23619 (0xf0 >> (args_to_push / 4)) & 0xf);
23620 else
23621 insn = emit_insn
23622 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23623 GEN_INT (- args_to_push)));
23624 RTX_FRAME_RELATED_P (insn) = 1;
23627 /* If this is an interrupt service routine, and the link register
23628 is going to be pushed, and we're not generating an extra
23629 push of IP (needed when a frame is needed and the frame layout is APCS),
23630 subtracting four from LR now will mean that the function return
23631 can be done with a single instruction. */
23632 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23633 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23634 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23635 && TARGET_ARM)
23637 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23639 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23642 if (live_regs_mask)
23644 unsigned long dwarf_regs_mask = live_regs_mask;
23646 saved_regs += bit_count (live_regs_mask) * 4;
23647 if (optimize_size && !frame_pointer_needed
23648 && saved_regs == offsets->saved_regs - offsets->saved_args)
23650 /* If no coprocessor registers are being pushed and we don't have
23651 to worry about a frame pointer then push extra registers to
23652 create the stack frame. This is done in a way that does not
23653 alter the frame layout, so is independent of the epilogue. */
23654 int n;
23655 int frame;
23656 n = 0;
23657 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23658 n++;
23659 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23660 if (frame && n * 4 >= frame)
23662 n = frame / 4;
23663 live_regs_mask |= (1 << n) - 1;
23664 saved_regs += frame;
23668 if (TARGET_LDRD
23669 && current_tune->prefer_ldrd_strd
23670 && !optimize_function_for_size_p (cfun))
23672 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23673 if (TARGET_THUMB2)
23674 thumb2_emit_strd_push (live_regs_mask);
23675 else if (TARGET_ARM
23676 && !TARGET_APCS_FRAME
23677 && !IS_INTERRUPT (func_type))
23678 arm_emit_strd_push (live_regs_mask);
23679 else
23681 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23682 RTX_FRAME_RELATED_P (insn) = 1;
23685 else
23687 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23688 RTX_FRAME_RELATED_P (insn) = 1;
23692 if (! IS_VOLATILE (func_type))
23693 saved_regs += arm_save_coproc_regs ();
23695 if (frame_pointer_needed && TARGET_ARM)
23697 /* Create the new frame pointer. */
23698 if (TARGET_APCS_FRAME)
23700 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23701 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23702 RTX_FRAME_RELATED_P (insn) = 1;
23704 else
23706 insn = GEN_INT (saved_regs - (4 + fp_offset));
23707 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23708 stack_pointer_rtx, insn));
23709 RTX_FRAME_RELATED_P (insn) = 1;
23713 size = offsets->outgoing_args - offsets->saved_args;
23714 if (flag_stack_usage_info)
23715 current_function_static_stack_size = size;
23717 /* If this isn't an interrupt service routine and we have a frame, then do
23718 stack checking. We use IP as the first scratch register, except for the
23719 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23720 if (!IS_INTERRUPT (func_type)
23721 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23722 || flag_stack_clash_protection))
23724 unsigned int regno;
23726 if (!IS_NESTED (func_type) || clobber_ip)
23727 regno = IP_REGNUM;
23728 else if (df_regs_ever_live_p (LR_REGNUM))
23729 regno = LR_REGNUM;
23730 else
23731 regno = 3;
23733 if (crtl->is_leaf && !cfun->calls_alloca)
23735 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23736 arm_emit_probe_stack_range (get_stack_check_protect (),
23737 size - get_stack_check_protect (),
23738 regno, live_regs_mask);
23740 else if (size > 0)
23741 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23742 regno, live_regs_mask);
23745 /* Recover the static chain register. */
23746 if (clobber_ip)
23748 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23749 insn = gen_rtx_REG (SImode, 3);
23750 else
23752 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23753 insn = gen_frame_mem (SImode, insn);
23755 emit_set_insn (ip_rtx, insn);
23756 emit_insn (gen_force_register_use (ip_rtx));
23759 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23761 /* This add can produce multiple insns for a large constant, so we
23762 need to get tricky. */
23763 rtx_insn *last = get_last_insn ();
23765 amount = GEN_INT (offsets->saved_args + saved_regs
23766 - offsets->outgoing_args);
23768 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23769 amount));
23772 last = last ? NEXT_INSN (last) : get_insns ();
23773 RTX_FRAME_RELATED_P (last) = 1;
23775 while (last != insn);
23777 /* If the frame pointer is needed, emit a special barrier that
23778 will prevent the scheduler from moving stores to the frame
23779 before the stack adjustment. */
23780 if (frame_pointer_needed)
23781 emit_insn (gen_stack_tie (stack_pointer_rtx,
23782 hard_frame_pointer_rtx));
23786 if (frame_pointer_needed && TARGET_THUMB2)
23787 thumb_set_frame_pointer (offsets);
23789 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23791 unsigned long mask;
23793 mask = live_regs_mask;
23794 mask &= THUMB2_WORK_REGS;
23795 if (!IS_NESTED (func_type))
23796 mask |= (1 << IP_REGNUM);
23797 arm_load_pic_register (mask, NULL_RTX);
23800 /* If we are profiling, make sure no instructions are scheduled before
23801 the call to mcount. Similarly if the user has requested no
23802 scheduling in the prolog. Similarly if we want non-call exceptions
23803 using the EABI unwinder, to prevent faulting instructions from being
23804 swapped with a stack adjustment. */
23805 if (crtl->profile || !TARGET_SCHED_PROLOG
23806 || (arm_except_unwind_info (&global_options) == UI_TARGET
23807 && cfun->can_throw_non_call_exceptions))
23808 emit_insn (gen_blockage ());
23810 /* If the link register is being kept alive, with the return address in it,
23811 then make sure that it does not get reused by the ce2 pass. */
23812 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23813 cfun->machine->lr_save_eliminated = 1;
23816 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23817 static void
23818 arm_print_condition (FILE *stream)
23820 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23822 /* Branch conversion is not implemented for Thumb-2. */
23823 if (TARGET_THUMB)
23825 output_operand_lossage ("predicated Thumb instruction");
23826 return;
23828 if (current_insn_predicate != NULL)
23830 output_operand_lossage
23831 ("predicated instruction in conditional sequence");
23832 return;
23835 fputs (arm_condition_codes[arm_current_cc], stream);
23837 else if (current_insn_predicate)
23839 enum arm_cond_code code;
23841 if (TARGET_THUMB1)
23843 output_operand_lossage ("predicated Thumb instruction");
23844 return;
23847 code = get_arm_condition_code (current_insn_predicate);
23848 fputs (arm_condition_codes[code], stream);
23853 /* Globally reserved letters: acln
23854 Punctuation letters currently used: @_|?().!#
23855 Lower case letters currently used: bcdefhimpqtvwxyz
23856 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23857 Letters previously used, but now deprecated/obsolete: sWXYZ.
23859 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23861 If CODE is 'd', then X is a condition operand and the instruction
23862 should only be executed if the condition is true.
23863 If CODE is 'D', then X is a condition operand and the instruction
23864 should only be executed if the condition is false: however, if the mode
23865 of the comparison is CCFPEmode, then always execute the instruction -- we
23866 do this because in these circumstances !GE does not necessarily imply LT;
23867 in these cases the instruction pattern will take care to make sure that
23868 an instruction containing %d will follow, thereby undoing the effects of
23869 doing this instruction unconditionally.
23870 If CODE is 'N' then X is a floating point operand that must be negated
23871 before output.
23872 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23873 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23874 If CODE is 'V', then the operand must be a CONST_INT representing
23875 the bits to preserve in the modified register (Rd) of a BFI or BFC
23876 instruction: print out both the width and lsb (shift) fields. */
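/* A few illustrative expansions on hypothetical operands: %c on
   (const_int 10) prints "10"; %B on (const_int 0xffffff00) prints "255";
   %b on (const_int 0x80) prints "#7"; %L on (const_int 0x12345678)
   prints "22136" (0x5678).  */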
23877 static void
23878 arm_print_operand (FILE *stream, rtx x, int code)
23880 switch (code)
23882 case '@':
23883 fputs (ASM_COMMENT_START, stream);
23884 return;
23886 case '_':
23887 fputs (user_label_prefix, stream);
23888 return;
23890 case '|':
23891 fputs (REGISTER_PREFIX, stream);
23892 return;
23894 case '?':
23895 arm_print_condition (stream);
23896 return;
23898 case '.':
23899 /* The current condition code for a condition code setting instruction.
23900 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23901 fputc('s', stream);
23902 arm_print_condition (stream);
23903 return;
23905 case '!':
23906 /* If the instruction is conditionally executed then print
23907 the current condition code, otherwise print 's'. */
23908 gcc_assert (TARGET_THUMB2);
23909 if (current_insn_predicate)
23910 arm_print_condition (stream);
23911 else
23912 fputc('s', stream);
23913 break;
23915 /* %# is a "break" sequence. It doesn't output anything, but is used to
23916 separate e.g. operand numbers from following text, if that text consists
23917 of further digits which we don't want to be part of the operand
23918 number. */
23919 case '#':
23920 return;
23922 case 'N':
23924 REAL_VALUE_TYPE r;
23925 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23926 fprintf (stream, "%s", fp_const_from_val (&r));
23928 return;
23930 /* An integer or symbol address without a preceding # sign. */
23931 case 'c':
23932 switch (GET_CODE (x))
23934 case CONST_INT:
23935 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23936 break;
23938 case SYMBOL_REF:
23939 output_addr_const (stream, x);
23940 break;
23942 case CONST:
23943 if (GET_CODE (XEXP (x, 0)) == PLUS
23944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23946 output_addr_const (stream, x);
23947 break;
23949 /* Fall through. */
23951 default:
23952 output_operand_lossage ("Unsupported operand for code '%c'", code);
23954 return;
23956 /* An integer that we want to print in HEX. */
23957 case 'x':
23958 switch (GET_CODE (x))
23960 case CONST_INT:
23961 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23962 break;
23964 default:
23965 output_operand_lossage ("Unsupported operand for code '%c'", code);
23967 return;
23969 case 'B':
23970 if (CONST_INT_P (x))
23972 HOST_WIDE_INT val;
23973 val = ARM_SIGN_EXTEND (~INTVAL (x));
23974 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23976 else
23978 putc ('~', stream);
23979 output_addr_const (stream, x);
23981 return;
23983 case 'b':
23984 /* Print the log2 of a CONST_INT. */
23986 HOST_WIDE_INT val;
23988 if (!CONST_INT_P (x)
23989 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23990 output_operand_lossage ("Unsupported operand for code '%c'", code);
23991 else
23992 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23994 return;
23996 case 'L':
23997 /* The low 16 bits of an immediate constant. */
23998 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23999 return;
24001 case 'i':
24002 fprintf (stream, "%s", arithmetic_instr (x, 1));
24003 return;
24005 case 'I':
24006 fprintf (stream, "%s", arithmetic_instr (x, 0));
24007 return;
24009 case 'S':
24011 HOST_WIDE_INT val;
24012 const char *shift;
24014 shift = shift_op (x, &val);
24016 if (shift)
24018 fprintf (stream, ", %s ", shift);
24019 if (val == -1)
24020 arm_print_operand (stream, XEXP (x, 1), 0);
24021 else
24022 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24025 return;
24027 /* An explanation of the 'Q', 'R' and 'H' register operands:
24029 In a pair of registers containing a DI or DF value the 'Q'
24030 operand returns the register number of the register containing
24031 the least significant part of the value. The 'R' operand returns
24032 the register number of the register containing the most
24033 significant part of the value.
24035 The 'H' operand returns the higher of the two register numbers.
24036 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24037 same as the 'Q' operand, since the most significant part of the
24038 value is held in the lower number register. The reverse is true
24039 on systems where WORDS_BIG_ENDIAN is false.
24041 The purpose of these operands is to distinguish between cases
24042 where the endian-ness of the values is important (for example
24043 when they are added together), and cases where the endian-ness
24044 is irrelevant, but the order of register operations is important.
24045 For example when loading a value from memory into a register
24046 pair, the endian-ness does not matter. Provided that the value
24047 from the lower memory address is put into the lower numbered
24048 register, and the value from the higher address is put into the
24049 higher numbered register, the load will work regardless of whether
24050 the value being loaded is big-wordian or little-wordian. The
24051 order of the two register loads can matter however, if the address
24052 of the memory location is actually held in one of the registers
24053 being overwritten by the load.
24055 The 'Q' and 'R' constraints are also available for 64-bit
24056 constants. */
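/* For example, for a DImode value held in r0/r1 with little-endian word
   order, %Q prints r0, %R prints r1 and %H prints r1; with big-endian
   word order %Q prints r1, %R prints r0 and %H still prints r1.  */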
24057 case 'Q':
24058 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24060 rtx part = gen_lowpart (SImode, x);
24061 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24062 return;
24065 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24067 output_operand_lossage ("invalid operand for code '%c'", code);
24068 return;
24071 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24072 return;
24074 case 'R':
24075 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24077 machine_mode mode = GET_MODE (x);
24078 rtx part;
24080 if (mode == VOIDmode)
24081 mode = DImode;
24082 part = gen_highpart_mode (SImode, mode, x);
24083 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24084 return;
24087 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24089 output_operand_lossage ("invalid operand for code '%c'", code);
24090 return;
24093 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24094 return;
24096 case 'H':
24097 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24099 output_operand_lossage ("invalid operand for code '%c'", code);
24100 return;
24103 asm_fprintf (stream, "%r", REGNO (x) + 1);
24104 return;
24106 case 'J':
24107 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24109 output_operand_lossage ("invalid operand for code '%c'", code);
24110 return;
24113 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24114 return;
24116 case 'K':
24117 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24119 output_operand_lossage ("invalid operand for code '%c'", code);
24120 return;
24123 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24124 return;
24126 case 'm':
24127 asm_fprintf (stream, "%r",
24128 REG_P (XEXP (x, 0))
24129 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24130 return;
24132 case 'M':
24133 asm_fprintf (stream, "{%r-%r}",
24134 REGNO (x),
24135 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24136 return;
24138 /* Like 'M', but writing doubleword vector registers, for use by Neon
24139 insns. */
24140 case 'h':
24142 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24143 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24144 if (numregs == 1)
24145 asm_fprintf (stream, "{d%d}", regno);
24146 else
24147 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24149 return;
24151 case 'd':
24152 /* CONST_TRUE_RTX means always -- that's the default. */
24153 if (x == const_true_rtx)
24154 return;
24156 if (!COMPARISON_P (x))
24158 output_operand_lossage ("invalid operand for code '%c'", code);
24159 return;
24162 fputs (arm_condition_codes[get_arm_condition_code (x)],
24163 stream);
24164 return;
24166 case 'D':
24167 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24168 want to do that. */
24169 if (x == const_true_rtx)
24171 output_operand_lossage ("instruction never executed");
24172 return;
24174 if (!COMPARISON_P (x))
24176 output_operand_lossage ("invalid operand for code '%c'", code);
24177 return;
24180 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24181 (get_arm_condition_code (x))],
24182 stream);
24183 return;
24185 case 'V':
24187 /* Output the LSB (shift) and width for a bitmask instruction
24188 based on a literal mask. The LSB is printed first,
24189 followed by the width.
24191 Eg. For 0b1...1110001, the result is #1, #3. */
24192 if (!CONST_INT_P (x))
24194 output_operand_lossage ("invalid operand for code '%c'", code);
24195 return;
24198 unsigned HOST_WIDE_INT val
24199 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24200 int lsb = exact_log2 (val & -val);
24201 asm_fprintf (stream, "#%d, #%d", lsb,
24202 (exact_log2 (val + (val & -val)) - lsb));
24204 return;
24206 case 's':
24207 case 'W':
24208 case 'X':
24209 case 'Y':
24210 case 'Z':
24211 /* Former Maverick support, removed after GCC-4.7. */
24212 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24213 return;
24215 case 'U':
24216 if (!REG_P (x)
24217 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24218 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24219 /* Bad value for wCG register number. */
24221 output_operand_lossage ("invalid operand for code '%c'", code);
24222 return;
24225 else
24226 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24227 return;
24229 /* Print an iWMMXt control register name. */
24230 case 'w':
24231 if (!CONST_INT_P (x)
24232 || INTVAL (x) < 0
24233 || INTVAL (x) >= 16)
24234 /* Bad value for wC register number. */
24236 output_operand_lossage ("invalid operand for code '%c'", code);
24237 return;
24240 else
24242 static const char * wc_reg_names [16] =
24244 "wCID", "wCon", "wCSSF", "wCASF",
24245 "wC4", "wC5", "wC6", "wC7",
24246 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24247 "wC12", "wC13", "wC14", "wC15"
24250 fputs (wc_reg_names [INTVAL (x)], stream);
24252 return;
24254 /* Print the high single-precision register of a VFP double-precision
24255 register. */
24256 case 'p':
24258 machine_mode mode = GET_MODE (x);
24259 int regno;
24261 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24263 output_operand_lossage ("invalid operand for code '%c'", code);
24264 return;
24267 regno = REGNO (x);
24268 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24270 output_operand_lossage ("invalid operand for code '%c'", code);
24271 return;
24274 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24276 return;
24278 /* Print a VFP/Neon double precision or quad precision register name. */
24279 case 'P':
24280 case 'q':
24282 machine_mode mode = GET_MODE (x);
24283 int is_quad = (code == 'q');
24284 int regno;
24286 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24288 output_operand_lossage ("invalid operand for code '%c'", code);
24289 return;
24292 if (!REG_P (x)
24293 || !IS_VFP_REGNUM (REGNO (x)))
24295 output_operand_lossage ("invalid operand for code '%c'", code);
24296 return;
24299 regno = REGNO (x);
24300 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24301 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24303 output_operand_lossage ("invalid operand for code '%c'", code);
24304 return;
24307 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24308 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24310 return;
24312 /* These two codes print the low/high doubleword register of a Neon quad
24313 register, respectively. For pair-structure types, they can also print
24314 low/high quadword registers. */
24315 case 'e':
24316 case 'f':
24318 machine_mode mode = GET_MODE (x);
24319 int regno;
24321 if ((GET_MODE_SIZE (mode) != 16
24322 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24324 output_operand_lossage ("invalid operand for code '%c'", code);
24325 return;
24328 regno = REGNO (x);
24329 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24331 output_operand_lossage ("invalid operand for code '%c'", code);
24332 return;
24335 if (GET_MODE_SIZE (mode) == 16)
24336 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24337 + (code == 'f' ? 1 : 0));
24338 else
24339 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24340 + (code == 'f' ? 1 : 0));
24342 return;
24344 /* Print a VFPv3 floating-point constant, represented as an integer
24345 index. */
24346 case 'G':
24348 int index = vfp3_const_double_index (x);
24349 gcc_assert (index != -1);
24350 fprintf (stream, "%d", index);
24352 return;
24354 /* Print bits representing opcode features for Neon.
24356 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24357 and polynomials as unsigned.
24359 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24361 Bit 2 is 1 for rounding functions, 0 otherwise. */
24363 /* Identify the type as 's', 'u', 'p' or 'f'. */
24364 case 'T':
24366 HOST_WIDE_INT bits = INTVAL (x);
24367 fputc ("uspf"[bits & 3], stream);
24369 return;
24371 /* Likewise, but signed and unsigned integers are both 'i'. */
24372 case 'F':
24374 HOST_WIDE_INT bits = INTVAL (x);
24375 fputc ("iipf"[bits & 3], stream);
24377 return;
24379 /* As for 'T', but emit 'u' instead of 'p'. */
24380 case 't':
24382 HOST_WIDE_INT bits = INTVAL (x);
24383 fputc ("usuf"[bits & 3], stream);
24385 return;
24387 /* Bit 2: rounding (vs none). */
24388 case 'O':
24390 HOST_WIDE_INT bits = INTVAL (x);
24391 fputs ((bits & 4) != 0 ? "r" : "", stream);
24393 return;
24395 /* Memory operand for vld1/vst1 instruction. */
24396 case 'A':
24398 rtx addr;
24399 bool postinc = FALSE;
24400 rtx postinc_reg = NULL;
24401 unsigned align, memsize, align_bits;
24403 gcc_assert (MEM_P (x));
24404 addr = XEXP (x, 0);
24405 if (GET_CODE (addr) == POST_INC)
24407 postinc = 1;
24408 addr = XEXP (addr, 0);
24410 if (GET_CODE (addr) == POST_MODIFY)
24412 postinc_reg = XEXP( XEXP (addr, 1), 1);
24413 addr = XEXP (addr, 0);
24415 asm_fprintf (stream, "[%r", REGNO (addr));
24417 /* We know the alignment of this access, so we can emit a hint in the
24418 instruction (for some alignments) as an aid to the memory subsystem
24419 of the target. */
24420 align = MEM_ALIGN (x) >> 3;
24421 memsize = MEM_SIZE (x);
24423 /* Only certain alignment specifiers are supported by the hardware. */
24424 if (memsize == 32 && (align % 32) == 0)
24425 align_bits = 256;
24426 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24427 align_bits = 128;
24428 else if (memsize >= 8 && (align % 8) == 0)
24429 align_bits = 64;
24430 else
24431 align_bits = 0;
24433 if (align_bits != 0)
24434 asm_fprintf (stream, ":%d", align_bits);
24436 asm_fprintf (stream, "]");
24438 if (postinc)
24439 fputs("!", stream);
24440 if (postinc_reg)
24441 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24443 return;
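/* For example, a 16-byte access whose MEM_ALIGN is 128 bits on base
   register r0 is printed as "[r0:128]", or "[r0:128]!" when the address
   is post-incremented (the register used here is illustrative).  */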
24445 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24446 rtx_code, the memory operand's output looks like one of the following.
24447 1. [Rn], #+/-<imm>
24448 2. [Rn, #+/-<imm>]!
24449 3. [Rn, #+/-<imm>]
24450 4. [Rn]. */
24451 case 'E':
24453 rtx addr;
24454 rtx postinc_reg = NULL;
24455 unsigned inc_val = 0;
24456 enum rtx_code code;
24458 gcc_assert (MEM_P (x));
24459 addr = XEXP (x, 0);
24460 code = GET_CODE (addr);
24461 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24462 || code == PRE_DEC)
24464 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24465 inc_val = GET_MODE_SIZE (GET_MODE (x));
24466 if (code == POST_INC || code == POST_DEC)
24467 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24468 ? "": "-", inc_val);
24469 else
24470 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24471 ? "": "-", inc_val);
24473 else if (code == POST_MODIFY || code == PRE_MODIFY)
24475 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24476 postinc_reg = XEXP (XEXP (addr, 1), 1);
24477 if (postinc_reg && CONST_INT_P (postinc_reg))
24479 if (code == POST_MODIFY)
24480 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24481 else
24482 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24485 else if (code == PLUS)
24487 rtx base = XEXP (addr, 0);
24488 rtx index = XEXP (addr, 1);
24490 gcc_assert (REG_P (base) && CONST_INT_P (index));
24492 HOST_WIDE_INT offset = INTVAL (index);
24493 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24495 else
24497 gcc_assert (REG_P (addr));
24498 asm_fprintf (stream, "[%r]",REGNO (addr));
24501 return;
24503 case 'C':
24505 rtx addr;
24507 gcc_assert (MEM_P (x));
24508 addr = XEXP (x, 0);
24509 gcc_assert (REG_P (addr));
24510 asm_fprintf (stream, "[%r]", REGNO (addr));
24512 return;
24514 /* Translate an S register number into a D register number and element index. */
24515 case 'y':
24517 machine_mode mode = GET_MODE (x);
24518 int regno;
24520 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24522 output_operand_lossage ("invalid operand for code '%c'", code);
24523 return;
24526 regno = REGNO (x);
24527 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24529 output_operand_lossage ("invalid operand for code '%c'", code);
24530 return;
24533 regno = regno - FIRST_VFP_REGNUM;
24534 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24536 return;
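/* For example, %y applied to s5 prints "d2[1]" (s5 is the odd half of d2).  */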
24538 case 'v':
24539 gcc_assert (CONST_DOUBLE_P (x));
24540 int result;
24541 result = vfp3_const_double_for_fract_bits (x);
24542 if (result == 0)
24543 result = vfp3_const_double_for_bits (x);
24544 fprintf (stream, "#%d", result);
24545 return;
24547 /* Register specifier for vld1.16/vst1.16. Translate the S register
24548 number into a D register number and element index. */
24549 case 'z':
24551 machine_mode mode = GET_MODE (x);
24552 int regno;
24554 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24556 output_operand_lossage ("invalid operand for code '%c'", code);
24557 return;
24560 regno = REGNO (x);
24561 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24563 output_operand_lossage ("invalid operand for code '%c'", code);
24564 return;
24567 regno = regno - FIRST_VFP_REGNUM;
24568 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24570 return;
24572 default:
24573 if (x == 0)
24575 output_operand_lossage ("missing operand");
24576 return;
24579 switch (GET_CODE (x))
24581 case REG:
24582 asm_fprintf (stream, "%r", REGNO (x));
24583 break;
24585 case MEM:
24586 output_address (GET_MODE (x), XEXP (x, 0));
24587 break;
24589 case CONST_DOUBLE:
24591 char fpstr[20];
24592 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24593 sizeof (fpstr), 0, 1);
24594 fprintf (stream, "#%s", fpstr);
24596 break;
24598 default:
24599 gcc_assert (GET_CODE (x) != NEG);
24600 fputc ('#', stream);
24601 if (GET_CODE (x) == HIGH)
24603 fputs (":lower16:", stream);
24604 x = XEXP (x, 0);
24607 output_addr_const (stream, x);
24608 break;
24613 /* Target hook for printing a memory address. */
24614 static void
24615 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24617 if (TARGET_32BIT)
24619 int is_minus = GET_CODE (x) == MINUS;
24621 if (REG_P (x))
24622 asm_fprintf (stream, "[%r]", REGNO (x));
24623 else if (GET_CODE (x) == PLUS || is_minus)
24625 rtx base = XEXP (x, 0);
24626 rtx index = XEXP (x, 1);
24627 HOST_WIDE_INT offset = 0;
24628 if (!REG_P (base)
24629 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24631 /* Ensure that BASE is a register. */
24632 /* (one of them must be). */
24633 /* Also ensure the SP is not used as an index register. */
24634 std::swap (base, index);
24636 switch (GET_CODE (index))
24638 case CONST_INT:
24639 offset = INTVAL (index);
24640 if (is_minus)
24641 offset = -offset;
24642 asm_fprintf (stream, "[%r, #%wd]",
24643 REGNO (base), offset);
24644 break;
24646 case REG:
24647 asm_fprintf (stream, "[%r, %s%r]",
24648 REGNO (base), is_minus ? "-" : "",
24649 REGNO (index));
24650 break;
24652 case MULT:
24653 case ASHIFTRT:
24654 case LSHIFTRT:
24655 case ASHIFT:
24656 case ROTATERT:
24658 asm_fprintf (stream, "[%r, %s%r",
24659 REGNO (base), is_minus ? "-" : "",
24660 REGNO (XEXP (index, 0)));
24661 arm_print_operand (stream, index, 'S');
24662 fputs ("]", stream);
24663 break;
24666 default:
24667 gcc_unreachable ();
24670 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24671 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24673 gcc_assert (REG_P (XEXP (x, 0)));
24675 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24676 asm_fprintf (stream, "[%r, #%s%d]!",
24677 REGNO (XEXP (x, 0)),
24678 GET_CODE (x) == PRE_DEC ? "-" : "",
24679 GET_MODE_SIZE (mode));
24680 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24681 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24682 else
24683 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24684 GET_CODE (x) == POST_DEC ? "-" : "",
24685 GET_MODE_SIZE (mode));
24687 else if (GET_CODE (x) == PRE_MODIFY)
24689 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24690 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24691 asm_fprintf (stream, "#%wd]!",
24692 INTVAL (XEXP (XEXP (x, 1), 1)));
24693 else
24694 asm_fprintf (stream, "%r]!",
24695 REGNO (XEXP (XEXP (x, 1), 1)));
24697 else if (GET_CODE (x) == POST_MODIFY)
24699 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24700 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24701 asm_fprintf (stream, "#%wd",
24702 INTVAL (XEXP (XEXP (x, 1), 1)));
24703 else
24704 asm_fprintf (stream, "%r",
24705 REGNO (XEXP (XEXP (x, 1), 1)));
24707 else output_addr_const (stream, x);
24709 else
24711 if (REG_P (x))
24712 asm_fprintf (stream, "[%r]", REGNO (x));
24713 else if (GET_CODE (x) == POST_INC)
24714 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24715 else if (GET_CODE (x) == PLUS)
24717 gcc_assert (REG_P (XEXP (x, 0)));
24718 if (CONST_INT_P (XEXP (x, 1)))
24719 asm_fprintf (stream, "[%r, #%wd]",
24720 REGNO (XEXP (x, 0)),
24721 INTVAL (XEXP (x, 1)));
24722 else
24723 asm_fprintf (stream, "[%r, %r]",
24724 REGNO (XEXP (x, 0)),
24725 REGNO (XEXP (x, 1)));
24727 else
24728 output_addr_const (stream, x);
24732 /* Target hook for indicating whether a punctuation character for
24733 TARGET_PRINT_OPERAND is valid. */
24734 static bool
24735 arm_print_operand_punct_valid_p (unsigned char code)
24737 return (code == '@' || code == '|' || code == '.'
24738 || code == '(' || code == ')' || code == '#'
24739 || (TARGET_32BIT && (code == '?'))
24740 || (TARGET_THUMB2 && (code == '!'))
24741 || (TARGET_THUMB && (code == '_')));
24744 /* Target hook for assembling integer objects. The ARM version needs to
24745 handle word-sized values specially. */
24746 static bool
24747 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24749 machine_mode mode;
24751 if (size == UNITS_PER_WORD && aligned_p)
24753 fputs ("\t.word\t", asm_out_file);
24754 output_addr_const (asm_out_file, x);
24756 /* Mark symbols as position independent. We only do this in the
24757 .text segment, not in the .data segment. */
24758 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24759 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24761 /* See legitimize_pic_address for an explanation of the
24762 TARGET_VXWORKS_RTP check. */
24763 /* References to weak symbols cannot be resolved locally:
24764 they may be overridden by a non-weak definition at link
24765 time. */
24766 if (!arm_pic_data_is_text_relative
24767 || (SYMBOL_REF_P (x)
24768 && (!SYMBOL_REF_LOCAL_P (x)
24769 || (SYMBOL_REF_DECL (x)
24770 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24771 || (SYMBOL_REF_FUNCTION_P (x)
24772 && !arm_fdpic_local_funcdesc_p (x)))))
24774 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24775 fputs ("(GOTFUNCDESC)", asm_out_file);
24776 else
24777 fputs ("(GOT)", asm_out_file);
24779 else
24781 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24782 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24783 else
24785 bool is_readonly;
24787 if (!TARGET_FDPIC
24788 || arm_is_segment_info_known (x, &is_readonly))
24789 fputs ("(GOTOFF)", asm_out_file);
24790 else
24791 fputs ("(GOT)", asm_out_file);
24796 /* For FDPIC we also have to mark symbol for .data section. */
24797 if (TARGET_FDPIC
24798 && !making_const_table
24799 && SYMBOL_REF_P (x)
24800 && SYMBOL_REF_FUNCTION_P (x))
24801 fputs ("(FUNCDESC)", asm_out_file);
24803 fputc ('\n', asm_out_file);
24804 return true;
24807 mode = GET_MODE (x);
24809 if (arm_vector_mode_supported_p (mode))
24811 int i, units;
24813 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24815 units = CONST_VECTOR_NUNITS (x);
24816 size = GET_MODE_UNIT_SIZE (mode);
24818 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24819 for (i = 0; i < units; i++)
24821 rtx elt = CONST_VECTOR_ELT (x, i);
24822 assemble_integer
24823 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24825 else
24826 for (i = 0; i < units; i++)
24828 rtx elt = CONST_VECTOR_ELT (x, i);
24829 assemble_real
24830 (*CONST_DOUBLE_REAL_VALUE (elt),
24831 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24832 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24835 return true;
24838 return default_assemble_integer (x, size, aligned_p);
24841 static void
24842 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24844 section *s;
24846 if (!TARGET_AAPCS_BASED)
24848 (is_ctor ?
24849 default_named_section_asm_out_constructor
24850 : default_named_section_asm_out_destructor) (symbol, priority);
24851 return;
24854 /* Put these in the .init_array section, using a special relocation. */
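/* Sketch of the expected output (the symbol name "foo" is hypothetical):
   a constructor with priority 65 is placed in a section named
   ".init_array.00065" and emitted as "\t.word\tfoo(target1)", the
   "(target1)" suffix being the special relocation mentioned above.  */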
24855 if (priority != DEFAULT_INIT_PRIORITY)
24857 char buf[18];
24858 sprintf (buf, "%s.%.5u",
24859 is_ctor ? ".init_array" : ".fini_array",
24860 priority);
24861 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24863 else if (is_ctor)
24864 s = ctors_section;
24865 else
24866 s = dtors_section;
24868 switch_to_section (s);
24869 assemble_align (POINTER_SIZE);
24870 fputs ("\t.word\t", asm_out_file);
24871 output_addr_const (asm_out_file, symbol);
24872 fputs ("(target1)\n", asm_out_file);
24875 /* Add a function to the list of static constructors. */
24877 static void
24878 arm_elf_asm_constructor (rtx symbol, int priority)
24880 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24883 /* Add a function to the list of static destructors. */
24885 static void
24886 arm_elf_asm_destructor (rtx symbol, int priority)
24888 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24891 /* A finite state machine takes care of noticing whether or not instructions
24892 can be conditionally executed, and thus decrease execution time and code
24893 size by deleting branch instructions. The fsm is controlled by
24894 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24896 /* The states of the fsm controlling condition codes are:
24897 0: normal, do nothing special
24898 1: make ASM_OUTPUT_OPCODE not output this instruction
24899 2: make ASM_OUTPUT_OPCODE not output this instruction
24900 3: make instructions conditional
24901 4: make instructions conditional
24903 State transitions (state->state by whom under condition):
24904 0 -> 1 final_prescan_insn if the `target' is a label
24905 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24906 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24907 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24908 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24909 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24910 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24911 (the target insn is arm_target_insn).
24913 If the jump clobbers the conditions then we use states 2 and 4.
24915 A similar thing can be done with conditional return insns.
24917 XXX In case the `target' is an unconditional branch, this conditionalising
24918 of the instructions always reduces code size, but not always execution
24919 time. But then, I want to reduce the code size to somewhere near what
24920 /bin/cc produces. */
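/* As a hedged illustration of the overall effect (register names and label
   are arbitrary), the ccfsm turns a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   suppressing the branch and conditionalising the skipped insn.  */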
24922 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24923 instructions. When a COND_EXEC instruction is seen the subsequent
24924 instructions are scanned so that multiple conditional instructions can be
24925 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24926 specify the length and true/false mask for the IT block. These will be
24927 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
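/* For example (a sketch, not verbatim compiler output): three consecutive
   single conditional insns with conditions EQ, EQ and NE give
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b011, which
   thumb2_asm_output_opcode prints as a single "itte eq" covering two
   then-insns and one else-insn.  */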
24929 /* Returns the index of the ARM condition code string in
24930 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24931 COMPARISON should be an rtx like `(eq (...) (...))'. */
24933 enum arm_cond_code
24934 maybe_get_arm_condition_code (rtx comparison)
24936 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24937 enum arm_cond_code code;
24938 enum rtx_code comp_code = GET_CODE (comparison);
24940 if (GET_MODE_CLASS (mode) != MODE_CC)
24941 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24942 XEXP (comparison, 1));
24944 switch (mode)
24946 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24947 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24948 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24949 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24950 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24951 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24952 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24953 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24954 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24955 case E_CC_DLTUmode: code = ARM_CC;
24957 dominance:
24958 if (comp_code == EQ)
24959 return ARM_INVERSE_CONDITION_CODE (code);
24960 if (comp_code == NE)
24961 return code;
24962 return ARM_NV;
24964 case E_CC_NZmode:
24965 switch (comp_code)
24967 case NE: return ARM_NE;
24968 case EQ: return ARM_EQ;
24969 case GE: return ARM_PL;
24970 case LT: return ARM_MI;
24971 default: return ARM_NV;
24974 case E_CC_Zmode:
24975 switch (comp_code)
24977 case NE: return ARM_NE;
24978 case EQ: return ARM_EQ;
24979 default: return ARM_NV;
24982 case E_CC_Nmode:
24983 switch (comp_code)
24985 case NE: return ARM_MI;
24986 case EQ: return ARM_PL;
24987 default: return ARM_NV;
24990 case E_CCFPEmode:
24991 case E_CCFPmode:
24992 /* We can handle all cases except UNEQ and LTGT. */
24993 switch (comp_code)
24995 case GE: return ARM_GE;
24996 case GT: return ARM_GT;
24997 case LE: return ARM_LS;
24998 case LT: return ARM_MI;
24999 case NE: return ARM_NE;
25000 case EQ: return ARM_EQ;
25001 case ORDERED: return ARM_VC;
25002 case UNORDERED: return ARM_VS;
25003 case UNLT: return ARM_LT;
25004 case UNLE: return ARM_LE;
25005 case UNGT: return ARM_HI;
25006 case UNGE: return ARM_PL;
25007 /* UNEQ and LTGT do not have a representation. */
25008 case UNEQ: /* Fall through. */
25009 case LTGT: /* Fall through. */
25010 default: return ARM_NV;
25013 case E_CC_SWPmode:
25014 switch (comp_code)
25016 case NE: return ARM_NE;
25017 case EQ: return ARM_EQ;
25018 case GE: return ARM_LE;
25019 case GT: return ARM_LT;
25020 case LE: return ARM_GE;
25021 case LT: return ARM_GT;
25022 case GEU: return ARM_LS;
25023 case GTU: return ARM_CC;
25024 case LEU: return ARM_CS;
25025 case LTU: return ARM_HI;
25026 default: return ARM_NV;
25029 case E_CC_Cmode:
25030 switch (comp_code)
25032 case LTU: return ARM_CS;
25033 case GEU: return ARM_CC;
25034 default: return ARM_NV;
25037 case E_CC_NVmode:
25038 switch (comp_code)
25040 case GE: return ARM_GE;
25041 case LT: return ARM_LT;
25042 default: return ARM_NV;
25045 case E_CC_Bmode:
25046 switch (comp_code)
25048 case GEU: return ARM_CS;
25049 case LTU: return ARM_CC;
25050 default: return ARM_NV;
25053 case E_CC_Vmode:
25054 switch (comp_code)
25056 case NE: return ARM_VS;
25057 case EQ: return ARM_VC;
25058 default: return ARM_NV;
25061 case E_CC_ADCmode:
25062 switch (comp_code)
25064 case GEU: return ARM_CS;
25065 case LTU: return ARM_CC;
25066 default: return ARM_NV;
25069 case E_CCmode:
25070 case E_CC_RSBmode:
25071 switch (comp_code)
25073 case NE: return ARM_NE;
25074 case EQ: return ARM_EQ;
25075 case GE: return ARM_GE;
25076 case GT: return ARM_GT;
25077 case LE: return ARM_LE;
25078 case LT: return ARM_LT;
25079 case GEU: return ARM_CS;
25080 case GTU: return ARM_HI;
25081 case LEU: return ARM_LS;
25082 case LTU: return ARM_CC;
25083 default: return ARM_NV;
25086 default: gcc_unreachable ();
25090 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25091 static enum arm_cond_code
25092 get_arm_condition_code (rtx comparison)
25094 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25095 gcc_assert (code != ARM_NV);
25096 return code;
25099 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25100 code registers when not targeting Thumb1. The VFP condition register
25101 only exists when generating hard-float code. */
25102 static bool
25103 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25105 if (!TARGET_32BIT)
25106 return false;
25108 *p1 = CC_REGNUM;
25109 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25110 return true;
25113 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25114 instructions. */
25115 void
25116 thumb2_final_prescan_insn (rtx_insn *insn)
25118 rtx_insn *first_insn = insn;
25119 rtx body = PATTERN (insn);
25120 rtx predicate;
25121 enum arm_cond_code code;
25122 int n;
25123 int mask;
25124 int max;
25126 /* max_insns_skipped in the tune was already taken into account in the
25127 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25128 just emit the IT blocks as large as we can. It does not make sense to split
25129 the IT blocks. */
25130 max = MAX_INSN_PER_IT_BLOCK;
25132 /* Remove the previous insn from the count of insns to be output. */
25133 if (arm_condexec_count)
25134 arm_condexec_count--;
25136 /* Nothing to do if we are already inside a conditional block. */
25137 if (arm_condexec_count)
25138 return;
25140 if (GET_CODE (body) != COND_EXEC)
25141 return;
25143 /* Conditional jumps are implemented directly. */
25144 if (JUMP_P (insn))
25145 return;
25147 predicate = COND_EXEC_TEST (body);
25148 arm_current_cc = get_arm_condition_code (predicate);
25150 n = get_attr_ce_count (insn);
25151 arm_condexec_count = 1;
25152 arm_condexec_mask = (1 << n) - 1;
25153 arm_condexec_masklen = n;
25154 /* See if subsequent instructions can be combined into the same block. */
25155 for (;;)
25157 insn = next_nonnote_insn (insn);
25159 /* Jumping into the middle of an IT block is illegal, so a label or
25160 barrier terminates the block. */
25161 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25162 break;
25164 body = PATTERN (insn);
25165 /* USE and CLOBBER aren't really insns, so just skip them. */
25166 if (GET_CODE (body) == USE
25167 || GET_CODE (body) == CLOBBER)
25168 continue;
25170 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25171 if (GET_CODE (body) != COND_EXEC)
25172 break;
25173 /* Maximum number of conditionally executed instructions in a block. */
25174 n = get_attr_ce_count (insn);
25175 if (arm_condexec_masklen + n > max)
25176 break;
25178 predicate = COND_EXEC_TEST (body);
25179 code = get_arm_condition_code (predicate);
25180 mask = (1 << n) - 1;
25181 if (arm_current_cc == code)
25182 arm_condexec_mask |= (mask << arm_condexec_masklen);
25183 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25184 break;
25186 arm_condexec_count++;
25187 arm_condexec_masklen += n;
25189 /* A jump must be the last instruction in a conditional block. */
25190 if (JUMP_P (insn))
25191 break;
25193 /* Restore recog_data (getting the attributes of other insns can
25194 destroy this array, but final.cc assumes that it remains intact
25195 across this call). */
25196 extract_constrain_insn_cached (first_insn);
25199 void
25200 arm_final_prescan_insn (rtx_insn *insn)
25202 /* BODY will hold the body of INSN. */
25203 rtx body = PATTERN (insn);
25205 /* This will be 1 if trying to repeat the trick, and things need to be
25206 reversed if it appears to fail. */
25207 int reverse = 0;
25209 /* If we start with a return insn, we only succeed if we find another one. */
25210 int seeking_return = 0;
25211 enum rtx_code return_code = UNKNOWN;
25213 /* START_INSN will hold the insn from where we start looking. This is the
25214 first insn after the following code_label if REVERSE is true. */
25215 rtx_insn *start_insn = insn;
25217 /* If in state 4, check if the target branch is reached, in order to
25218 change back to state 0. */
25219 if (arm_ccfsm_state == 4)
25221 if (insn == arm_target_insn)
25223 arm_target_insn = NULL;
25224 arm_ccfsm_state = 0;
25226 return;
25229 /* If in state 3, it is possible to repeat the trick, if this insn is an
25230 unconditional branch to a label, and immediately following this branch
25231 is the previous target label which is only used once, and the label this
25232 branch jumps to is not too far off. */
25233 if (arm_ccfsm_state == 3)
25235 if (simplejump_p (insn))
25237 start_insn = next_nonnote_insn (start_insn);
25238 if (BARRIER_P (start_insn))
25240 /* XXX Isn't this always a barrier? */
25241 start_insn = next_nonnote_insn (start_insn);
25243 if (LABEL_P (start_insn)
25244 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25245 && LABEL_NUSES (start_insn) == 1)
25246 reverse = TRUE;
25247 else
25248 return;
25250 else if (ANY_RETURN_P (body))
25252 start_insn = next_nonnote_insn (start_insn);
25253 if (BARRIER_P (start_insn))
25254 start_insn = next_nonnote_insn (start_insn);
25255 if (LABEL_P (start_insn)
25256 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25257 && LABEL_NUSES (start_insn) == 1)
25259 reverse = TRUE;
25260 seeking_return = 1;
25261 return_code = GET_CODE (body);
25263 else
25264 return;
25266 else
25267 return;
25270 gcc_assert (!arm_ccfsm_state || reverse);
25271 if (!JUMP_P (insn))
25272 return;
25274 /* This jump might be paralleled with a clobber of the condition codes;
25275 the jump should always come first. */
25276 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25277 body = XVECEXP (body, 0, 0);
25279 if (reverse
25280 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25281 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25283 int insns_skipped;
25284 int fail = FALSE, succeed = FALSE;
25285 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25286 int then_not_else = TRUE;
25287 rtx_insn *this_insn = start_insn;
25288 rtx label = 0;
25290 /* Register the insn jumped to. */
25291 if (reverse)
25293 if (!seeking_return)
25294 label = XEXP (SET_SRC (body), 0);
25296 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25297 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25298 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25300 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25301 then_not_else = FALSE;
25303 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25305 seeking_return = 1;
25306 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25308 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25310 seeking_return = 1;
25311 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25312 then_not_else = FALSE;
25314 else
25315 gcc_unreachable ();
25317 /* See how many insns this branch skips, and what kind of insns. If all
25318 insns are okay, and the label or unconditional branch to the same
25319 label is not too far away, succeed. */
25320 for (insns_skipped = 0;
25321 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25323 rtx scanbody;
25325 this_insn = next_nonnote_insn (this_insn);
25326 if (!this_insn)
25327 break;
25329 switch (GET_CODE (this_insn))
25331 case CODE_LABEL:
25332 /* Succeed if it is the target label, otherwise fail since
25333 control falls in from somewhere else. */
25334 if (this_insn == label)
25336 arm_ccfsm_state = 1;
25337 succeed = TRUE;
25339 else
25340 fail = TRUE;
25341 break;
25343 case BARRIER:
25344 /* Succeed if the following insn is the target label.
25345 Otherwise fail.
25346 If return insns are used then the last insn in a function
25347 will be a barrier. */
25348 this_insn = next_nonnote_insn (this_insn);
25349 if (this_insn && this_insn == label)
25351 arm_ccfsm_state = 1;
25352 succeed = TRUE;
25354 else
25355 fail = TRUE;
25356 break;
25358 case CALL_INSN:
25359 /* The AAPCS says that conditional calls should not be
25360 used since they make interworking inefficient (the
25361 linker can't transform BL<cond> into BLX). That's
25362 only a problem if the machine has BLX. */
25363 if (arm_arch5t)
25365 fail = TRUE;
25366 break;
25369 /* Succeed if the following insn is the target label, or
25370 if the following two insns are a barrier and the
25371 target label. */
25372 this_insn = next_nonnote_insn (this_insn);
25373 if (this_insn && BARRIER_P (this_insn))
25374 this_insn = next_nonnote_insn (this_insn);
25376 if (this_insn && this_insn == label
25377 && insns_skipped < max_insns_skipped)
25379 arm_ccfsm_state = 1;
25380 succeed = TRUE;
25382 else
25383 fail = TRUE;
25384 break;
25386 case JUMP_INSN:
25387 /* If this is an unconditional branch to the same label, succeed.
25388 If it is to another label, do nothing. If it is conditional,
25389 fail. */
25390 /* XXX Probably, the tests for SET and the PC are
25391 unnecessary. */
25393 scanbody = PATTERN (this_insn);
25394 if (GET_CODE (scanbody) == SET
25395 && GET_CODE (SET_DEST (scanbody)) == PC)
25397 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25398 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25400 arm_ccfsm_state = 2;
25401 succeed = TRUE;
25403 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25404 fail = TRUE;
25406 /* Fail if a conditional return is undesirable (e.g. on a
25407 StrongARM), but still allow this if optimizing for size. */
25408 else if (GET_CODE (scanbody) == return_code
25409 && !use_return_insn (TRUE, NULL)
25410 && !optimize_size)
25411 fail = TRUE;
25412 else if (GET_CODE (scanbody) == return_code)
25414 arm_ccfsm_state = 2;
25415 succeed = TRUE;
25417 else if (GET_CODE (scanbody) == PARALLEL)
25419 switch (get_attr_conds (this_insn))
25421 case CONDS_NOCOND:
25422 break;
25423 default:
25424 fail = TRUE;
25425 break;
25428 else
25429 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25431 break;
25433 case INSN:
25434 /* Instructions using or affecting the condition codes make it
25435 fail. */
25436 scanbody = PATTERN (this_insn);
25437 if (!(GET_CODE (scanbody) == SET
25438 || GET_CODE (scanbody) == PARALLEL)
25439 || get_attr_conds (this_insn) != CONDS_NOCOND)
25440 fail = TRUE;
25441 break;
25443 default:
25444 break;
25447 if (succeed)
25449 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25450 arm_target_label = CODE_LABEL_NUMBER (label);
25451 else
25453 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25455 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25457 this_insn = next_nonnote_insn (this_insn);
25458 gcc_assert (!this_insn
25459 || (!BARRIER_P (this_insn)
25460 && !LABEL_P (this_insn)));
25462 if (!this_insn)
25464 /* Oh, dear! We ran off the end... give up. */
25465 extract_constrain_insn_cached (insn);
25466 arm_ccfsm_state = 0;
25467 arm_target_insn = NULL;
25468 return;
25470 arm_target_insn = this_insn;
25473 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25474 what it was. */
25475 if (!reverse)
25476 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25478 if (reverse || then_not_else)
25479 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25482 /* Restore recog_data (getting the attributes of other insns can
25483 destroy this array, but final.cc assumes that it remains intact
25484 across this call). */
25485 extract_constrain_insn_cached (insn);
25489 /* Output IT instructions. */
25490 void
25491 thumb2_asm_output_opcode (FILE * stream)
25493 char buff[5];
25494 int n;
25496 if (arm_condexec_mask)
25498 for (n = 0; n < arm_condexec_masklen; n++)
25499 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25500 buff[n] = 0;
25501 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25502 arm_condition_codes[arm_current_cc]);
25503 arm_condexec_mask = 0;
25507 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
25508 UNITS_PER_WORD bytes wide. */
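/* For instance, ARM_NUM_REGS (DImode) is 2, so a DImode value assigned to
   r0 also occupies r1, while SImode and narrower modes need only a single
   core register.  */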
25509 static unsigned int
25510 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25512 if (IS_VPR_REGNUM (regno))
25513 return CEIL (GET_MODE_SIZE (mode), 2);
25515 if (TARGET_32BIT
25516 && regno > PC_REGNUM
25517 && regno != FRAME_POINTER_REGNUM
25518 && regno != ARG_POINTER_REGNUM
25519 && !IS_VFP_REGNUM (regno))
25520 return 1;
25522 return ARM_NUM_REGS (mode);
25525 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25526 static bool
25527 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25529 if (GET_MODE_CLASS (mode) == MODE_CC)
25530 return (regno == CC_REGNUM
25531 || (TARGET_VFP_BASE
25532 && regno == VFPCC_REGNUM));
25534 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25535 return false;
25537 if (IS_VPR_REGNUM (regno))
25538 return mode == HImode
25539 || mode == V16BImode
25540 || mode == V8BImode
25541 || mode == V4BImode;
25543 if (TARGET_THUMB1)
25544 /* For the Thumb we only allow values bigger than SImode in
25545 registers 0 - 6, so that there is always a second low
25546 register available to hold the upper part of the value.
25547 We probably ought to ensure that the register is the
25548 start of an even numbered register pair. */
25549 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25551 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25553 if (mode == DFmode || mode == DImode)
25554 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25556 if (mode == HFmode || mode == BFmode || mode == HImode
25557 || mode == SFmode || mode == SImode)
25558 return VFP_REGNO_OK_FOR_SINGLE (regno);
25560 if (TARGET_NEON)
25561 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25562 || (VALID_NEON_QREG_MODE (mode)
25563 && NEON_REGNO_OK_FOR_QUAD (regno))
25564 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25565 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25566 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25567 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25568 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25569 if (TARGET_HAVE_MVE)
25570 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25571 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25572 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25574 return false;
25577 if (TARGET_REALLY_IWMMXT)
25579 if (IS_IWMMXT_GR_REGNUM (regno))
25580 return mode == SImode;
25582 if (IS_IWMMXT_REGNUM (regno))
25583 return VALID_IWMMXT_REG_MODE (mode);
25586 /* We allow almost any value to be stored in the general registers.
25587 Restrict doubleword quantities to even register pairs in ARM state
25588 so that we can use ldrd. The same restriction applies for MVE
25589 in order to support Armv8.1-M Mainline instructions.
25590 Do not allow very large Neon structure opaque modes in general
25591 registers; they would use too many. */
25592 if (regno <= LAST_ARM_REGNUM)
25594 if (ARM_NUM_REGS (mode) > 4)
25595 return false;
25597 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25598 return true;
25600 return !((TARGET_LDRD || TARGET_CDE)
25601 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25604 if (regno == FRAME_POINTER_REGNUM
25605 || regno == ARG_POINTER_REGNUM)
25606 /* We only allow integers in the fake hard registers. */
25607 return GET_MODE_CLASS (mode) == MODE_INT;
25609 return false;
25612 /* Implement TARGET_MODES_TIEABLE_P. */
25614 static bool
25615 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25617 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25618 return true;
25620 /* We specifically want to allow elements of "structure" modes to
25621 be tieable to the structure. This more general condition allows
25622 other rarer situations too. */
25623 if ((TARGET_NEON
25624 && (VALID_NEON_DREG_MODE (mode1)
25625 || VALID_NEON_QREG_MODE (mode1)
25626 || VALID_NEON_STRUCT_MODE (mode1))
25627 && (VALID_NEON_DREG_MODE (mode2)
25628 || VALID_NEON_QREG_MODE (mode2)
25629 || VALID_NEON_STRUCT_MODE (mode2)))
25630 || (TARGET_HAVE_MVE
25631 && (VALID_MVE_MODE (mode1)
25632 || VALID_MVE_STRUCT_MODE (mode1))
25633 && (VALID_MVE_MODE (mode2)
25634 || VALID_MVE_STRUCT_MODE (mode2))))
25635 return true;
25637 return false;
25640 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25641 not used in arm mode. */
25643 enum reg_class
25644 arm_regno_class (int regno)
25646 if (regno == PC_REGNUM)
25647 return NO_REGS;
25649 if (IS_VPR_REGNUM (regno))
25650 return VPR_REG;
25652 if (TARGET_THUMB1)
25654 if (regno == STACK_POINTER_REGNUM)
25655 return STACK_REG;
25656 if (regno == CC_REGNUM)
25657 return CC_REG;
25658 if (regno < 8)
25659 return LO_REGS;
25660 return HI_REGS;
25663 if (TARGET_THUMB2 && regno < 8)
25664 return LO_REGS;
25666 if ( regno <= LAST_ARM_REGNUM
25667 || regno == FRAME_POINTER_REGNUM
25668 || regno == ARG_POINTER_REGNUM)
25669 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25671 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25672 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25674 if (IS_VFP_REGNUM (regno))
25676 if (regno <= D7_VFP_REGNUM)
25677 return VFP_D0_D7_REGS;
25678 else if (regno <= LAST_LO_VFP_REGNUM)
25679 return VFP_LO_REGS;
25680 else
25681 return VFP_HI_REGS;
25684 if (IS_IWMMXT_REGNUM (regno))
25685 return IWMMXT_REGS;
25687 if (IS_IWMMXT_GR_REGNUM (regno))
25688 return IWMMXT_GR_REGS;
25690 return NO_REGS;
25693 /* Handle a special case when computing the offset
25694 of an argument from the frame pointer. */
25696 arm_debugger_arg_offset (int value, rtx addr)
25698 rtx_insn *insn;
25700 /* We are only interested if dbxout_parms() failed to compute the offset. */
25701 if (value != 0)
25702 return 0;
25704 /* We can only cope with the case where the address is held in a register. */
25705 if (!REG_P (addr))
25706 return 0;
25708 /* If we are using the frame pointer to point at the argument, then
25709 an offset of 0 is correct. */
25710 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25711 return 0;
25713 /* If we are using the stack pointer to point at the
25714 argument, then an offset of 0 is correct. */
25715 /* ??? Check this is consistent with thumb2 frame layout. */
25716 if ((TARGET_THUMB || !frame_pointer_needed)
25717 && REGNO (addr) == SP_REGNUM)
25718 return 0;
25720 /* Oh dear. The argument is pointed to by a register rather
25721 than being held in a register, or being stored at a known
25722 offset from the frame pointer. Since GDB only understands
25723 those two kinds of argument we must translate the address
25724 held in the register into an offset from the frame pointer.
25725 We do this by searching through the insns for the function
25726 looking to see where this register gets its value. If the
25727 register is initialized from the frame pointer plus an offset
25728 then we are in luck and we can continue, otherwise we give up.
25730 This code is exercised by producing debugging information
25731 for a function with arguments like this:
25733 double func (double a, double b, int c, double d) {return d;}
25735 Without this code the stab for parameter 'd' will be set to
25736 an offset of 0 from the frame pointer, rather than 8. */
25738 /* The if() statement says:
25740 If the insn is a normal instruction
25741 and if the insn is setting the value in a register
25742 and if the register being set is the register holding the address of the argument
25743 and if the address is computed by an addition
25744 that involves adding to a register
25745 which is the frame pointer
25746 a constant integer
25748 then... */
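/* Concretely, the loop below looks for an insn of the form
	(set (reg Rn) (plus (reg hard-frame-pointer) (const_int K)))
   where Rn is the register in ADDR; K then becomes the reported offset.  */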
25750 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25752 if ( NONJUMP_INSN_P (insn)
25753 && GET_CODE (PATTERN (insn)) == SET
25754 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25755 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25756 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25757 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25758 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25761 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25763 break;
25767 if (value == 0)
25769 debug_rtx (addr);
25770 warning (0, "unable to compute real location of stacked parameter");
25771 value = 8; /* XXX magic hack */
25774 return value;
25777 /* Implement TARGET_PROMOTED_TYPE. */
25779 static tree
25780 arm_promoted_type (const_tree t)
25782 if (SCALAR_FLOAT_TYPE_P (t)
25783 && TYPE_PRECISION (t) == 16
25784 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25785 return float_type_node;
25786 return NULL_TREE;
25789 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25790 This simply adds HFmode as a supported mode; even though we don't
25791 implement arithmetic on this type directly, it's supported by
25792 optabs conversions, much the way the double-word arithmetic is
25793 special-cased in the default hook. */
25795 static bool
25796 arm_scalar_mode_supported_p (scalar_mode mode)
25798 if (mode == HFmode)
25799 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25800 else if (ALL_FIXED_POINT_MODE_P (mode))
25801 return true;
25802 else
25803 return default_scalar_mode_supported_p (mode);
25806 /* Set the value of FLT_EVAL_METHOD.
25807 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25809 0: evaluate all operations and constants, whose semantic type has at
25810 most the range and precision of type float, to the range and
25811 precision of float; evaluate all other operations and constants to
25812 the range and precision of the semantic type;
25814 N, where _FloatN is a supported interchange floating type
25815 evaluate all operations and constants, whose semantic type has at
25816 most the range and precision of _FloatN type, to the range and
25817 precision of the _FloatN type; evaluate all other operations and
25818 constants to the range and precision of the semantic type;
25820 If we have the ARMv8.2-A extensions then we support _Float16 in native
25821 precision, so we should set this to 16. Otherwise, we support the type,
25822 but want to evaluate expressions in float precision, so set this to
25823 0. */
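/* A rough example of the difference (assuming -mfp16-format=ieee): with the
   ARMv8.2-A FP16 instructions available, "_Float16 c = a + b;" is evaluated
   directly in _Float16; without them the addition is performed in float and
   only the final result is narrowed back to _Float16.  */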
25825 static enum flt_eval_method
25826 arm_excess_precision (enum excess_precision_type type)
25828 switch (type)
25830 case EXCESS_PRECISION_TYPE_FAST:
25831 case EXCESS_PRECISION_TYPE_STANDARD:
25832 /* We can calculate either in 16-bit range and precision or
25833 32-bit range and precision. Make that decision based on whether
25834 we have native support for the ARMv8.2-A 16-bit floating-point
25835 instructions or not. */
25836 return (TARGET_VFP_FP16INST
25837 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25838 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25839 case EXCESS_PRECISION_TYPE_IMPLICIT:
25840 case EXCESS_PRECISION_TYPE_FLOAT16:
25841 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25842 default:
25843 gcc_unreachable ();
25845 return FLT_EVAL_METHOD_UNPREDICTABLE;
25849 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25850 _Float16 if we are using anything other than ieee format for 16-bit
25851 floating point. Otherwise, punt to the default implementation. */
25852 static opt_scalar_float_mode
25853 arm_floatn_mode (int n, bool extended)
25855 if (!extended && n == 16)
25857 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25858 return HFmode;
25859 return opt_scalar_float_mode ();
25862 return default_floatn_mode (n, extended);
25866 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25867 not to early-clobber SRC registers in the process.
25869 We assume that the operands described by SRC and DEST represent a
25870 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25871 number of components into which the copy has been decomposed. */
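/* For example (hypothetical D registers): copying {d1, d2} into {d2, d3}
   overlaps and the destination starts at a higher register, so the
   component moves are emitted in reverse order (d2->d3 first, then d1->d2)
   to avoid clobbering d2 before it has been read.  */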
25872 void
25873 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25875 unsigned int i;
25877 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25878 || REGNO (operands[0]) < REGNO (operands[1]))
25880 for (i = 0; i < count; i++)
25882 operands[2 * i] = dest[i];
25883 operands[2 * i + 1] = src[i];
25886 else
25888 for (i = 0; i < count; i++)
25890 operands[2 * i] = dest[count - i - 1];
25891 operands[2 * i + 1] = src[count - i - 1];
25896 /* Split operands into moves from op[1] + op[2] into op[0]. */
25898 void
25899 neon_split_vcombine (rtx operands[3])
25901 unsigned int dest = REGNO (operands[0]);
25902 unsigned int src1 = REGNO (operands[1]);
25903 unsigned int src2 = REGNO (operands[2]);
25904 machine_mode halfmode = GET_MODE (operands[1]);
25905 unsigned int halfregs = REG_NREGS (operands[1]);
25906 rtx destlo, desthi;
25908 if (src1 == dest && src2 == dest + halfregs)
25910 /* No-op move. Can't split to nothing; emit something. */
25911 emit_note (NOTE_INSN_DELETED);
25912 return;
25915 /* Preserve register attributes for variable tracking. */
25916 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25917 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25918 GET_MODE_SIZE (halfmode));
25920 /* Special case of reversed high/low parts. Use VSWP. */
25921 if (src2 == dest && src1 == dest + halfregs)
25923 rtx x = gen_rtx_SET (destlo, operands[1]);
25924 rtx y = gen_rtx_SET (desthi, operands[2]);
25925 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25926 return;
25929 if (!reg_overlap_mentioned_p (operands[2], destlo))
25931 /* Try to avoid unnecessary moves if part of the result
25932 is in the right place already. */
25933 if (src1 != dest)
25934 emit_move_insn (destlo, operands[1]);
25935 if (src2 != dest + halfregs)
25936 emit_move_insn (desthi, operands[2]);
25938 else
25940 if (src2 != dest + halfregs)
25941 emit_move_insn (desthi, operands[2]);
25942 if (src1 != dest)
25943 emit_move_insn (destlo, operands[1]);
25947 /* Return the number (counting from 0) of
25948 the least significant set bit in MASK. */
25950 inline static int
25951 number_of_first_bit_set (unsigned mask)
25953 return ctz_hwi (mask);
25956 /* Like emit_multi_reg_push, but allowing for a different set of
25957 registers to be described as saved. MASK is the set of registers
25958 to be saved; REAL_REGS is the set of registers to be described as
25959 saved. If REAL_REGS is 0, only describe the stack adjustment. */
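/* As a sketch: with MASK covering {r4, r5, lr} this builds a single
   "push {r4, r5, lr}" (a pre-modify store-multiple dropping SP by 12) and,
   when REAL_REGS is nonzero, attaches a REG_FRAME_RELATED_EXPR note
   describing the SP adjustment and the three saved registers for the
   unwinder.  */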
25961 static rtx_insn *
25962 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25964 unsigned long regno;
25965 rtx par[10], tmp, reg;
25966 rtx_insn *insn;
25967 int i, j;
25969 /* Build the parallel of the registers actually being stored. */
25970 for (i = 0; mask; ++i, mask &= mask - 1)
25972 regno = ctz_hwi (mask);
25973 reg = gen_rtx_REG (SImode, regno);
25975 if (i == 0)
25976 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25977 else
25978 tmp = gen_rtx_USE (VOIDmode, reg);
25980 par[i] = tmp;
25983 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25984 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25985 tmp = gen_frame_mem (BLKmode, tmp);
25986 tmp = gen_rtx_SET (tmp, par[0]);
25987 par[0] = tmp;
25989 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25990 insn = emit_insn (tmp);
25992 /* Always build the stack adjustment note for unwind info. */
25993 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25994 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25995 par[0] = tmp;
25997 /* Build the parallel of the registers recorded as saved for unwind. */
25998 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26000 regno = ctz_hwi (real_regs);
26001 reg = gen_rtx_REG (SImode, regno);
26003 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26004 tmp = gen_frame_mem (SImode, tmp);
26005 tmp = gen_rtx_SET (tmp, reg);
26006 RTX_FRAME_RELATED_P (tmp) = 1;
26007 par[j + 1] = tmp;
26010 if (j == 0)
26011 tmp = par[0];
26012 else
26014 RTX_FRAME_RELATED_P (par[0]) = 1;
26015 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26020 return insn;
26023 /* Emit code to push or pop registers to or from the stack. F is the
26024 assembly file. MASK is the registers to pop. */
26025 static void
26026 thumb_pop (FILE *f, unsigned long mask)
26028 int regno;
26029 int lo_mask = mask & 0xFF;
26031 gcc_assert (mask);
26033 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26035 /* Special case. Do not generate a POP PC statement here, do it in
26036 thumb_exit (). */
26037 thumb_exit (f, -1);
26038 return;
26041 fprintf (f, "\tpop\t{");
26043 /* Look at the low registers first. */
26044 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26046 if (lo_mask & 1)
26048 asm_fprintf (f, "%r", regno);
26050 if ((lo_mask & ~1) != 0)
26051 fprintf (f, ", ");
26055 if (mask & (1 << PC_REGNUM))
26057 /* Catch popping the PC. */
26058 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26059 || IS_CMSE_ENTRY (arm_current_func_type ()))
26061 /* The PC is never popped directly; instead
26062 it is popped into r3 and then BX is used. */
26063 fprintf (f, "}\n");
26065 thumb_exit (f, -1);
26067 return;
26069 else
26071 if (mask & 0xFF)
26072 fprintf (f, ", ");
26074 asm_fprintf (f, "%r", PC_REGNUM);
26078 fprintf (f, "}\n");
26081 /* Generate code to return from a thumb function.
26082 If 'reg_containing_return_addr' is -1, then the return address is
26083 actually on the stack, at the stack pointer.
26085 Note: do not forget to update length attribute of corresponding insn pattern
26086 when changing assembly output (eg. length attribute of epilogue_insns when
26087 updating Armv8-M Baseline Security Extensions register clearing
26088 sequences). */
26089 static void
26090 thumb_exit (FILE *f, int reg_containing_return_addr)
26092 unsigned regs_available_for_popping;
26093 unsigned regs_to_pop;
26094 int pops_needed;
26095 unsigned available;
26096 unsigned required;
26097 machine_mode mode;
26098 int size;
26099 int restore_a4 = FALSE;
26101 /* Compute the registers we need to pop. */
26102 regs_to_pop = 0;
26103 pops_needed = 0;
26105 if (reg_containing_return_addr == -1)
26107 regs_to_pop |= 1 << LR_REGNUM;
26108 ++pops_needed;
26111 if (TARGET_BACKTRACE)
26113 /* Restore the (ARM) frame pointer and stack pointer. */
26114 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26115 pops_needed += 2;
26118 /* If there is nothing to pop then just emit the BX instruction and
26119 return. */
26120 if (pops_needed == 0)
26122 if (crtl->calls_eh_return)
26123 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26125 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26127 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26128 emitted by cmse_nonsecure_entry_clear_before_return (). */
26129 if (!TARGET_HAVE_FPCXT_CMSE)
26130 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26131 reg_containing_return_addr);
26132 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26134 else
26135 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26136 return;
26138 /* Otherwise if we are not supporting interworking and we have not created
26139 a backtrace structure and the function was not entered in ARM mode then
26140 just pop the return address straight into the PC. */
26141 else if (!TARGET_INTERWORK
26142 && !TARGET_BACKTRACE
26143 && !is_called_in_ARM_mode (current_function_decl)
26144 && !crtl->calls_eh_return
26145 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26147 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26148 return;
26151 /* Find out how many of the (return) argument registers we can corrupt. */
26152 regs_available_for_popping = 0;
26154 /* If returning via __builtin_eh_return, the bottom three registers
26155 all contain information needed for the return. */
26156 if (crtl->calls_eh_return)
26157 size = 12;
26158 else
26160 /* We can deduce the registers used from the function's
26161 return value. This is more reliable than examining
26162 df_regs_ever_live_p () because that will be set if the register is
26163 ever used in the function, not just if the register is used
26164 to hold a return value. */
26166 if (crtl->return_rtx != 0)
26167 mode = GET_MODE (crtl->return_rtx);
26168 else
26169 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26171 size = GET_MODE_SIZE (mode);
26173 if (size == 0)
26175 /* In a void function we can use any argument register.
26176 In a function that returns a structure on the stack
26177 we can use the second and third argument registers. */
26178 if (mode == VOIDmode)
26179 regs_available_for_popping =
26180 (1 << ARG_REGISTER (1))
26181 | (1 << ARG_REGISTER (2))
26182 | (1 << ARG_REGISTER (3));
26183 else
26184 regs_available_for_popping =
26185 (1 << ARG_REGISTER (2))
26186 | (1 << ARG_REGISTER (3));
26188 else if (size <= 4)
26189 regs_available_for_popping =
26190 (1 << ARG_REGISTER (2))
26191 | (1 << ARG_REGISTER (3));
26192 else if (size <= 8)
26193 regs_available_for_popping =
26194 (1 << ARG_REGISTER (3));
26197 /* Match registers to be popped with registers into which we pop them. */
26198 for (available = regs_available_for_popping,
26199 required = regs_to_pop;
26200 required != 0 && available != 0;
26201 available &= ~(available & - available),
26202 required &= ~(required & - required))
26203 -- pops_needed;
26205 /* If we have any popping registers left over, remove them. */
26206 if (available > 0)
26207 regs_available_for_popping &= ~available;
26209 /* Otherwise if we need another popping register we can use
26210 the fourth argument register. */
26211 else if (pops_needed)
26213 /* If we have not found any free argument registers and
26214 reg a4 contains the return address, we must move it. */
26215 if (regs_available_for_popping == 0
26216 && reg_containing_return_addr == LAST_ARG_REGNUM)
26218 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26219 reg_containing_return_addr = LR_REGNUM;
26221 else if (size > 12)
26223 /* Register a4 is being used to hold part of the return value,
26224 but we have dire need of a free, low register. */
26225 restore_a4 = TRUE;
26227 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26230 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26232 /* The fourth argument register is available. */
26233 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26235 --pops_needed;
26239 /* Pop as many registers as we can. */
26240 thumb_pop (f, regs_available_for_popping);
26242 /* Process the registers we popped. */
26243 if (reg_containing_return_addr == -1)
26245 /* The return address was popped into the lowest numbered register. */
26246 regs_to_pop &= ~(1 << LR_REGNUM);
26248 reg_containing_return_addr =
26249 number_of_first_bit_set (regs_available_for_popping);
26251 /* Remove this register from the mask of available registers, so that
26252 the return address will not be corrupted by further pops. */
26253 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26256 /* If we popped other registers then handle them here. */
26257 if (regs_available_for_popping)
26259 int frame_pointer;
26261 /* Work out which register currently contains the frame pointer. */
26262 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26264 /* Move it into the correct place. */
26265 asm_fprintf (f, "\tmov\t%r, %r\n",
26266 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26268 /* (Temporarily) remove it from the mask of popped registers. */
26269 regs_available_for_popping &= ~(1 << frame_pointer);
26270 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26272 if (regs_available_for_popping)
26274 int stack_pointer;
26276 /* We popped the stack pointer as well,
26277 find the register that contains it. */
26278 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26280 /* Move it into the stack register. */
26281 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26283 /* At this point we have popped all necessary registers, so
26284 do not worry about restoring regs_available_for_popping
26285 to its correct value:
26287 assert (pops_needed == 0)
26288 assert (regs_available_for_popping == (1 << frame_pointer))
26289 assert (regs_to_pop == (1 << STACK_POINTER)) */
26291 else
26293 /* Since we have just moved the popped value into the frame
26294 pointer, the popping register is available for reuse, and
26295 we know that we still have the stack pointer left to pop. */
26296 regs_available_for_popping |= (1 << frame_pointer);
26300 /* If we still have registers left on the stack, but we no longer have
26301 any registers into which we can pop them, then we must move the return
26302 address into the link register and make available the register that
26303 contained it. */
26304 if (regs_available_for_popping == 0 && pops_needed > 0)
26306 regs_available_for_popping |= 1 << reg_containing_return_addr;
26308 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26309 reg_containing_return_addr);
26311 reg_containing_return_addr = LR_REGNUM;
26314 /* If we have registers left on the stack then pop some more.
26315 We know that at most we will want to pop FP and SP. */
26316 if (pops_needed > 0)
26318 int popped_into;
26319 int move_to;
26321 thumb_pop (f, regs_available_for_popping);
26323 /* We have popped either FP or SP.
26324 Move whichever one it is into the correct register. */
26325 popped_into = number_of_first_bit_set (regs_available_for_popping);
26326 move_to = number_of_first_bit_set (regs_to_pop);
26328 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26329 --pops_needed;
26332 /* If we still have not popped everything then we must have only
26333 had one register available to us and we are now popping the SP. */
26334 if (pops_needed > 0)
26336 int popped_into;
26338 thumb_pop (f, regs_available_for_popping);
26340 popped_into = number_of_first_bit_set (regs_available_for_popping);
26342 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26344 assert (regs_to_pop == (1 << STACK_POINTER))
26345 assert (pops_needed == 1)
26349 /* If necessary restore the a4 register. */
26350 if (restore_a4)
26352 if (reg_containing_return_addr != LR_REGNUM)
26354 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26355 reg_containing_return_addr = LR_REGNUM;
26358 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26361 if (crtl->calls_eh_return)
26362 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26364 /* Return to caller. */
26365 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26367 /* This is for the cases where LR is not being used to contain the return
26368 address. It may therefore contain information that we might not want
26369 to leak, hence it must be cleared. The value in R0 will never be a
26370 secret at this point, so it is safe to use it, see the clearing code
26371 in cmse_nonsecure_entry_clear_before_return (). */
26372 if (reg_containing_return_addr != LR_REGNUM)
26373 asm_fprintf (f, "\tmov\tlr, r0\n");
26375 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26376 by cmse_nonsecure_entry_clear_before_return (). */
26377 if (!TARGET_HAVE_FPCXT_CMSE)
26378 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26379 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26381 else
26382 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26385 /* Scan INSN just before assembler is output for it.
26386 For Thumb-1, we track the status of the condition codes; this
26387 information is used in the cbranchsi4_insn pattern. */
26388 void
26389 thumb1_final_prescan_insn (rtx_insn *insn)
26391 if (flag_print_asm_name)
26392 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26393 INSN_ADDRESSES (INSN_UID (insn)));
26394 /* Don't overwrite the previous setter when we get to a cbranch. */
26395 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26397 enum attr_conds conds;
26399 if (cfun->machine->thumb1_cc_insn)
26401 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26402 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26403 CC_STATUS_INIT;
26405 conds = get_attr_conds (insn);
26406 if (conds == CONDS_SET)
26408 rtx set = single_set (insn);
26409 cfun->machine->thumb1_cc_insn = insn;
26410 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26411 cfun->machine->thumb1_cc_op1 = const0_rtx;
26412 cfun->machine->thumb1_cc_mode = CC_NZmode;
26413 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26415 rtx src1 = XEXP (SET_SRC (set), 1);
26416 if (src1 == const0_rtx)
26417 cfun->machine->thumb1_cc_mode = CCmode;
26419 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26421 /* Record the src register operand instead of dest because
26422 cprop_hardreg pass propagates src. */
26423 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26426 else if (conds != CONDS_NOCOND)
26427 cfun->machine->thumb1_cc_insn = NULL_RTX;
26430 /* Check if an unexpected far jump is used. */
26431 if (cfun->machine->lr_save_eliminated
26432 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26433 internal_error("Unexpected thumb1 far jump");
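/* Return nonzero if VAL is an 8-bit value shifted left by at most 24 bits
   (0xFF00, for instance); such a constant can typically be materialised in
   Thumb-1 by loading a byte and shifting it into place.  */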
26437 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26439 unsigned HOST_WIDE_INT mask = 0xff;
26440 int i;
26442 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26443 if (val == 0) /* XXX */
26444 return 0;
26446 for (i = 0; i < 25; i++)
26447 if ((val & (mask << i)) == val)
26448 return 1;
26450 return 0;
26453 /* Returns nonzero if the current function contains,
26454 or might contain a far jump. */
26455 static int
26456 thumb_far_jump_used_p (void)
26458 rtx_insn *insn;
26459 bool far_jump = false;
26460 unsigned int func_size = 0;
26462 /* If we have already decided that far jumps may be used,
26463 do not bother checking again, and always return true even if
26464 it turns out that they are not being used. Once we have made
26465 the decision that far jumps are present (and hence that the link
26466 register will be pushed onto the stack) we cannot go back on it. */
26467 if (cfun->machine->far_jump_used)
26468 return 1;
26470 /* If this function is not being called from the prologue/epilogue
26471 generation code then it must be being called from the
26472 INITIAL_ELIMINATION_OFFSET macro. */
26473 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26475 /* In this case we know that we are being asked about the elimination
26476 of the arg pointer register. If that register is not being used,
26477 then there are no arguments on the stack, and we do not have to
26478 worry that a far jump might force the prologue to push the link
26479 register, changing the stack offsets. In this case we can just
26480 return false, since the presence of far jumps in the function will
26481 not affect stack offsets.
26483 If the arg pointer is live (or if it was live, but has now been
26484 eliminated and so set to dead) then we do have to test to see if
26485 the function might contain a far jump. This test can lead to some
26486 false negatives, since before reload is completed, the length of
26487 branch instructions is not known, so gcc defaults to returning their
26488 longest length, which in turn sets the far jump attribute to true.
26490 A false negative will not result in bad code being generated, but it
26491 will result in a needless push and pop of the link register. We
26492 hope that this does not occur too often.
26494 If we need doubleword stack alignment this could affect the other
26495 elimination offsets so we can't risk getting it wrong. */
26496 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26497 cfun->machine->arg_pointer_live = 1;
26498 else if (!cfun->machine->arg_pointer_live)
26499 return 0;
26502 /* We should not change far_jump_used during or after reload, as there is
26503 no chance to change stack frame layout. */
26504 if (reload_in_progress || reload_completed)
26505 return 0;
26507 /* Check to see if the function contains a branch
26508 insn with the far jump attribute set. */
26509 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26511 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26513 far_jump = true;
26515 func_size += get_attr_length (insn);
26518 /* The far_jump attribute will always be true for Thumb-1 before the
26519 shorten_branch pass. So checking the far_jump attribute before
26520 shorten_branch isn't very useful.
26522 The following heuristic tries to estimate more accurately whether a far
26523 jump may finally be used. The heuristic is very conservative, as there is
26524 no chance to roll back a decision not to use far jumps.
26526 The Thumb-1 long branch offset range is -2048 to 2046. The worst case is
26527 that each 2-byte insn is associated with a 4-byte constant pool entry.
26528 Using a function size of 2048/3 as the threshold is conservative enough; a worked example follows this function. */
26529 if (far_jump)
26531 if ((func_size * 3) >= 2048)
26533 /* Record the fact that we have decided that
26534 the function does use far jumps. */
26535 cfun->machine->far_jump_used = 1;
26536 return 1;
26540 return 0;
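/* Editorial illustration (hypothetical helper, not part of the original
   source): the 2048/3 threshold above as standalone arithmetic.  In the
   worst case every 2-byte insn drags a 4-byte literal-pool entry with it,
   so a function whose insns total FUNC_SIZE bytes can span up to
   3 * FUNC_SIZE bytes, while short branches only reach about 2 KB.  */
static int
example_far_jump_heuristic (unsigned int func_size)
{
  /* func_size == 680 -> 2040 < 2048, short branches assumed sufficient;
     func_size == 700 -> 2100 >= 2048, far jumps (and an LR save) assumed.  */
  return (func_size * 3) >= 2048;
}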
26543 /* Return nonzero if FUNC must be entered in ARM mode. */
26544 static bool
26545 is_called_in_ARM_mode (tree func)
26547 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26549 /* Ignore the problem about functions whose address is taken. */
26550 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26551 return true;
26553 #ifdef ARM_PE
26554 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26555 #else
26556 return false;
26557 #endif
26560 /* Given the stack offsets and register mask in OFFSETS, decide how
26561 many additional registers to push instead of subtracting a constant
26562 from SP. For epilogues the principle is the same except we use pop.
26563 FOR_PROLOGUE indicates which we're generating. */
26564 static int
26565 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26567 HOST_WIDE_INT amount;
26568 unsigned long live_regs_mask = offsets->saved_regs_mask;
26569 /* Extract a mask of the ones we can give to the Thumb's push/pop
26570 instruction. */
26571 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26572 /* Then count how many other high registers will need to be pushed. */
26573 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26574 int n_free, reg_base, size;
26576 if (!for_prologue && frame_pointer_needed)
26577 amount = offsets->locals_base - offsets->saved_regs;
26578 else
26579 amount = offsets->outgoing_args - offsets->saved_regs;
26581 /* If the stack frame size is 512 exactly, we can save one load
26582 instruction, which should make this a win even when optimizing
26583 for speed. */
26584 if (!optimize_size && amount != 512)
26585 return 0;
26587 /* Can't do this if there are high registers to push. */
26588 if (high_regs_pushed != 0)
26589 return 0;
26591 /* Shouldn't do it in the prologue if no registers would normally
26592 be pushed at all. In the epilogue, also allow it if we'll have
26593 a pop insn for the PC. */
26594 if (l_mask == 0
26595 && (for_prologue
26596 || TARGET_BACKTRACE
26597 || (live_regs_mask & 1 << LR_REGNUM) == 0
26598 || TARGET_INTERWORK
26599 || crtl->args.pretend_args_size != 0))
26600 return 0;
26602 /* Don't do this if thumb_expand_prologue wants to emit instructions
26603 between the push and the stack frame allocation. */
26604 if (for_prologue
26605 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26606 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26607 return 0;
26609 reg_base = 0;
26610 n_free = 0;
26611 if (!for_prologue)
26613 size = arm_size_return_regs ();
26614 reg_base = ARM_NUM_INTS (size);
26615 live_regs_mask >>= reg_base;
26618 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26619 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26621 live_regs_mask >>= 1;
26622 n_free++;
26625 if (n_free == 0)
26626 return 0;
26627 gcc_assert (amount / 4 * 4 == amount);
26629 if (amount >= 512 && (amount - n_free * 4) < 512)
26630 return (amount - 508) / 4;
26631 if (amount <= n_free * 4)
26632 return amount / 4;
26633 return 0;
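/* Editorial illustration (hypothetical helper, not part of the original
   source): the arithmetic at the end of thumb1_extra_regs_pushed.  A
   Thumb-1 "sub sp, #imm" can only subtract up to 508 (a 7-bit immediate
   scaled by 4), so with AMOUNT == 512 and one free register, pushing one
   extra register absorbs 4 bytes and the remaining 508 fits in a single
   instruction.  */
static int
example_extra_pushes (int amount, int n_free)
{
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;	/* e.g. amount 512, n_free 1 -> 1 push.  */
  if (amount <= n_free * 4)
    return amount / 4;		/* Whole adjustment absorbed by pushes.  */
  return 0;
}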
26636 /* The bits which aren't usefully expanded as rtl. */
26637 const char *
26638 thumb1_unexpanded_epilogue (void)
26640 arm_stack_offsets *offsets;
26641 int regno;
26642 unsigned long live_regs_mask = 0;
26643 int high_regs_pushed = 0;
26644 int extra_pop;
26645 int had_to_push_lr;
26646 int size;
26648 if (cfun->machine->return_used_this_function != 0)
26649 return "";
26651 if (IS_NAKED (arm_current_func_type ()))
26652 return "";
26654 offsets = arm_get_frame_offsets ();
26655 live_regs_mask = offsets->saved_regs_mask;
26656 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26658 /* We can deduce the registers used from the function's return value.
26659 This is more reliable than examining df_regs_ever_live_p () because that
26660 will be set if the register is ever used in the function, not just if
26661 the register is used to hold a return value. */
26662 size = arm_size_return_regs ();
26664 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26665 if (extra_pop > 0)
26667 unsigned long extra_mask = (1 << extra_pop) - 1;
26668 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26671 /* The prologue may have pushed some high registers to use as
26672 work registers. E.g. the testsuite file:
26673 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26674 compiles to produce:
26675 push {r4, r5, r6, r7, lr}
26676 mov r7, r9
26677 mov r6, r8
26678 push {r6, r7}
26679 as part of the prologue. We have to undo that pushing here. */
26681 if (high_regs_pushed)
26683 unsigned long mask = live_regs_mask & 0xff;
26684 int next_hi_reg;
26686 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26688 if (mask == 0)
26689 /* Oh dear! We have no low registers into which we can pop
26690 high registers! */
26691 internal_error
26692 ("no low registers available for popping high registers");
26694 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26695 if (live_regs_mask & (1 << next_hi_reg))
26696 break;
26698 while (high_regs_pushed)
26700 /* Find lo register(s) into which the high register(s) can
26701 be popped. */
26702 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26704 if (mask & (1 << regno))
26705 high_regs_pushed--;
26706 if (high_regs_pushed == 0)
26707 break;
26710 if (high_regs_pushed == 0 && regno >= 0)
26711 mask &= ~((1 << regno) - 1);
26713 /* Pop the values into the low register(s). */
26714 thumb_pop (asm_out_file, mask);
26716 /* Move the value(s) into the high registers. */
26717 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26719 if (mask & (1 << regno))
26721 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26722 regno);
26724 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26725 next_hi_reg--)
26726 if (live_regs_mask & (1 << next_hi_reg))
26727 break;
26731 live_regs_mask &= ~0x0f00;
26734 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26735 live_regs_mask &= 0xff;
26737 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26739 /* Pop the return address into the PC. */
26740 if (had_to_push_lr)
26741 live_regs_mask |= 1 << PC_REGNUM;
26743 /* Either no argument registers were pushed or a backtrace
26744 structure was created which includes an adjusted stack
26745 pointer, so just pop everything. */
26746 if (live_regs_mask)
26747 thumb_pop (asm_out_file, live_regs_mask);
26749 /* We have either just popped the return address into the
26750 PC or it was kept in LR for the entire function.
26751 Note that thumb_pop has already called thumb_exit if the
26752 PC was in the list. */
26753 if (!had_to_push_lr)
26754 thumb_exit (asm_out_file, LR_REGNUM);
26756 else
26758 /* Pop everything but the return address. */
26759 if (live_regs_mask)
26760 thumb_pop (asm_out_file, live_regs_mask);
26762 if (had_to_push_lr)
26764 if (size > 12)
26766 /* We have no free low regs, so save one. */
26767 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26768 LAST_ARG_REGNUM);
26771 /* Get the return address into a temporary register. */
26772 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26774 if (size > 12)
26776 /* Move the return address to lr. */
26777 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26778 LAST_ARG_REGNUM);
26779 /* Restore the low register. */
26780 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26781 IP_REGNUM);
26782 regno = LR_REGNUM;
26784 else
26785 regno = LAST_ARG_REGNUM;
26787 else
26788 regno = LR_REGNUM;
26790 /* Remove the argument registers that were pushed onto the stack. */
26791 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26792 SP_REGNUM, SP_REGNUM,
26793 crtl->args.pretend_args_size);
26795 thumb_exit (asm_out_file, regno);
26798 return "";
26801 /* Functions to save and restore machine-specific function data. */
26802 static struct machine_function *
26803 arm_init_machine_status (void)
26805 struct machine_function *machine;
26806 machine = ggc_cleared_alloc<machine_function> ();
26808 #if ARM_FT_UNKNOWN != 0
26809 machine->func_type = ARM_FT_UNKNOWN;
26810 #endif
26811 machine->static_chain_stack_bytes = -1;
26812 return machine;
26815 /* Return an RTX indicating where the return address to the
26816 calling function can be found. */
26818 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26820 if (count != 0)
26821 return NULL_RTX;
26823 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26826 /* Do anything needed before RTL is emitted for each function. */
26827 void
26828 arm_init_expanders (void)
26830 /* Arrange to initialize and mark the machine per-function status. */
26831 init_machine_status = arm_init_machine_status;
26833 /* This is to stop the combine pass optimizing away the alignment
26834 adjustment of va_arg. */
26835 /* ??? It is claimed that this should not be necessary. */
26836 if (cfun)
26837 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26840 /* Check whether FUNC is compiled for a different (ARM/Thumb) mode than the current one. */
26842 bool
26843 arm_change_mode_p (tree func)
26845 if (TREE_CODE (func) != FUNCTION_DECL)
26846 return false;
26848 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26850 if (!callee_tree)
26851 callee_tree = target_option_default_node;
26853 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26854 int flags = callee_opts->x_target_flags;
26856 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26859 /* Like arm_compute_initial_elimination_offset. Simpler because there
26860 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26861 to point at the base of the local variables after static stack
26862 space for a function has been allocated. */
26864 HOST_WIDE_INT
26865 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26867 arm_stack_offsets *offsets;
26869 offsets = arm_get_frame_offsets ();
26871 switch (from)
26873 case ARG_POINTER_REGNUM:
26874 switch (to)
26876 case STACK_POINTER_REGNUM:
26877 return offsets->outgoing_args - offsets->saved_args;
26879 case FRAME_POINTER_REGNUM:
26880 return offsets->soft_frame - offsets->saved_args;
26882 case ARM_HARD_FRAME_POINTER_REGNUM:
26883 return offsets->saved_regs - offsets->saved_args;
26885 case THUMB_HARD_FRAME_POINTER_REGNUM:
26886 return offsets->locals_base - offsets->saved_args;
26888 default:
26889 gcc_unreachable ();
26891 break;
26893 case FRAME_POINTER_REGNUM:
26894 switch (to)
26896 case STACK_POINTER_REGNUM:
26897 return offsets->outgoing_args - offsets->soft_frame;
26899 case ARM_HARD_FRAME_POINTER_REGNUM:
26900 return offsets->saved_regs - offsets->soft_frame;
26902 case THUMB_HARD_FRAME_POINTER_REGNUM:
26903 return offsets->locals_base - offsets->soft_frame;
26905 default:
26906 gcc_unreachable ();
26908 break;
26910 default:
26911 gcc_unreachable ();
26915 /* Generate the function's prologue. */
26917 void
26918 thumb1_expand_prologue (void)
26920 rtx_insn *insn;
26922 HOST_WIDE_INT amount;
26923 HOST_WIDE_INT size;
26924 arm_stack_offsets *offsets;
26925 unsigned long func_type;
26926 int regno;
26927 unsigned long live_regs_mask;
26928 unsigned long l_mask;
26929 unsigned high_regs_pushed = 0;
26930 bool lr_needs_saving;
26932 func_type = arm_current_func_type ();
26934 /* Naked functions don't have prologues. */
26935 if (IS_NAKED (func_type))
26937 if (flag_stack_usage_info)
26938 current_function_static_stack_size = 0;
26939 return;
26942 if (IS_INTERRUPT (func_type))
26944 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26945 return;
26948 if (is_called_in_ARM_mode (current_function_decl))
26949 emit_insn (gen_prologue_thumb1_interwork ());
26951 offsets = arm_get_frame_offsets ();
26952 live_regs_mask = offsets->saved_regs_mask;
26953 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26955 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26956 l_mask = live_regs_mask & 0x40ff;
26957 /* Then count how many other high registers will need to be pushed. */
26958 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26960 if (crtl->args.pretend_args_size)
26962 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26964 if (cfun->machine->uses_anonymous_args)
26966 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26967 unsigned long mask;
26969 mask = 1ul << (LAST_ARG_REGNUM + 1);
26970 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26972 insn = thumb1_emit_multi_reg_push (mask, 0);
26974 else
26976 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26977 stack_pointer_rtx, x));
26979 RTX_FRAME_RELATED_P (insn) = 1;
26982 if (TARGET_BACKTRACE)
26984 HOST_WIDE_INT offset = 0;
26985 unsigned work_register;
26986 rtx work_reg, x, arm_hfp_rtx;
26988 /* We have been asked to create a stack backtrace structure.
26989 The code looks like this:
26991 0 .align 2
26992 0 func:
26993 0 sub SP, #16 Reserve space for 4 registers.
26994 2 push {R7} Push low registers.
26995 4 add R7, SP, #20 Get the stack pointer before the push.
26996 6 str R7, [SP, #8] Store the stack pointer
26997 (before reserving the space).
26998 8 mov R7, PC Get hold of the start of this code + 12.
26999 10 str R7, [SP, #16] Store it.
27000 12 mov R7, FP Get hold of the current frame pointer.
27001 14 str R7, [SP, #4] Store it.
27002 16 mov R7, LR Get hold of the current return address.
27003 18 str R7, [SP, #12] Store it.
27004 20 add R7, SP, #16 Point at the start of the
27005 backtrace structure.
27006 22 mov FP, R7 Put this value into the frame pointer. */
27008 work_register = thumb_find_work_register (live_regs_mask);
27009 work_reg = gen_rtx_REG (SImode, work_register);
27010 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27012 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27013 stack_pointer_rtx, GEN_INT (-16)));
27014 RTX_FRAME_RELATED_P (insn) = 1;
27016 if (l_mask)
27018 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27019 RTX_FRAME_RELATED_P (insn) = 1;
27020 lr_needs_saving = false;
27022 offset = bit_count (l_mask) * UNITS_PER_WORD;
27025 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27026 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27028 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27029 x = gen_frame_mem (SImode, x);
27030 emit_move_insn (x, work_reg);
27032 /* Make sure that the instruction fetching the PC is in the right place
27033 to calculate "start of backtrace creation code + 12". */
27034 /* ??? The stores using the common WORK_REG ought to be enough to
27035 prevent the scheduler from doing anything weird. Failing that
27036 we could always move all of the following into an UNSPEC_VOLATILE. */
27037 if (l_mask)
27039 x = gen_rtx_REG (SImode, PC_REGNUM);
27040 emit_move_insn (work_reg, x);
27042 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27043 x = gen_frame_mem (SImode, x);
27044 emit_move_insn (x, work_reg);
27046 emit_move_insn (work_reg, arm_hfp_rtx);
27048 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27049 x = gen_frame_mem (SImode, x);
27050 emit_move_insn (x, work_reg);
27052 else
27054 emit_move_insn (work_reg, arm_hfp_rtx);
27056 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27057 x = gen_frame_mem (SImode, x);
27058 emit_move_insn (x, work_reg);
27060 x = gen_rtx_REG (SImode, PC_REGNUM);
27061 emit_move_insn (work_reg, x);
27063 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27064 x = gen_frame_mem (SImode, x);
27065 emit_move_insn (x, work_reg);
27068 x = gen_rtx_REG (SImode, LR_REGNUM);
27069 emit_move_insn (work_reg, x);
27071 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27072 x = gen_frame_mem (SImode, x);
27073 emit_move_insn (x, work_reg);
27075 x = GEN_INT (offset + 12);
27076 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27078 emit_move_insn (arm_hfp_rtx, work_reg);
27080 /* Optimization: If we are not pushing any low registers but we are going
27081 to push some high registers then delay our first push. This will just
27082 be a push of LR and we can combine it with the push of the first high
27083 register. */
27084 else if ((l_mask & 0xff) != 0
27085 || (high_regs_pushed == 0 && lr_needs_saving))
27087 unsigned long mask = l_mask;
27088 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27089 insn = thumb1_emit_multi_reg_push (mask, mask);
27090 RTX_FRAME_RELATED_P (insn) = 1;
27091 lr_needs_saving = false;
27094 if (high_regs_pushed)
27096 unsigned pushable_regs;
27097 unsigned next_hi_reg;
27098 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27099 : crtl->args.info.nregs;
27100 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27102 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27103 if (live_regs_mask & (1 << next_hi_reg))
27104 break;
27106 /* Here we need to mask out registers used for passing arguments
27107 even if they can be pushed. This is to avoid using them to
27108 stash the high registers. Such a stash could clobber the
27109 arguments. */
27110 pushable_regs = l_mask & (~arg_regs_mask);
27111 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27113 /* Normally, LR can be used as a scratch register once it has been
27114 saved; but if the function examines its own return address then
27115 the value is still live and we need to avoid using it. */
27116 bool return_addr_live
27117 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27118 LR_REGNUM);
27120 if (lr_needs_saving || return_addr_live)
27121 pushable_regs &= ~(1 << LR_REGNUM);
27123 if (pushable_regs == 0)
27124 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27126 while (high_regs_pushed > 0)
27128 unsigned long real_regs_mask = 0;
27129 unsigned long push_mask = 0;
27131 for (regno = LR_REGNUM; regno >= 0; regno --)
27133 if (pushable_regs & (1 << regno))
27135 emit_move_insn (gen_rtx_REG (SImode, regno),
27136 gen_rtx_REG (SImode, next_hi_reg));
27138 high_regs_pushed --;
27139 real_regs_mask |= (1 << next_hi_reg);
27140 push_mask |= (1 << regno);
27142 if (high_regs_pushed)
27144 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27145 next_hi_reg --)
27146 if (live_regs_mask & (1 << next_hi_reg))
27147 break;
27149 else
27150 break;
27154 /* If we had to find a work register and we have not yet
27155 saved the LR then add it to the list of regs to push. */
27156 if (lr_needs_saving)
27158 push_mask |= 1 << LR_REGNUM;
27159 real_regs_mask |= 1 << LR_REGNUM;
27160 lr_needs_saving = false;
27161 /* If the return address is not live at this point, we
27162 can add LR to the list of registers that we can use
27163 for pushes. */
27164 if (!return_addr_live)
27165 pushable_regs |= 1 << LR_REGNUM;
27168 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27169 RTX_FRAME_RELATED_P (insn) = 1;
27173 /* Load the pic register before setting the frame pointer,
27174 so we can use r7 as a temporary work register. */
27175 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27176 arm_load_pic_register (live_regs_mask, NULL_RTX);
27178 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27179 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27180 stack_pointer_rtx);
27182 size = offsets->outgoing_args - offsets->saved_args;
27183 if (flag_stack_usage_info)
27184 current_function_static_stack_size = size;
27186 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27187 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27188 || flag_stack_clash_protection)
27189 && size)
27190 sorry ("%<-fstack-check=specific%> for Thumb-1");
27192 amount = offsets->outgoing_args - offsets->saved_regs;
27193 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27194 if (amount)
27196 if (amount < 512)
27198 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27199 GEN_INT (- amount)));
27200 RTX_FRAME_RELATED_P (insn) = 1;
27202 else
27204 rtx reg, dwarf;
27206 /* The stack decrement is too big for an immediate value in a single
27207 insn. In theory we could issue multiple subtracts, but after
27208 three of them it becomes more space efficient to place the full
27209 value in the constant pool and load into a register. (Also the
27210 ARM debugger really likes to see only one stack decrement per
27211 function). So instead we look for a scratch register into which
27212 we can load the decrement, and then we subtract this from the
27213 stack pointer. Unfortunately on the thumb the only available
27214 scratch registers are the argument registers, and we cannot use
27215 these as they may hold arguments to the function. Instead we
27216 attempt to locate a call preserved register which is used by this
27217 function. If we can find one, then we know that it will have
27218 been pushed at the start of the prologue and so we can corrupt
27219 it now. */
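/* Editorial example (not from the original source): if r7 is the only
   call-saved low register in LIVE_REGS_MASK, the loop below picks
   regno == 7; the negated frame size is then loaded into r7 (typically
   from the literal pool) and a single "add sp, sp, r7" performs the whole
   decrement.  */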
27220 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27221 if (live_regs_mask & (1 << regno))
27222 break;
27224 gcc_assert(regno <= LAST_LO_REGNUM);
27226 reg = gen_rtx_REG (SImode, regno);
27228 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27230 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27231 stack_pointer_rtx, reg));
27233 dwarf = gen_rtx_SET (stack_pointer_rtx,
27234 plus_constant (Pmode, stack_pointer_rtx,
27235 -amount));
27236 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27237 RTX_FRAME_RELATED_P (insn) = 1;
27241 if (frame_pointer_needed)
27242 thumb_set_frame_pointer (offsets);
27244 /* If we are profiling, make sure no instructions are scheduled before
27245 the call to mcount. Similarly if the user has requested no
27246 scheduling in the prologue. Similarly if we want non-call exceptions
27247 using the EABI unwinder, to prevent faulting instructions from being
27248 swapped with a stack adjustment. */
27249 if (crtl->profile || !TARGET_SCHED_PROLOG
27250 || (arm_except_unwind_info (&global_options) == UI_TARGET
27251 && cfun->can_throw_non_call_exceptions))
27252 emit_insn (gen_blockage ());
27254 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27255 if (live_regs_mask & 0xff)
27256 cfun->machine->lr_save_eliminated = 0;
27259 /* Clear caller-saved registers not used to pass return values, and leaked
27260 condition flags, before exiting a cmse_nonsecure_entry function. */
27262 void
27263 cmse_nonsecure_entry_clear_before_return (void)
27265 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27266 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27267 uint32_t padding_bits_to_clear = 0;
27268 auto_sbitmap to_clear_bitmap (maxregno + 1);
27269 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27270 tree result_type;
27272 bitmap_clear (to_clear_bitmap);
27273 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27274 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27276 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27277 registers. */
27278 if (clear_vfpregs)
27280 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27282 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27284 if (!TARGET_HAVE_FPCXT_CMSE)
27286 /* Make sure we don't clear the two scratch registers used to clear
27287 the relevant FPSCR bits in output_return_instruction. */
27288 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27289 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27290 emit_use (gen_rtx_REG (SImode, 4));
27291 bitmap_clear_bit (to_clear_bitmap, 4);
27295 /* If the user has defined registers to be caller saved, these are no longer
27296 restored by the function before returning and must thus be cleared for
27297 security purposes. */
27298 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27300 /* We do not touch registers that can be used to pass arguments as per
27301 the AAPCS, since these should never be made callee-saved by user
27302 options. */
27303 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27304 continue;
27305 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27306 continue;
27307 if (!callee_saved_reg_p (regno)
27308 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27309 || TARGET_HARD_FLOAT))
27310 bitmap_set_bit (to_clear_bitmap, regno);
27313 /* Make sure we do not clear the registers used to return the result. */
27314 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27315 if (!VOID_TYPE_P (result_type))
27317 uint64_t to_clear_return_mask;
27318 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27320 /* No need to check that we return in registers, because we don't
27321 support returning on stack yet. */
27322 gcc_assert (REG_P (result_rtl));
27323 to_clear_return_mask
27324 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27325 &padding_bits_to_clear);
27326 if (to_clear_return_mask)
27328 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27329 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27331 if (to_clear_return_mask & (1ULL << regno))
27332 bitmap_clear_bit (to_clear_bitmap, regno);
27337 if (padding_bits_to_clear != 0)
27339 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27340 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27342 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27343 returning a composite type, which only uses r0. Let's make sure that
27344 r1-r3 are cleared too. */
27345 bitmap_clear (to_clear_arg_regs_bitmap);
27346 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27347 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27350 /* Clear full registers that leak before returning. */
27351 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27352 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27353 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27354 clearing_reg);
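/* Editorial illustration (hypothetical example, not part of the original
   source): a function that reaches the clearing code above when compiled
   with -mcmse.

     int __attribute__ ((cmse_nonsecure_entry))
     get_status (void)
     {
       return status;	// hypothetical secure-state variable
     }

   Its epilogue keeps r0 (the return value) but clears the remaining
   argument registers (r1-r3), ip and the condition flags (and, depending
   on the FP configuration, the caller-saved VFP registers) before
   returning with bxns.  */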
27357 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27358 POP instruction can be generated. LR should be replaced by PC. All
27359 the checks required are already done by USE_RETURN_INSN (). Hence,
27360 all we really need to check here is whether a single register or
27361 multiple registers are to be popped. */
27362 void
27363 thumb2_expand_return (bool simple_return)
27365 int i, num_regs;
27366 unsigned long saved_regs_mask;
27367 arm_stack_offsets *offsets;
27369 offsets = arm_get_frame_offsets ();
27370 saved_regs_mask = offsets->saved_regs_mask;
27372 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27373 if (saved_regs_mask & (1 << i))
27374 num_regs++;
27376 if (!simple_return && saved_regs_mask)
27378 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27379 functions, or adapt the code to handle it according to the ACLE. This path
27380 should not be reachable for cmse_nonsecure_entry functions, though we prefer
27381 to assert it for now to ensure that future code changes do not silently
27382 change this behavior. */
27383 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27384 if (num_regs == 1)
27386 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27387 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27388 rtx addr = gen_rtx_MEM (SImode,
27389 gen_rtx_POST_INC (SImode,
27390 stack_pointer_rtx));
27391 set_mem_alias_set (addr, get_frame_alias_set ());
27392 XVECEXP (par, 0, 0) = ret_rtx;
27393 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27394 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27395 emit_jump_insn (par);
27397 else
27399 saved_regs_mask &= ~ (1 << LR_REGNUM);
27400 saved_regs_mask |= (1 << PC_REGNUM);
27401 arm_emit_multi_reg_pop (saved_regs_mask);
27404 else
27406 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27407 cmse_nonsecure_entry_clear_before_return ();
27408 emit_jump_insn (simple_return_rtx);
27412 void
27413 thumb1_expand_epilogue (void)
27415 HOST_WIDE_INT amount;
27416 arm_stack_offsets *offsets;
27417 int regno;
27419 /* Naked functions don't have epilogues. */
27420 if (IS_NAKED (arm_current_func_type ()))
27421 return;
27423 offsets = arm_get_frame_offsets ();
27424 amount = offsets->outgoing_args - offsets->saved_regs;
27426 if (frame_pointer_needed)
27428 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27429 amount = offsets->locals_base - offsets->saved_regs;
27431 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27433 gcc_assert (amount >= 0);
27434 if (amount)
27436 emit_insn (gen_blockage ());
27438 if (amount < 512)
27439 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27440 GEN_INT (amount)));
27441 else
27443 /* r3 is always free in the epilogue. */
27444 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27446 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27447 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27451 /* Emit a USE (stack_pointer_rtx), so that
27452 the stack adjustment will not be deleted. */
27453 emit_insn (gen_force_register_use (stack_pointer_rtx));
27455 if (crtl->profile || !TARGET_SCHED_PROLOG)
27456 emit_insn (gen_blockage ());
27458 /* Emit a clobber for each insn that will be restored in the epilogue,
27459 so that flow2 will get register lifetimes correct. */
27460 for (regno = 0; regno < 13; regno++)
27461 if (reg_needs_saving_p (regno))
27462 emit_clobber (gen_rtx_REG (SImode, regno));
27464 if (! df_regs_ever_live_p (LR_REGNUM))
27465 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27467 /* Clear all caller-saved regs that are not used to return. */
27468 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27469 cmse_nonsecure_entry_clear_before_return ();
27472 /* Epilogue code for APCS frame. */
27473 static void
27474 arm_expand_epilogue_apcs_frame (bool really_return)
27476 unsigned long func_type;
27477 unsigned long saved_regs_mask;
27478 int num_regs = 0;
27479 int i;
27480 int floats_from_frame = 0;
27481 arm_stack_offsets *offsets;
27483 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27484 func_type = arm_current_func_type ();
27486 /* Get frame offsets for ARM. */
27487 offsets = arm_get_frame_offsets ();
27488 saved_regs_mask = offsets->saved_regs_mask;
27490 /* Find the offset of the floating-point save area in the frame. */
27491 floats_from_frame
27492 = (offsets->saved_args
27493 + arm_compute_static_chain_stack_bytes ()
27494 - offsets->frame);
27496 /* Compute how many core registers are saved and how far away the floats are. */
27497 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27498 if (saved_regs_mask & (1 << i))
27500 num_regs++;
27501 floats_from_frame += 4;
27504 if (TARGET_VFP_BASE)
27506 int start_reg;
27507 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27509 /* The offset is from IP_REGNUM. */
27510 int saved_size = arm_get_vfp_saved_size ();
27511 if (saved_size > 0)
27513 rtx_insn *insn;
27514 floats_from_frame += saved_size;
27515 insn = emit_insn (gen_addsi3 (ip_rtx,
27516 hard_frame_pointer_rtx,
27517 GEN_INT (-floats_from_frame)));
27518 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27519 ip_rtx, hard_frame_pointer_rtx);
27522 /* Generate VFP register multi-pop. */
27523 start_reg = FIRST_VFP_REGNUM;
27525 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27526 /* Look for a case where a reg does not need restoring. */
27527 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27529 if (start_reg != i)
27530 arm_emit_vfp_multi_reg_pop (start_reg,
27531 (i - start_reg) / 2,
27532 gen_rtx_REG (SImode,
27533 IP_REGNUM));
27534 start_reg = i + 2;
27537 /* Restore the remaining regs that we have discovered (or possibly
27538 even all of them, if the conditional in the for loop never
27539 fired). */
27540 if (start_reg != i)
27541 arm_emit_vfp_multi_reg_pop (start_reg,
27542 (i - start_reg) / 2,
27543 gen_rtx_REG (SImode, IP_REGNUM));
27546 if (TARGET_IWMMXT)
27548 /* The frame pointer is guaranteed to be non-double-word aligned, as
27549 it is set to double-word-aligned old_stack_pointer - 4. */
27550 rtx_insn *insn;
27551 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27553 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27554 if (reg_needs_saving_p (i))
27556 rtx addr = gen_frame_mem (V2SImode,
27557 plus_constant (Pmode, hard_frame_pointer_rtx,
27558 - lrm_count * 4));
27559 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27560 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27561 gen_rtx_REG (V2SImode, i),
27562 NULL_RTX);
27563 lrm_count += 2;
27567 /* saved_regs_mask should contain IP which contains old stack pointer
27568 at the time of activation creation. Since SP and IP are adjacent registers,
27569 we can restore the value directly into SP. */
27570 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27571 saved_regs_mask &= ~(1 << IP_REGNUM);
27572 saved_regs_mask |= (1 << SP_REGNUM);
27574 /* There are two registers left in saved_regs_mask - LR and PC. We
27575 only need to restore LR (the return address), but to
27576 save time we can load it directly into PC, unless we need a
27577 special function exit sequence, or we are not really returning. */
27578 if (really_return
27579 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27580 && !crtl->calls_eh_return)
27581 /* Delete LR from the register mask, so that the saved LR on
27582 the stack is popped directly into the PC. */
27583 saved_regs_mask &= ~(1 << LR_REGNUM);
27584 else
27585 saved_regs_mask &= ~(1 << PC_REGNUM);
27587 num_regs = bit_count (saved_regs_mask);
27588 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27590 rtx_insn *insn;
27591 emit_insn (gen_blockage ());
27592 /* Unwind the stack to just below the saved registers. */
27593 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27594 hard_frame_pointer_rtx,
27595 GEN_INT (- 4 * num_regs)));
27597 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27598 stack_pointer_rtx, hard_frame_pointer_rtx);
27601 arm_emit_multi_reg_pop (saved_regs_mask);
27603 if (IS_INTERRUPT (func_type))
27605 /* Interrupt handlers will have pushed the
27606 IP onto the stack, so restore it now. */
27607 rtx_insn *insn;
27608 rtx addr = gen_rtx_MEM (SImode,
27609 gen_rtx_POST_INC (SImode,
27610 stack_pointer_rtx));
27611 set_mem_alias_set (addr, get_frame_alias_set ());
27612 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27613 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27614 gen_rtx_REG (SImode, IP_REGNUM),
27615 NULL_RTX);
27618 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27619 return;
27621 if (crtl->calls_eh_return)
27622 emit_insn (gen_addsi3 (stack_pointer_rtx,
27623 stack_pointer_rtx,
27624 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27626 if (IS_STACKALIGN (func_type))
27627 /* Restore the original stack pointer. Before prologue, the stack was
27628 realigned and the original stack pointer saved in r0. For details,
27629 see comment in arm_expand_prologue. */
27630 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27632 emit_jump_insn (simple_return_rtx);
27635 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27636 function is not a sibcall. */
27637 void
27638 arm_expand_epilogue (bool really_return)
27640 unsigned long func_type;
27641 unsigned long saved_regs_mask;
27642 int num_regs = 0;
27643 int i;
27644 int amount;
27645 arm_stack_offsets *offsets;
27647 func_type = arm_current_func_type ();
27649 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27650 let output_return_instruction take care of any instruction emission. */
27651 if (IS_NAKED (func_type)
27652 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27654 if (really_return)
27655 emit_jump_insn (simple_return_rtx);
27656 return;
27659 /* If we are throwing an exception, then we really must be doing a
27660 return, so we can't tail-call. */
27661 gcc_assert (!crtl->calls_eh_return || really_return);
27663 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27665 arm_expand_epilogue_apcs_frame (really_return);
27666 return;
27669 /* Get frame offsets for ARM. */
27670 offsets = arm_get_frame_offsets ();
27671 saved_regs_mask = offsets->saved_regs_mask;
27672 num_regs = bit_count (saved_regs_mask);
27674 if (frame_pointer_needed)
27676 rtx_insn *insn;
27677 /* Restore stack pointer if necessary. */
27678 if (TARGET_ARM)
27680 /* In ARM mode, frame pointer points to first saved register.
27681 Restore stack pointer to last saved register. */
27682 amount = offsets->frame - offsets->saved_regs;
27684 /* Force out any pending memory operations that reference stacked data
27685 before stack de-allocation occurs. */
27686 emit_insn (gen_blockage ());
27687 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27688 hard_frame_pointer_rtx,
27689 GEN_INT (amount)));
27690 arm_add_cfa_adjust_cfa_note (insn, amount,
27691 stack_pointer_rtx,
27692 hard_frame_pointer_rtx);
27694 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27695 deleted. */
27696 emit_insn (gen_force_register_use (stack_pointer_rtx));
27698 else
27700 /* In Thumb-2 mode, the frame pointer points to the last saved
27701 register. */
27702 amount = offsets->locals_base - offsets->saved_regs;
27703 if (amount)
27705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27706 hard_frame_pointer_rtx,
27707 GEN_INT (amount)));
27708 arm_add_cfa_adjust_cfa_note (insn, amount,
27709 hard_frame_pointer_rtx,
27710 hard_frame_pointer_rtx);
27713 /* Force out any pending memory operations that reference stacked data
27714 before stack de-allocation occurs. */
27715 emit_insn (gen_blockage ());
27716 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27717 hard_frame_pointer_rtx));
27718 arm_add_cfa_adjust_cfa_note (insn, 0,
27719 stack_pointer_rtx,
27720 hard_frame_pointer_rtx);
27721 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27722 deleted. */
27723 emit_insn (gen_force_register_use (stack_pointer_rtx));
27726 else
27728 /* Pop off outgoing args and local frame to adjust stack pointer to
27729 last saved register. */
27730 amount = offsets->outgoing_args - offsets->saved_regs;
27731 if (amount)
27733 rtx_insn *tmp;
27734 /* Force out any pending memory operations that reference stacked data
27735 before stack de-allocation occurs. */
27736 emit_insn (gen_blockage ());
27737 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27738 stack_pointer_rtx,
27739 GEN_INT (amount)));
27740 arm_add_cfa_adjust_cfa_note (tmp, amount,
27741 stack_pointer_rtx, stack_pointer_rtx);
27742 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27743 not deleted. */
27744 emit_insn (gen_force_register_use (stack_pointer_rtx));
27748 if (TARGET_VFP_BASE)
27750 /* Generate VFP register multi-pop. */
27751 int end_reg = LAST_VFP_REGNUM + 1;
27753 /* Scan the registers in reverse order. We need to match
27754 any groupings made in the prologue and generate matching
27755 vldm operations. The need to match groups is because,
27756 unlike pop, vldm can only do consecutive regs. */
27757 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27758 /* Look for a case where a reg does not need restoring. */
27759 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27761 /* Restore the regs discovered so far (from reg+2 to
27762 end_reg). */
27763 if (end_reg > i + 2)
27764 arm_emit_vfp_multi_reg_pop (i + 2,
27765 (end_reg - (i + 2)) / 2,
27766 stack_pointer_rtx);
27767 end_reg = i;
27770 /* Restore the remaining regs that we have discovered (or possibly
27771 even all of them, if the conditional in the for loop never
27772 fired). */
27773 if (end_reg > i + 2)
27774 arm_emit_vfp_multi_reg_pop (i + 2,
27775 (end_reg - (i + 2)) / 2,
27776 stack_pointer_rtx);
27779 if (TARGET_IWMMXT)
27780 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27781 if (reg_needs_saving_p (i))
27783 rtx_insn *insn;
27784 rtx addr = gen_rtx_MEM (V2SImode,
27785 gen_rtx_POST_INC (SImode,
27786 stack_pointer_rtx));
27787 set_mem_alias_set (addr, get_frame_alias_set ());
27788 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27789 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27790 gen_rtx_REG (V2SImode, i),
27791 NULL_RTX);
27792 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27793 stack_pointer_rtx, stack_pointer_rtx);
27796 if (saved_regs_mask)
27798 rtx insn;
27799 bool return_in_pc = false;
27801 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27802 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27803 && !IS_CMSE_ENTRY (func_type)
27804 && !IS_STACKALIGN (func_type)
27805 && really_return
27806 && crtl->args.pretend_args_size == 0
27807 && saved_regs_mask & (1 << LR_REGNUM)
27808 && !crtl->calls_eh_return)
27810 saved_regs_mask &= ~(1 << LR_REGNUM);
27811 saved_regs_mask |= (1 << PC_REGNUM);
27812 return_in_pc = true;
27815 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27817 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27818 if (saved_regs_mask & (1 << i))
27820 rtx addr = gen_rtx_MEM (SImode,
27821 gen_rtx_POST_INC (SImode,
27822 stack_pointer_rtx));
27823 set_mem_alias_set (addr, get_frame_alias_set ());
27825 if (i == PC_REGNUM)
27827 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27828 XVECEXP (insn, 0, 0) = ret_rtx;
27829 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27830 addr);
27831 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27832 insn = emit_jump_insn (insn);
27834 else
27836 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27837 addr));
27838 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27839 gen_rtx_REG (SImode, i),
27840 NULL_RTX);
27841 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27842 stack_pointer_rtx,
27843 stack_pointer_rtx);
27847 else
27849 if (TARGET_LDRD
27850 && current_tune->prefer_ldrd_strd
27851 && !optimize_function_for_size_p (cfun))
27853 if (TARGET_THUMB2)
27854 thumb2_emit_ldrd_pop (saved_regs_mask);
27855 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27856 arm_emit_ldrd_pop (saved_regs_mask);
27857 else
27858 arm_emit_multi_reg_pop (saved_regs_mask);
27860 else
27861 arm_emit_multi_reg_pop (saved_regs_mask);
27864 if (return_in_pc)
27865 return;
27868 amount
27869 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27870 if (amount)
27872 int i, j;
27873 rtx dwarf = NULL_RTX;
27874 rtx_insn *tmp =
27875 emit_insn (gen_addsi3 (stack_pointer_rtx,
27876 stack_pointer_rtx,
27877 GEN_INT (amount)));
27879 RTX_FRAME_RELATED_P (tmp) = 1;
27881 if (cfun->machine->uses_anonymous_args)
27883 /* Restore pretend args. Refer to arm_expand_prologue for how
27884 pretend_args are saved on the stack. */
27885 int num_regs = crtl->args.pretend_args_size / 4;
27886 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27887 for (j = 0, i = 0; j < num_regs; i++)
27888 if (saved_regs_mask & (1 << i))
27890 rtx reg = gen_rtx_REG (SImode, i);
27891 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27892 j++;
27894 REG_NOTES (tmp) = dwarf;
27896 arm_add_cfa_adjust_cfa_note (tmp, amount,
27897 stack_pointer_rtx, stack_pointer_rtx);
27900 if (IS_CMSE_ENTRY (func_type))
27902 /* CMSE_ENTRY always returns. */
27903 gcc_assert (really_return);
27904 /* Clear all caller-saved regs that are not used to return. */
27905 cmse_nonsecure_entry_clear_before_return ();
27907 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27908 VLDR. */
27909 if (TARGET_HAVE_FPCXT_CMSE)
27911 rtx_insn *insn;
27913 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27914 GEN_INT (FPCXTNS_ENUM)));
27915 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27916 plus_constant (Pmode, stack_pointer_rtx, 4));
27917 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27918 RTX_FRAME_RELATED_P (insn) = 1;
27922 if (!really_return)
27923 return;
27925 if (crtl->calls_eh_return)
27926 emit_insn (gen_addsi3 (stack_pointer_rtx,
27927 stack_pointer_rtx,
27928 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27930 if (IS_STACKALIGN (func_type))
27931 /* Restore the original stack pointer. Before prologue, the stack was
27932 realigned and the original stack pointer saved in r0. For details,
27933 see comment in arm_expand_prologue. */
27934 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27936 emit_jump_insn (simple_return_rtx);
27939 /* Implementation of insn prologue_thumb1_interwork. This is the first
27940 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27942 const char *
27943 thumb1_output_interwork (void)
27945 const char * name;
27946 FILE *f = asm_out_file;
27948 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27949 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27950 == SYMBOL_REF);
27951 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27953 /* Generate code sequence to switch us into Thumb mode. */
27954 /* The .code 32 directive has already been emitted by
27955 ASM_DECLARE_FUNCTION_NAME. */
27956 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27957 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27959 /* Generate a label, so that the debugger will notice the
27960 change in instruction sets. This label is also used by
27961 the assembler to bypass the ARM code when this function
27962 is called from a Thumb encoded function elsewhere in the
27963 same file. Hence the definition of STUB_NAME here must
27964 agree with the definition in gas/config/tc-arm.c. */
27966 #define STUB_NAME ".real_start_of"
27968 fprintf (f, "\t.code\t16\n");
27969 #ifdef ARM_PE
27970 if (arm_dllexport_name_p (name))
27971 name = arm_strip_name_encoding (name);
27972 #endif
27973 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27974 fprintf (f, "\t.thumb_func\n");
27975 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27977 return "";
27980 /* Handle the case of a double-word load into a pair of low registers from
27981 a computed memory address. The computed address may involve a
27982 register which is overwritten by the load. */
27983 const char *
27984 thumb_load_double_from_address (rtx *operands)
27986 rtx addr;
27987 rtx base;
27988 rtx offset;
27989 rtx arg1;
27990 rtx arg2;
27992 gcc_assert (REG_P (operands[0]));
27993 gcc_assert (MEM_P (operands[1]));
27995 /* Get the memory address. */
27996 addr = XEXP (operands[1], 0);
27998 /* Work out how the memory address is computed. */
27999 switch (GET_CODE (addr))
28001 case REG:
28002 operands[2] = adjust_address (operands[1], SImode, 4);
28004 if (REGNO (operands[0]) == REGNO (addr))
28006 output_asm_insn ("ldr\t%H0, %2", operands);
28007 output_asm_insn ("ldr\t%0, %1", operands);
28009 else
28011 output_asm_insn ("ldr\t%0, %1", operands);
28012 output_asm_insn ("ldr\t%H0, %2", operands);
28014 break;
28016 case CONST:
28017 /* Compute <address> + 4 for the high order load. */
28018 operands[2] = adjust_address (operands[1], SImode, 4);
28020 output_asm_insn ("ldr\t%0, %1", operands);
28021 output_asm_insn ("ldr\t%H0, %2", operands);
28022 break;
28024 case PLUS:
28025 arg1 = XEXP (addr, 0);
28026 arg2 = XEXP (addr, 1);
28028 if (CONSTANT_P (arg1))
28029 base = arg2, offset = arg1;
28030 else
28031 base = arg1, offset = arg2;
28033 gcc_assert (REG_P (base));
28035 /* Catch the case of <address> = <reg> + <reg> */
28036 if (REG_P (offset))
28038 int reg_offset = REGNO (offset);
28039 int reg_base = REGNO (base);
28040 int reg_dest = REGNO (operands[0]);
28042 /* Add the base and offset registers together into the
28043 higher destination register. */
28044 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28045 reg_dest + 1, reg_base, reg_offset);
28047 /* Load the lower destination register from the address in
28048 the higher destination register. */
28049 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28050 reg_dest, reg_dest + 1);
28052 /* Load the higher destination register from its own address
28053 plus 4. */
28054 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28055 reg_dest + 1, reg_dest + 1);
28057 else
28059 /* Compute <address> + 4 for the high order load. */
28060 operands[2] = adjust_address (operands[1], SImode, 4);
28062 /* If the computed address is held in the low order register
28063 then load the high order register first, otherwise always
28064 load the low order register first. */
28065 if (REGNO (operands[0]) == REGNO (base))
28067 output_asm_insn ("ldr\t%H0, %2", operands);
28068 output_asm_insn ("ldr\t%0, %1", operands);
28070 else
28072 output_asm_insn ("ldr\t%0, %1", operands);
28073 output_asm_insn ("ldr\t%H0, %2", operands);
28076 break;
28078 case LABEL_REF:
28079 /* With no registers to worry about we can just load the value
28080 directly. */
28081 operands[2] = adjust_address (operands[1], SImode, 4);
28083 output_asm_insn ("ldr\t%H0, %2", operands);
28084 output_asm_insn ("ldr\t%0, %1", operands);
28085 break;
28087 default:
28088 gcc_unreachable ();
28091 return "";
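/* Editorial example (not from the original source): for the REG + REG case
   above, loading the pair r2/r3 from the address r0 + r1 is emitted as
	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]
   i.e. the address is kept in the high half of the destination, which is
   only overwritten by the final load.  */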
28094 const char *
28095 thumb_output_move_mem_multiple (int n, rtx *operands)
28097 switch (n)
28099 case 2:
28100 if (REGNO (operands[4]) > REGNO (operands[5]))
28101 std::swap (operands[4], operands[5]);
28103 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28104 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28105 break;
28107 case 3:
28108 if (REGNO (operands[4]) > REGNO (operands[5]))
28109 std::swap (operands[4], operands[5]);
28110 if (REGNO (operands[5]) > REGNO (operands[6]))
28111 std::swap (operands[5], operands[6]);
28112 if (REGNO (operands[4]) > REGNO (operands[5]))
28113 std::swap (operands[4], operands[5]);
28115 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28116 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28117 break;
28119 default:
28120 gcc_unreachable ();
28123 return "";
28126 /* Output a call-via instruction for thumb state. */
28127 const char *
28128 thumb_call_via_reg (rtx reg)
28130 int regno = REGNO (reg);
28131 rtx *labelp;
28133 gcc_assert (regno < LR_REGNUM);
28135 /* If we are in the normal text section we can use a single instance
28136 per compilation unit. If we are doing function sections, then we need
28137 an entry per section, since we can't rely on reachability. */
28138 if (in_section == text_section)
28140 thumb_call_reg_needed = 1;
28142 if (thumb_call_via_label[regno] == NULL)
28143 thumb_call_via_label[regno] = gen_label_rtx ();
28144 labelp = thumb_call_via_label + regno;
28146 else
28148 if (cfun->machine->call_via[regno] == NULL)
28149 cfun->machine->call_via[regno] = gen_label_rtx ();
28150 labelp = cfun->machine->call_via + regno;
28153 output_asm_insn ("bl\t%a0", labelp);
28154 return "";
28157 /* Routines for generating rtl. */
28158 void
28159 thumb_expand_cpymemqi (rtx *operands)
28161 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28162 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28163 HOST_WIDE_INT len = INTVAL (operands[2]);
28164 HOST_WIDE_INT offset = 0;
28166 while (len >= 12)
28168 emit_insn (gen_cpymem12b (out, in, out, in));
28169 len -= 12;
28172 if (len >= 8)
28174 emit_insn (gen_cpymem8b (out, in, out, in));
28175 len -= 8;
28178 if (len >= 4)
28180 rtx reg = gen_reg_rtx (SImode);
28181 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28182 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28183 len -= 4;
28184 offset += 4;
28187 if (len >= 2)
28189 rtx reg = gen_reg_rtx (HImode);
28190 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28191 plus_constant (Pmode, in,
28192 offset))));
28193 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28194 offset)),
28195 reg));
28196 len -= 2;
28197 offset += 2;
28200 if (len)
28202 rtx reg = gen_reg_rtx (QImode);
28203 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28204 plus_constant (Pmode, in,
28205 offset))));
28206 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28207 offset)),
28208 reg));
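/* Editorial example (not from the original source): for a 23-byte copy the
   expansion above emits one 12-byte and one 8-byte ldmia/stmia block (these
   patterns advance the pointers), then a halfword move at offset 0 and a
   byte move at offset 2 to cover the remaining 3 bytes.  */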
28212 void
28213 thumb_reload_out_hi (rtx *operands)
28215 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28218 /* Return the length of a function name prefix
28219 that starts with the character C. */
28220 static int
28221 arm_get_strip_length (int c)
28223 switch (c)
28225 ARM_NAME_ENCODING_LENGTHS
28226 default: return 0;
28230 /* Return a pointer to a function's name with any
28231 and all prefix encodings stripped from it. */
28232 const char *
28233 arm_strip_name_encoding (const char *name)
28235 int skip;
28237 while ((skip = arm_get_strip_length (* name)))
28238 name += skip;
28240 return name;
28243 /* If there is a '*' anywhere in the name's prefix, then
28244 emit the stripped name verbatim, otherwise prepend an
28245 underscore if leading underscores are being used. */
28246 void
28247 arm_asm_output_labelref (FILE *stream, const char *name)
28249 int skip;
28250 int verbatim = 0;
28252 while ((skip = arm_get_strip_length (* name)))
28254 verbatim |= (*name == '*');
28255 name += skip;
28258 if (verbatim)
28259 fputs (name, stream);
28260 else
28261 asm_fprintf (stream, "%U%s", name);
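/* Editorial example (not from the original source): a NAME of "*sym" is
   printed verbatim as "sym", while a plain "sym" is printed with the user
   label prefix, e.g. "_sym" on targets that use leading underscores.  */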
28264 /* This function is used to emit an EABI tag and its associated value.
28265 We emit the numerical value of the tag in case the assembler does not
28266 support textual tags (e.g. gas prior to 2.20). If requested we include
28267 the tag name in a comment so that anyone reading the assembler output
28268 will know which tag is being set.
28270 This function is not static because arm-c.cc needs it too. */
28272 void
28273 arm_emit_eabi_attribute (const char *name, int num, int val)
28275 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28276 if (flag_verbose_asm || flag_debug_asm)
28277 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28278 asm_fprintf (asm_out_file, "\n");
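/* Editorial example (not from the original source):
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits roughly
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args
   with the trailing comment present only under -fverbose-asm or -dA.  */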
28281 /* This function is used to print CPU tuning information as a comment
28282 in the assembler file. Pointers are not printed for now. */
28284 void
28285 arm_print_tune_info (void)
28287 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28288 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28289 current_tune->constant_limit);
28290 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28291 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28292 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28293 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28294 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28295 "prefetch.l1_cache_size:\t%d\n",
28296 current_tune->prefetch.l1_cache_size);
28297 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28298 "prefetch.l1_cache_line_size:\t%d\n",
28299 current_tune->prefetch.l1_cache_line_size);
28300 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28301 "prefer_constant_pool:\t%d\n",
28302 (int) current_tune->prefer_constant_pool);
28303 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28304 "branch_cost:\t(s:speed, p:predictable)\n");
28305 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28306 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28307 current_tune->branch_cost (false, false));
28308 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28309 current_tune->branch_cost (false, true));
28310 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28311 current_tune->branch_cost (true, false));
28312 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28313 current_tune->branch_cost (true, true));
28314 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28315 "prefer_ldrd_strd:\t%d\n",
28316 (int) current_tune->prefer_ldrd_strd);
28317 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28318 "logical_op_non_short_circuit:\t[%d,%d]\n",
28319 (int) current_tune->logical_op_non_short_circuit_thumb,
28320 (int) current_tune->logical_op_non_short_circuit_arm);
28321 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28322 "disparage_flag_setting_t16_encodings:\t%d\n",
28323 (int) current_tune->disparage_flag_setting_t16_encodings);
28324 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28325 "string_ops_prefer_neon:\t%d\n",
28326 (int) current_tune->string_ops_prefer_neon);
28327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28328 "max_insns_inline_memset:\t%d\n",
28329 current_tune->max_insns_inline_memset);
28330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28331 current_tune->fusible_ops);
28332 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28333 (int) current_tune->sched_autopref);
28336 /* The last set of target options used to emit .arch directives, etc. This
28337 could be a function-local static if it were not required to expose it as a
28338 root to the garbage collector. */
28339 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28341 /* Print .arch and .arch_extension directives corresponding to the
28342 current architecture configuration. */
28343 static void
28344 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28346 arm_build_target build_target;
28347 /* If the target options haven't changed since the last time we were called
28348 there is nothing to do. This should be sufficient to suppress the
28349 majority of redundant work. */
28350 if (last_asm_targ_options == targ_options)
28351 return;
28353 last_asm_targ_options = targ_options;
28355 build_target.isa = sbitmap_alloc (isa_num_bits);
28356 arm_configure_build_target (&build_target, targ_options, false);
28358 if (build_target.core_name
28359 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28361 const char* truncated_name
28362 = arm_rewrite_selected_cpu (build_target.core_name);
28363 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28366 const arch_option *arch
28367 = arm_parse_arch_option_name (all_architectures, "-march",
28368 build_target.arch_name);
28369 auto_sbitmap opt_bits (isa_num_bits);
28371 gcc_assert (arch);
28373 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28375 /* Keep backward compatibility for assemblers which don't support
28376 armv7ve. Fortunately, none of the following extensions are reset
28377 by a .fpu directive. */
28378 asm_fprintf (stream, "\t.arch armv7-a\n");
28379 asm_fprintf (stream, "\t.arch_extension virt\n");
28380 asm_fprintf (stream, "\t.arch_extension idiv\n");
28381 asm_fprintf (stream, "\t.arch_extension sec\n");
28382 asm_fprintf (stream, "\t.arch_extension mp\n");
28384 else
28385 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28387 /* The .fpu directive will reset any architecture extensions from the
28388 assembler that relate to the fp/vector extensions. So put this out before
28389 any .arch_extension directives. */
28390 const char *fpu_name = (TARGET_SOFT_FLOAT
28391 ? "softvfp"
28392 : arm_identify_fpu_from_isa (build_target.isa));
28393 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28395 if (!arch->common.extensions)
28396 return;
28398 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28399 opt->name != NULL;
28400 opt++)
28402 if (!opt->remove)
28404 arm_initialize_isa (opt_bits, opt->isa_bits);
28406 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28407 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28408 floating-point instructions are disabled. So the following check
28409 restricts the printing of ".arch_extension mve" and
28410 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28411 this special behaviour because the feature bits "mve" and
28412 "mve_float" are not part of the "fpu bits", so they are not cleared
28413 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28414 TARGET_HAVE_MVE_FLOAT are disabled. */
28415 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28416 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28417 && !TARGET_HAVE_MVE_FLOAT))
28418 continue;
28420 /* If every feature bit of this option is set in the target ISA
28421 specification, print out the option name. However, don't print
28422 anything if all the bits are part of the FPU specification. */
28423 if (bitmap_subset_p (opt_bits, build_target.isa)
28424 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28425 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
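/* As an illustration (not from the original source), a compilation with
   -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=hard would typically make this
   function emit a preamble along the lines of
     .cpu cortex-a8
     .arch armv7-a
     .fpu neon
   followed by .arch_extension lines for any extensions present in the
   target ISA that are not purely FPU bits.  */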
28430 static void
28431 arm_file_start (void)
28433 int val;
28435 arm_print_asm_arch_directives
28436 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28438 if (TARGET_BPABI)
28440 /* If we have a named cpu, but the assembler does not support that
28441 name via .cpu, put out a cpu name attribute; but don't do this if the
28442 name starts with the fictitious prefix, 'generic'. */
28443 if (arm_active_target.core_name
28444 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28445 && !startswith (arm_active_target.core_name, "generic"))
28447 const char* truncated_name
28448 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28449 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28450 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28451 truncated_name);
28454 if (print_tune_info)
28455 arm_print_tune_info ();
28457 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28458 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28460 if (TARGET_HARD_FLOAT_ABI)
28461 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28463 /* Some of these attributes only apply when the corresponding features
28464 are used. However we don't have any easy way of figuring this out.
28465 Conservatively record the setting that would have been used. */
28467 if (flag_rounding_math)
28468 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28470 if (!flag_unsafe_math_optimizations)
28472 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28473 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28475 if (flag_signaling_nans)
28476 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28478 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28479 flag_finite_math_only ? 1 : 3);
28481 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28482 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28483 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28484 flag_short_enums ? 1 : 2);
28486 /* Tag_ABI_optimization_goals. */
28487 if (optimize_size)
28488 val = 4;
28489 else if (optimize >= 2)
28490 val = 2;
28491 else if (optimize)
28492 val = 1;
28493 else
28494 val = 6;
28495 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28497 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28498 unaligned_access);
28500 if (arm_fp16_format)
28501 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28502 (int) arm_fp16_format);
28504 if (arm_lang_output_object_attributes_hook)
28505 arm_lang_output_object_attributes_hook();
28508 default_file_start ();
28511 static void
28512 arm_file_end (void)
28514 int regno;
28516 /* Just in case the last function output in the assembler had non-default
28517 architecture directives, we force the assembler state back to the default
28518 set, so that any 'calculated' build attributes are based on the default
28519 options rather than the special options for that function. */
28520 arm_print_asm_arch_directives
28521 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28523 if (NEED_INDICATE_EXEC_STACK)
28524 /* Add .note.GNU-stack. */
28525 file_end_indicate_exec_stack ();
28527 if (! thumb_call_reg_needed)
28528 return;
28530 switch_to_section (text_section);
28531 asm_fprintf (asm_out_file, "\t.code 16\n");
28532 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28534 for (regno = 0; regno < LR_REGNUM; regno++)
28536 rtx label = thumb_call_via_label[regno];
28538 if (label != 0)
28540 targetm.asm_out.internal_label (asm_out_file, "L",
28541 CODE_LABEL_NUMBER (label));
28542 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28547 #ifndef ARM_PE
28548 /* Symbols in the text segment can be accessed without indirecting via the
28549 constant pool; it may take an extra binary operation, but this is still
28550 faster than indirecting via memory. Don't do this when not optimizing,
28551 since we won't be calculating all of the offsets necessary to do this
28552 simplification. */
28554 static void
28555 arm_encode_section_info (tree decl, rtx rtl, int first)
28557 if (optimize > 0 && TREE_CONSTANT (decl))
28558 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28560 default_encode_section_info (decl, rtl, first);
28562 #endif /* !ARM_PE */
28564 static void
28565 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28567 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28568 && !strcmp (prefix, "L"))
28570 arm_ccfsm_state = 0;
28571 arm_target_insn = NULL;
28573 default_internal_label (stream, prefix, labelno);
28576 /* Define classes to generate code as RTL or output asm to a file.
28577 Using templates then allows the same code to be used to output code
28578 sequences in the two formats. */
28579 class thumb1_const_rtl
28581 public:
28582 thumb1_const_rtl (rtx dst) : dst (dst) {}
28584 void mov (HOST_WIDE_INT val)
28586 emit_set_insn (dst, GEN_INT (val));
28589 void add (HOST_WIDE_INT val)
28591 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28594 void ashift (HOST_WIDE_INT shift)
28596 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28599 void neg ()
28601 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28604 private:
28605 rtx dst;
28608 class thumb1_const_print
28610 public:
28611 thumb1_const_print (FILE *f, int regno)
28613 t_file = f;
28614 dst_regname = reg_names[regno];
28617 void mov (HOST_WIDE_INT val)
28619 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28620 dst_regname, val);
28623 void add (HOST_WIDE_INT val)
28625 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28626 dst_regname, val);
28629 void ashift (HOST_WIDE_INT shift)
28631 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28632 dst_regname, shift);
28635 void neg ()
28637 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28640 private:
28641 FILE *t_file;
28642 const char *dst_regname;
28645 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28646 Avoid generating useless code when one of the bytes is zero. */
28647 template <class T>
28648 void
28649 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28651 bool mov_done_p = false;
28652 unsigned HOST_WIDE_INT val = op1;
28653 int shift = 0;
28654 int i;
28656 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28658 if (val <= 255)
28660 dst.mov (val);
28661 return;
28664 /* For negative numbers with the top nine bits set, build the
28665 opposite of OP1, then negate it; this is generally shorter and never
28666 longer. */
28667 if ((val & 0xFF800000) == 0xFF800000)
28669 thumb1_gen_const_int_1 (dst, -op1);
28670 dst.neg ();
28671 return;
28674 /* In the general case, we need 7 instructions to build
28675 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28676 do better if VAL is small enough, or
28677 right-shiftable by a suitable amount. If the
28678 right-shift lets us encode at least one byte fewer,
28679 it's worth it: we save an adds and an lsls at the
28680 expense of a final lsls. */
28681 int final_shift = number_of_first_bit_set (val);
28683 int leading_zeroes = clz_hwi (val);
28684 int number_of_bytes_needed
28685 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28686 / BITS_PER_UNIT) + 1;
28687 int number_of_bytes_needed2
28688 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28689 / BITS_PER_UNIT) + 1;
28691 if (number_of_bytes_needed2 < number_of_bytes_needed)
28692 val >>= final_shift;
28693 else
28694 final_shift = 0;
28696 /* If we are in a very small range, we can use either a single movs
28697 or movs+adds. */
28698 if (val <= 510)
28700 if (val > 255)
28702 unsigned HOST_WIDE_INT high = val - 255;
28704 dst.mov (high);
28705 dst.add (255);
28707 else
28708 dst.mov (val);
28710 if (final_shift > 0)
28711 dst.ashift (final_shift);
28713 else
28715 /* General case, emit upper 3 bytes as needed. */
28716 for (i = 0; i < 3; i++)
28718 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28720 if (byte)
28722 /* We are about to emit new bits, stop accumulating a
28723 shift amount, and left-shift only if we have already
28724 emitted some upper bits. */
28725 if (mov_done_p)
28727 dst.ashift (shift);
28728 dst.add (byte);
28730 else
28731 dst.mov (byte);
28733 /* Stop accumulating shift amount since we've just
28734 emitted some bits. */
28735 shift = 0;
28737 mov_done_p = true;
28740 if (mov_done_p)
28741 shift += 8;
28744 /* Emit lower byte. */
28745 if (!mov_done_p)
28746 dst.mov (val & 0xff);
28747 else
28749 dst.ashift (shift);
28750 if (val & 0xff)
28751 dst.add (val & 0xff);
28754 if (final_shift > 0)
28755 dst.ashift (final_shift);
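/* Worked examples (illustrative, not part of the original source):
   OP1 = 0x1234 takes the general path, emitting the upper non-zero
   byte first and then shifting in the low byte:
     movs  rN, #18	@ 0x12
     lsls  rN, #8
     adds  rN, #52	@ 0x34
   OP1 = 300 benefits from the final right shift (300 = 75 << 2):
     movs  rN, #75
     lsls  rN, #2
   and OP1 = 257 uses the small-range movs+adds path:
     movs  rN, #2
     adds  rN, #255  */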
28759 /* Proxies for thumb1.md, since the thumb1_const_print and
28760 thumb1_const_rtl classes are not exported. */
28761 void
28762 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28764 thumb1_const_rtl t (dst);
28765 thumb1_gen_const_int_1 (t, op1);
28768 void
28769 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28771 thumb1_const_print t (asm_out_file, REGNO (dst));
28772 thumb1_gen_const_int_1 (t, op1);
28775 /* Output code to add DELTA to the first argument, and then jump
28776 to FUNCTION. Used for C++ multiple inheritance. */
28778 static void
28779 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28780 HOST_WIDE_INT, tree function)
28782 static int thunk_label = 0;
28783 char label[256];
28784 char labelpc[256];
28785 int mi_delta = delta;
28786 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28787 int shift = 0;
28788 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28789 ? 1 : 0);
28790 if (mi_delta < 0)
28791 mi_delta = - mi_delta;
28793 final_start_function (emit_barrier (), file, 1);
28795 if (TARGET_THUMB1)
28797 int labelno = thunk_label++;
28798 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28799 /* Thunks are entered in arm mode when available. */
28800 if (TARGET_THUMB1_ONLY)
28802 /* push r3 so we can use it as a temporary. */
28803 /* TODO: Omit this save if r3 is not used. */
28804 fputs ("\tpush {r3}\n", file);
28806 /* With -mpure-code, we cannot load the address from the
28807 constant pool: we build it explicitly. */
28808 if (target_pure_code)
28810 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28811 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28812 fputc ('\n', file);
28813 fputs ("\tlsls r3, #8\n", file);
28814 fputs ("\tadds\tr3, #:upper0_7:#", file);
28815 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28816 fputc ('\n', file);
28817 fputs ("\tlsls r3, #8\n", file);
28818 fputs ("\tadds\tr3, #:lower8_15:#", file);
28819 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28820 fputc ('\n', file);
28821 fputs ("\tlsls r3, #8\n", file);
28822 fputs ("\tadds\tr3, #:lower0_7:#", file);
28823 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28824 fputc ('\n', file);
28826 else
28827 fputs ("\tldr\tr3, ", file);
28829 else
28831 fputs ("\tldr\tr12, ", file);
28834 if (!target_pure_code)
28836 assemble_name (file, label);
28837 fputc ('\n', file);
28840 if (flag_pic)
28842 /* If we are generating PIC, the ldr instruction below loads
28843 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28844 the address of the add + 8, so we have:
28846 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28847 = target + 1.
28849 Note that we have "+ 1" because some versions of GNU ld
28850 don't set the low bit of the result for R_ARM_REL32
28851 relocations against thumb function symbols.
28852 On ARMv6M this is +4, not +8. */
28853 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28854 assemble_name (file, labelpc);
28855 fputs (":\n", file);
28856 if (TARGET_THUMB1_ONLY)
28858 /* This is 2 insns after the start of the thunk, so we know it
28859 is 4-byte aligned. */
28860 fputs ("\tadd\tr3, pc, r3\n", file);
28861 fputs ("\tmov r12, r3\n", file);
28863 else
28864 fputs ("\tadd\tr12, pc, r12\n", file);
28866 else if (TARGET_THUMB1_ONLY)
28867 fputs ("\tmov r12, r3\n", file);
28869 if (TARGET_THUMB1_ONLY)
28871 if (mi_delta > 255)
28873 /* With -mpure-code, we cannot load MI_DELTA from the
28874 constant pool: we build it explicitly. */
28875 if (target_pure_code)
28877 thumb1_const_print r3 (file, 3);
28878 thumb1_gen_const_int_1 (r3, mi_delta);
28880 else
28882 fputs ("\tldr\tr3, ", file);
28883 assemble_name (file, label);
28884 fputs ("+4\n", file);
28886 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28887 mi_op, this_regno, this_regno);
28889 else if (mi_delta != 0)
28891 /* Thumb1 unified syntax requires s suffix in instruction name when
28892 one of the operands is immediate. */
28893 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28894 mi_op, this_regno, this_regno,
28895 mi_delta);
28898 else
28900 /* TODO: Use movw/movt for large constants when available. */
28901 while (mi_delta != 0)
28903 if ((mi_delta & (3 << shift)) == 0)
28904 shift += 2;
28905 else
28907 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28908 mi_op, this_regno, this_regno,
28909 mi_delta & (0xff << shift));
28910 mi_delta &= ~(0xff << shift);
28911 shift += 8;
28915 if (TARGET_THUMB1)
28917 if (TARGET_THUMB1_ONLY)
28918 fputs ("\tpop\t{r3}\n", file);
28920 fprintf (file, "\tbx\tr12\n");
28922 /* With -mpure-code, we don't need to emit literals for the
28923 function address and delta since we emitted code to build
28924 them. */
28925 if (!target_pure_code)
28927 ASM_OUTPUT_ALIGN (file, 2);
28928 assemble_name (file, label);
28929 fputs (":\n", file);
28930 if (flag_pic)
28932 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28933 rtx tem = XEXP (DECL_RTL (function), 0);
28934 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28935 pipeline offset is four rather than eight. Adjust the offset
28936 accordingly. */
28937 tem = plus_constant (GET_MODE (tem), tem,
28938 TARGET_THUMB1_ONLY ? -3 : -7);
28939 tem = gen_rtx_MINUS (GET_MODE (tem),
28940 tem,
28941 gen_rtx_SYMBOL_REF (Pmode,
28942 ggc_strdup (labelpc)));
28943 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28945 else
28946 /* Output ".word .LTHUNKn". */
28947 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28949 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28950 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28953 else
28955 fputs ("\tb\t", file);
28956 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28957 if (NEED_PLT_RELOC)
28958 fputs ("(PLT)", file);
28959 fputc ('\n', file);
28962 final_end_function ();
28965 /* MI thunk handling for TARGET_32BIT. */
28967 static void
28968 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28969 HOST_WIDE_INT vcall_offset, tree function)
28971 const bool long_call_p = arm_is_long_call_p (function);
28973 /* On ARM, this_regno is R0 or R1 depending on
28974 whether the function returns an aggregate or not.
28976 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28977 function)
28978 ? R1_REGNUM : R0_REGNUM);
28980 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28981 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28982 reload_completed = 1;
28983 emit_note (NOTE_INSN_PROLOGUE_END);
28985 /* Add DELTA to THIS_RTX. */
28986 if (delta != 0)
28987 arm_split_constant (PLUS, Pmode, NULL_RTX,
28988 delta, this_rtx, this_rtx, false);
28990 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28991 if (vcall_offset != 0)
28993 /* Load *THIS_RTX. */
28994 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28995 /* Compute *THIS_RTX + VCALL_OFFSET. */
28996 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28997 false);
28998 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28999 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29000 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29003 /* Generate a tail call to the target function. */
29004 if (!TREE_USED (function))
29006 assemble_external (function);
29007 TREE_USED (function) = 1;
29009 rtx funexp = XEXP (DECL_RTL (function), 0);
29010 if (long_call_p)
29012 emit_move_insn (temp, funexp);
29013 funexp = temp;
29015 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29016 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29017 SIBLING_CALL_P (insn) = 1;
29018 emit_barrier ();
29020 /* Indirect calls require a bit of fixup in PIC mode. */
29021 if (long_call_p)
29023 split_all_insns_noflow ();
29024 arm_reorg ();
29027 insn = get_insns ();
29028 shorten_branches (insn);
29029 final_start_function (insn, file, 1);
29030 final (insn, file, 1);
29031 final_end_function ();
29033 /* Stop pretending this is a post-reload pass. */
29034 reload_completed = 0;
29037 /* Output code to add DELTA to the first argument, and then jump
29038 to FUNCTION. Used for C++ multiple inheritance. */
29040 static void
29041 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29042 HOST_WIDE_INT vcall_offset, tree function)
29044 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29046 assemble_start_function (thunk, fnname);
29047 if (TARGET_32BIT)
29048 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29049 else
29050 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29051 assemble_end_function (thunk, fnname);
29055 arm_emit_vector_const (FILE *file, rtx x)
29057 int i;
29058 const char * pattern;
29060 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29062 switch (GET_MODE (x))
29064 case E_V2SImode: pattern = "%08x"; break;
29065 case E_V4HImode: pattern = "%04x"; break;
29066 case E_V8QImode: pattern = "%02x"; break;
29067 default: gcc_unreachable ();
29070 fprintf (file, "0x");
29071 for (i = CONST_VECTOR_NUNITS (x); i--;)
29073 rtx element;
29075 element = CONST_VECTOR_ELT (x, i);
29076 fprintf (file, pattern, INTVAL (element));
29079 return 1;
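/* Illustrative example: a V4HImode CONST_VECTOR with elements
   {1, 2, 3, 4} is printed as 0x0004000300020001, i.e. the
   highest-numbered element first, each padded to its element width.  */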
29082 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29083 HFmode constant pool entries are actually loaded with ldr. */
29084 void
29085 arm_emit_fp16_const (rtx c)
29087 long bits;
29089 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29090 if (WORDS_BIG_ENDIAN)
29091 assemble_zeros (2);
29092 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29093 if (!WORDS_BIG_ENDIAN)
29094 assemble_zeros (2);
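/* For example (illustrative), the HFmode constant 1.0 has the IEEE
   half-precision bit pattern 0x3c00; on a little-endian target the
   16-bit value is emitted first and then padded with two zero bytes so
   the pool entry still fills the 32-bit word that ldr will load.  */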
29097 const char *
29098 arm_output_load_gr (rtx *operands)
29100 rtx reg;
29101 rtx offset;
29102 rtx wcgr;
29103 rtx sum;
29105 if (!MEM_P (operands [1])
29106 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29107 || !REG_P (reg = XEXP (sum, 0))
29108 || !CONST_INT_P (offset = XEXP (sum, 1))
29109 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29110 return "wldrw%?\t%0, %1";
29112 /* Fix up an out-of-range load of a GR register. */
29113 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29114 wcgr = operands[0];
29115 operands[0] = reg;
29116 output_asm_insn ("ldr%?\t%0, %1", operands);
29118 operands[0] = wcgr;
29119 operands[1] = reg;
29120 output_asm_insn ("tmcr%?\t%0, %1", operands);
29121 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29123 return "";
29126 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29128 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29129 named arg and all anonymous args onto the stack.
29130 XXX I know the prologue shouldn't be pushing registers, but it is faster
29131 that way. */
29133 static void
29134 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29135 const function_arg_info &arg,
29136 int *pretend_size,
29137 int second_time ATTRIBUTE_UNUSED)
29139 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29140 int nregs;
29142 cfun->machine->uses_anonymous_args = 1;
29143 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29145 nregs = pcum->aapcs_ncrn;
29146 if (nregs & 1)
29148 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29149 if (res < 0 && warn_psabi)
29150 inform (input_location, "parameter passing for argument of "
29151 "type %qT changed in GCC 7.1", arg.type);
29152 else if (res > 0)
29154 nregs++;
29155 if (res > 1 && warn_psabi)
29156 inform (input_location,
29157 "parameter passing for argument of type "
29158 "%qT changed in GCC 9.1", arg.type);
29162 else
29163 nregs = pcum->nregs;
29165 if (nregs < NUM_ARG_REGS)
29166 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
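/* Illustrative example (not in the original source): for a variadic
   function such as "int f (int fmt, ...)" under AAPCS, the single named
   argument occupies r0, so nregs is 1 and *pretend_size becomes
   (4 - 1) * 4 = 12 bytes, making the prologue push r1-r3 so that the
   anonymous register arguments sit contiguously with any arguments
   already on the stack.  */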
29169 /* We can't rely on the caller doing the proper promotion when
29170 using APCS or ATPCS. */
29172 static bool
29173 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29175 return !TARGET_AAPCS_BASED;
29178 static machine_mode
29179 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29180 machine_mode mode,
29181 int *punsignedp ATTRIBUTE_UNUSED,
29182 const_tree fntype ATTRIBUTE_UNUSED,
29183 int for_return ATTRIBUTE_UNUSED)
29185 if (GET_MODE_CLASS (mode) == MODE_INT
29186 && GET_MODE_SIZE (mode) < 4)
29187 return SImode;
29189 return mode;
29193 static bool
29194 arm_default_short_enums (void)
29196 return ARM_DEFAULT_SHORT_ENUMS;
29200 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29202 static bool
29203 arm_align_anon_bitfield (void)
29205 return TARGET_AAPCS_BASED;
29209 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29211 static tree
29212 arm_cxx_guard_type (void)
29214 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29218 /* The EABI says test the least significant bit of a guard variable. */
29220 static bool
29221 arm_cxx_guard_mask_bit (void)
29223 return TARGET_AAPCS_BASED;
29227 /* The EABI specifies that all array cookies are 8 bytes long. */
29229 static tree
29230 arm_get_cookie_size (tree type)
29232 tree size;
29234 if (!TARGET_AAPCS_BASED)
29235 return default_cxx_get_cookie_size (type);
29237 size = build_int_cst (sizetype, 8);
29238 return size;
29242 /* The EABI says that array cookies should also contain the element size. */
29244 static bool
29245 arm_cookie_has_size (void)
29247 return TARGET_AAPCS_BASED;
29251 /* The EABI says constructors and destructors should return a pointer to
29252 the object constructed/destroyed. */
29254 static bool
29255 arm_cxx_cdtor_returns_this (void)
29257 return TARGET_AAPCS_BASED;
29260 /* The EABI says that an inline function may never be the key
29261 method. */
29263 static bool
29264 arm_cxx_key_method_may_be_inline (void)
29266 return !TARGET_AAPCS_BASED;
29269 static void
29270 arm_cxx_determine_class_data_visibility (tree decl)
29272 if (!TARGET_AAPCS_BASED
29273 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29274 return;
29276 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29277 is exported. However, on systems without dynamic vague linkage,
29278 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29279 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29280 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29281 else
29282 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29283 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29286 static bool
29287 arm_cxx_class_data_always_comdat (void)
29289 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29290 vague linkage if the class has no key function. */
29291 return !TARGET_AAPCS_BASED;
29295 /* The EABI says __aeabi_atexit should be used to register static
29296 destructors. */
29298 static bool
29299 arm_cxx_use_aeabi_atexit (void)
29301 return TARGET_AAPCS_BASED;
29305 void
29306 arm_set_return_address (rtx source, rtx scratch)
29308 arm_stack_offsets *offsets;
29309 HOST_WIDE_INT delta;
29310 rtx addr, mem;
29311 unsigned long saved_regs;
29313 offsets = arm_get_frame_offsets ();
29314 saved_regs = offsets->saved_regs_mask;
29316 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29317 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29318 else
29320 if (frame_pointer_needed)
29321 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29322 else
29324 /* LR will be the first saved register. */
29325 delta = offsets->outgoing_args - (offsets->frame + 4);
29328 if (delta >= 4096)
29330 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29331 GEN_INT (delta & ~4095)));
29332 addr = scratch;
29333 delta &= 4095;
29335 else
29336 addr = stack_pointer_rtx;
29338 addr = plus_constant (Pmode, addr, delta);
29341 /* The store needs to be marked to prevent DSE from deleting
29342 it as dead if it is based on fp. */
29343 mem = gen_frame_mem (Pmode, addr);
29344 MEM_VOLATILE_P (mem) = true;
29345 emit_move_insn (mem, source);
29350 void
29351 thumb_set_return_address (rtx source, rtx scratch)
29353 arm_stack_offsets *offsets;
29354 HOST_WIDE_INT delta;
29355 HOST_WIDE_INT limit;
29356 int reg;
29357 rtx addr, mem;
29358 unsigned long mask;
29360 emit_use (source);
29362 offsets = arm_get_frame_offsets ();
29363 mask = offsets->saved_regs_mask;
29364 if (mask & (1 << LR_REGNUM))
29366 limit = 1024;
29367 /* Find the saved regs. */
29368 if (frame_pointer_needed)
29370 delta = offsets->soft_frame - offsets->saved_args;
29371 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29372 if (TARGET_THUMB1)
29373 limit = 128;
29375 else
29377 delta = offsets->outgoing_args - offsets->saved_args;
29378 reg = SP_REGNUM;
29380 /* Allow for the stack frame. */
29381 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29382 delta -= 16;
29383 /* The link register is always the first saved register. */
29384 delta -= 4;
29386 /* Construct the address. */
29387 addr = gen_rtx_REG (SImode, reg);
29388 if (delta > limit)
29390 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29391 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29392 addr = scratch;
29394 else
29395 addr = plus_constant (Pmode, addr, delta);
29397 /* The store needs to be marked to prevent DSE from deleting
29398 it as dead if it is based on fp. */
29399 mem = gen_frame_mem (Pmode, addr);
29400 MEM_VOLATILE_P (mem) = true;
29401 emit_move_insn (mem, source);
29403 else
29404 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29407 /* Implements target hook vector_mode_supported_p. */
29408 bool
29409 arm_vector_mode_supported_p (machine_mode mode)
29411 /* Neon also supports V2SImode, etc. listed in the clause below. */
29412 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29413 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29414 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29415 || mode == V8BFmode))
29416 return true;
29418 if ((TARGET_NEON || TARGET_IWMMXT)
29419 && ((mode == V2SImode)
29420 || (mode == V4HImode)
29421 || (mode == V8QImode)))
29422 return true;
29424 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29425 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29426 || mode == V2HAmode))
29427 return true;
29429 if (TARGET_HAVE_MVE
29430 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29431 || mode == V16QImode
29432 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29433 return true;
29435 if (TARGET_HAVE_MVE_FLOAT
29436 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29437 return true;
29439 return false;
29442 /* Implements target hook array_mode_supported_p. */
29444 static bool
29445 arm_array_mode_supported_p (machine_mode mode,
29446 unsigned HOST_WIDE_INT nelems)
29448 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29449 for now, as the lane-swapping logic needs to be extended in the expanders.
29450 See PR target/82518. */
29451 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29452 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29453 && (nelems >= 2 && nelems <= 4))
29454 return true;
29456 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29457 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29458 return true;
29460 return false;
29463 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29464 registers when autovectorizing for Neon, at least until multiple vector
29465 widths are supported properly by the middle-end. */
29467 static machine_mode
29468 arm_preferred_simd_mode (scalar_mode mode)
29470 if (TARGET_NEON)
29471 switch (mode)
29473 case E_HFmode:
29474 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29475 case E_SFmode:
29476 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29477 case E_SImode:
29478 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29479 case E_HImode:
29480 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29481 case E_QImode:
29482 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29483 case E_DImode:
29484 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29485 return V2DImode;
29486 break;
29488 default:;
29491 if (TARGET_REALLY_IWMMXT)
29492 switch (mode)
29494 case E_SImode:
29495 return V2SImode;
29496 case E_HImode:
29497 return V4HImode;
29498 case E_QImode:
29499 return V8QImode;
29501 default:;
29504 if (TARGET_HAVE_MVE)
29505 switch (mode)
29507 case E_QImode:
29508 return V16QImode;
29509 case E_HImode:
29510 return V8HImode;
29511 case E_SImode:
29512 return V4SImode;
29514 default:;
29517 if (TARGET_HAVE_MVE_FLOAT)
29518 switch (mode)
29520 case E_HFmode:
29521 return V8HFmode;
29522 case E_SFmode:
29523 return V4SFmode;
29525 default:;
29528 return word_mode;
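/* For example (illustrative), with Neon enabled an SFmode scalar is
   vectorized as V4SFmode by default, or as V2SFmode when
   -mvectorize-with-neon-double is given; with MVE, SFmode maps to
   V4SFmode only when the floating-point variant of MVE is available,
   otherwise word_mode is returned.  */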
29531 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29533 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29534 using r0-r4 for function arguments, r7 for the stack frame and don't have
29535 enough left over to do doubleword arithmetic. For Thumb-2 all the
29536 potentially problematic instructions accept high registers so this is not
29537 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29538 that require many low registers. */
29539 static bool
29540 arm_class_likely_spilled_p (reg_class_t rclass)
29542 if ((TARGET_THUMB1 && rclass == LO_REGS)
29543 || rclass == CC_REG)
29544 return true;
29546 return default_class_likely_spilled_p (rclass);
29549 /* Implements target hook small_register_classes_for_mode_p. */
29550 bool
29551 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29553 return TARGET_THUMB1;
29556 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29557 ARM insns and therefore guarantee that the shift count is modulo 256.
29558 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29559 guarantee no particular behavior for out-of-range counts. */
29561 static unsigned HOST_WIDE_INT
29562 arm_shift_truncation_mask (machine_mode mode)
29564 return mode == SImode ? 255 : 0;
29568 /* Map internal gcc register numbers to DWARF2 register numbers. */
29570 unsigned int
29571 arm_debugger_regno (unsigned int regno)
29573 if (regno < 16)
29574 return regno;
29576 if (IS_VFP_REGNUM (regno))
29578 /* See comment in arm_dwarf_register_span. */
29579 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29580 return 64 + regno - FIRST_VFP_REGNUM;
29581 else
29582 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29585 if (IS_IWMMXT_GR_REGNUM (regno))
29586 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29588 if (IS_IWMMXT_REGNUM (regno))
29589 return 112 + regno - FIRST_IWMMXT_REGNUM;
29591 return DWARF_FRAME_REGISTERS;
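/* Examples of the mapping above (illustrative): core registers r0-r15
   keep their own numbers, s0 maps to DWARF register 64, s1 to 65, and
   so on, while VFP registers with no single-precision view (d16 and
   up) land in the 256-287 range that DWARF reserves for d0-d31.  */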
29594 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29595 GCC models them as 64 32-bit registers, so we need to describe this to
29596 the DWARF generation code. Other registers can use the default. */
29597 static rtx
29598 arm_dwarf_register_span (rtx rtl)
29600 machine_mode mode;
29601 unsigned regno;
29602 rtx parts[16];
29603 int nregs;
29604 int i;
29606 regno = REGNO (rtl);
29607 if (!IS_VFP_REGNUM (regno))
29608 return NULL_RTX;
29610 /* XXX FIXME: The EABI defines two VFP register ranges:
29611 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29612 256-287: D0-D31
29613 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29614 corresponding D register. Until GDB supports this, we shall use the
29615 legacy encodings. We also use these encodings for D0-D15 for
29616 compatibility with older debuggers. */
29617 mode = GET_MODE (rtl);
29618 if (GET_MODE_SIZE (mode) < 8)
29619 return NULL_RTX;
29621 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29623 nregs = GET_MODE_SIZE (mode) / 4;
29624 for (i = 0; i < nregs; i += 2)
29625 if (TARGET_BIG_END)
29627 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29628 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29630 else
29632 parts[i] = gen_rtx_REG (SImode, regno + i);
29633 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29636 else
29638 nregs = GET_MODE_SIZE (mode) / 8;
29639 for (i = 0; i < nregs; i++)
29640 parts[i] = gen_rtx_REG (DImode, regno + i);
29643 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29646 #if ARM_UNWIND_INFO
29647 /* Emit unwind directives for a store-multiple instruction or stack pointer
29648 push during alignment.
29649 These should only ever be generated by the function prologue code, so
29650 expect them to have a particular form.
29651 The store-multiple instruction sometimes pushes pc as the last register,
29652 although it should not be tracked into unwind information, or for -Os
29653 sometimes pushes some dummy registers before the first register that needs
29654 to be tracked in unwind information; such dummy registers are there just
29655 to avoid separate stack adjustment, and will not be restored in the
29656 epilogue. */
29658 static void
29659 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29661 int i;
29662 HOST_WIDE_INT offset;
29663 HOST_WIDE_INT nregs;
29664 int reg_size;
29665 unsigned reg;
29666 unsigned lastreg;
29667 unsigned padfirst = 0, padlast = 0;
29668 rtx e;
29670 e = XVECEXP (p, 0, 0);
29671 gcc_assert (GET_CODE (e) == SET);
29673 /* First insn will adjust the stack pointer. */
29674 gcc_assert (GET_CODE (e) == SET
29675 && REG_P (SET_DEST (e))
29676 && REGNO (SET_DEST (e)) == SP_REGNUM
29677 && GET_CODE (SET_SRC (e)) == PLUS);
29679 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29680 nregs = XVECLEN (p, 0) - 1;
29681 gcc_assert (nregs);
29683 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29684 if (reg < 16)
29686 /* For -Os dummy registers can be pushed at the beginning to
29687 avoid separate stack pointer adjustment. */
29688 e = XVECEXP (p, 0, 1);
29689 e = XEXP (SET_DEST (e), 0);
29690 if (GET_CODE (e) == PLUS)
29691 padfirst = INTVAL (XEXP (e, 1));
29692 gcc_assert (padfirst == 0 || optimize_size);
29693 /* The function prologue may also push pc, but not annotate it as it is
29694 never restored. We turn this into a stack pointer adjustment. */
29695 e = XVECEXP (p, 0, nregs);
29696 e = XEXP (SET_DEST (e), 0);
29697 if (GET_CODE (e) == PLUS)
29698 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29699 else
29700 padlast = offset - 4;
29701 gcc_assert (padlast == 0 || padlast == 4);
29702 if (padlast == 4)
29703 fprintf (out_file, "\t.pad #4\n");
29704 reg_size = 4;
29705 fprintf (out_file, "\t.save {");
29707 else if (IS_VFP_REGNUM (reg))
29709 reg_size = 8;
29710 fprintf (out_file, "\t.vsave {");
29712 else
29713 /* Unknown register type. */
29714 gcc_unreachable ();
29716 /* If the stack increment doesn't match the size of the saved registers,
29717 something has gone horribly wrong. */
29718 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29720 offset = padfirst;
29721 lastreg = 0;
29722 /* The remaining insns will describe the stores. */
29723 for (i = 1; i <= nregs; i++)
29725 /* Expect (set (mem <addr>) (reg)).
29726 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29727 e = XVECEXP (p, 0, i);
29728 gcc_assert (GET_CODE (e) == SET
29729 && MEM_P (SET_DEST (e))
29730 && REG_P (SET_SRC (e)));
29732 reg = REGNO (SET_SRC (e));
29733 gcc_assert (reg >= lastreg);
29735 if (i != 1)
29736 fprintf (out_file, ", ");
29737 /* We can't use %r for vfp because we need to use the
29738 double precision register names. */
29739 if (IS_VFP_REGNUM (reg))
29740 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29741 else
29742 asm_fprintf (out_file, "%r", reg);
29744 if (flag_checking)
29746 /* Check that the addresses are consecutive. */
29747 e = XEXP (SET_DEST (e), 0);
29748 if (GET_CODE (e) == PLUS)
29749 gcc_assert (REG_P (XEXP (e, 0))
29750 && REGNO (XEXP (e, 0)) == SP_REGNUM
29751 && CONST_INT_P (XEXP (e, 1))
29752 && offset == INTVAL (XEXP (e, 1)));
29753 else
29754 gcc_assert (i == 1
29755 && REG_P (e)
29756 && REGNO (e) == SP_REGNUM);
29757 offset += reg_size;
29760 fprintf (out_file, "}\n");
29761 if (padfirst)
29762 fprintf (out_file, "\t.pad #%d\n", padfirst);
29765 /* Emit unwind directives for a SET. */
29767 static void
29768 arm_unwind_emit_set (FILE * out_file, rtx p)
29770 rtx e0;
29771 rtx e1;
29772 unsigned reg;
29774 e0 = XEXP (p, 0);
29775 e1 = XEXP (p, 1);
29776 switch (GET_CODE (e0))
29778 case MEM:
29779 /* Pushing a single register. */
29780 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29781 || !REG_P (XEXP (XEXP (e0, 0), 0))
29782 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29783 abort ();
29785 asm_fprintf (out_file, "\t.save ");
29786 if (IS_VFP_REGNUM (REGNO (e1)))
29787 asm_fprintf(out_file, "{d%d}\n",
29788 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29789 else
29790 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29791 break;
29793 case REG:
29794 if (REGNO (e0) == SP_REGNUM)
29796 /* A stack increment. */
29797 if (GET_CODE (e1) != PLUS
29798 || !REG_P (XEXP (e1, 0))
29799 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29800 || !CONST_INT_P (XEXP (e1, 1)))
29801 abort ();
29803 asm_fprintf (out_file, "\t.pad #%wd\n",
29804 -INTVAL (XEXP (e1, 1)));
29806 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29808 HOST_WIDE_INT offset;
29810 if (GET_CODE (e1) == PLUS)
29812 if (!REG_P (XEXP (e1, 0))
29813 || !CONST_INT_P (XEXP (e1, 1)))
29814 abort ();
29815 reg = REGNO (XEXP (e1, 0));
29816 offset = INTVAL (XEXP (e1, 1));
29817 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29818 HARD_FRAME_POINTER_REGNUM, reg,
29819 offset);
29821 else if (REG_P (e1))
29823 reg = REGNO (e1);
29824 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29825 HARD_FRAME_POINTER_REGNUM, reg);
29827 else
29828 abort ();
29830 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29832 /* Move from sp to reg. */
29833 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29835 else if (GET_CODE (e1) == PLUS
29836 && REG_P (XEXP (e1, 0))
29837 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29838 && CONST_INT_P (XEXP (e1, 1)))
29840 /* Set reg to offset from sp. */
29841 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29842 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29844 else
29845 abort ();
29846 break;
29848 default:
29849 abort ();
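/* Illustrative examples of the unwind directives produced above: a
   single-register push such as "push {r4}" yields ".save {r4}", a
   16-byte stack decrement yields ".pad #16", and establishing the
   frame pointer from the stack pointer yields a ".setfp" directive,
   with an extra "#offset" when the copy includes an addend.  */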
29854 /* Emit unwind directives for the given insn. */
29856 static void
29857 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29859 rtx note, pat;
29860 bool handled_one = false;
29862 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29863 return;
29865 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29866 && (TREE_NOTHROW (current_function_decl)
29867 || crtl->all_throwers_are_sibcalls))
29868 return;
29870 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29871 return;
29873 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29875 switch (REG_NOTE_KIND (note))
29877 case REG_FRAME_RELATED_EXPR:
29878 pat = XEXP (note, 0);
29879 goto found;
29881 case REG_CFA_REGISTER:
29882 pat = XEXP (note, 0);
29883 if (pat == NULL)
29885 pat = PATTERN (insn);
29886 if (GET_CODE (pat) == PARALLEL)
29887 pat = XVECEXP (pat, 0, 0);
29890 /* Only emitted for IS_STACKALIGN re-alignment. */
29892 rtx dest, src;
29893 unsigned reg;
29895 src = SET_SRC (pat);
29896 dest = SET_DEST (pat);
29898 gcc_assert (src == stack_pointer_rtx);
29899 reg = REGNO (dest);
29900 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29901 reg + 0x90, reg);
29903 handled_one = true;
29904 break;
29906 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
29907 to get correct dwarf information for shrink-wrapping. We should not
29908 emit unwind information for it because such notes are used either for
29909 pretend arguments or to adjust sp and restore registers from the
29910 stack. */
29911 case REG_CFA_DEF_CFA:
29912 case REG_CFA_ADJUST_CFA:
29913 case REG_CFA_RESTORE:
29914 return;
29916 case REG_CFA_EXPRESSION:
29917 case REG_CFA_OFFSET:
29918 /* ??? Only handling here what we actually emit. */
29919 gcc_unreachable ();
29921 default:
29922 break;
29925 if (handled_one)
29926 return;
29927 pat = PATTERN (insn);
29928 found:
29930 switch (GET_CODE (pat))
29932 case SET:
29933 arm_unwind_emit_set (out_file, pat);
29934 break;
29936 case SEQUENCE:
29937 /* Store multiple. */
29938 arm_unwind_emit_sequence (out_file, pat);
29939 break;
29941 default:
29942 abort();
29947 /* Output a reference from a function exception table to the type_info
29948 object X. The EABI specifies that the symbol should be relocated by
29949 an R_ARM_TARGET2 relocation. */
29951 static bool
29952 arm_output_ttype (rtx x)
29954 fputs ("\t.word\t", asm_out_file);
29955 output_addr_const (asm_out_file, x);
29956 /* Use special relocations for symbol references. */
29957 if (!CONST_INT_P (x))
29958 fputs ("(TARGET2)", asm_out_file);
29959 fputc ('\n', asm_out_file);
29961 return TRUE;
29964 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29966 static void
29967 arm_asm_emit_except_personality (rtx personality)
29969 fputs ("\t.personality\t", asm_out_file);
29970 output_addr_const (asm_out_file, personality);
29971 fputc ('\n', asm_out_file);
29973 #endif /* ARM_UNWIND_INFO */
29975 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29977 static void
29978 arm_asm_init_sections (void)
29980 #if ARM_UNWIND_INFO
29981 exception_section = get_unnamed_section (0, output_section_asm_op,
29982 "\t.handlerdata");
29983 #endif /* ARM_UNWIND_INFO */
29985 #ifdef OBJECT_FORMAT_ELF
29986 if (target_pure_code)
29987 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29988 #endif
29991 /* Output unwind directives for the start/end of a function. */
29993 void
29994 arm_output_fn_unwind (FILE * f, bool prologue)
29996 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29997 return;
29999 if (prologue)
30000 fputs ("\t.fnstart\n", f);
30001 else
30003 /* If this function will never be unwound, then mark it as such.
30004 The same condition is used in arm_unwind_emit to suppress
30005 the frame annotations. */
30006 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30007 && (TREE_NOTHROW (current_function_decl)
30008 || crtl->all_throwers_are_sibcalls))
30009 fputs("\t.cantunwind\n", f);
30011 fputs ("\t.fnend\n", f);
30015 static bool
30016 arm_emit_tls_decoration (FILE *fp, rtx x)
30018 enum tls_reloc reloc;
30019 rtx val;
30021 val = XVECEXP (x, 0, 0);
30022 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30024 output_addr_const (fp, val);
30026 switch (reloc)
30028 case TLS_GD32:
30029 fputs ("(tlsgd)", fp);
30030 break;
30031 case TLS_GD32_FDPIC:
30032 fputs ("(tlsgd_fdpic)", fp);
30033 break;
30034 case TLS_LDM32:
30035 fputs ("(tlsldm)", fp);
30036 break;
30037 case TLS_LDM32_FDPIC:
30038 fputs ("(tlsldm_fdpic)", fp);
30039 break;
30040 case TLS_LDO32:
30041 fputs ("(tlsldo)", fp);
30042 break;
30043 case TLS_IE32:
30044 fputs ("(gottpoff)", fp);
30045 break;
30046 case TLS_IE32_FDPIC:
30047 fputs ("(gottpoff_fdpic)", fp);
30048 break;
30049 case TLS_LE32:
30050 fputs ("(tpoff)", fp);
30051 break;
30052 case TLS_DESCSEQ:
30053 fputs ("(tlsdesc)", fp);
30054 break;
30055 default:
30056 gcc_unreachable ();
30059 switch (reloc)
30061 case TLS_GD32:
30062 case TLS_LDM32:
30063 case TLS_IE32:
30064 case TLS_DESCSEQ:
30065 fputs (" + (. - ", fp);
30066 output_addr_const (fp, XVECEXP (x, 0, 2));
30067 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30068 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30069 output_addr_const (fp, XVECEXP (x, 0, 3));
30070 fputc (')', fp);
30071 break;
30072 default:
30073 break;
30076 return TRUE;
30079 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30081 static void
30082 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30084 gcc_assert (size == 4);
30085 fputs ("\t.word\t", file);
30086 output_addr_const (file, x);
30087 fputs ("(tlsldo)", file);
30090 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30092 static bool
30093 arm_output_addr_const_extra (FILE *fp, rtx x)
30095 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30096 return arm_emit_tls_decoration (fp, x);
30097 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30099 char label[256];
30100 int labelno = INTVAL (XVECEXP (x, 0, 0));
30102 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30103 assemble_name_raw (fp, label);
30105 return TRUE;
30107 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30109 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30110 if (GOT_PCREL)
30111 fputs ("+.", fp);
30112 fputs ("-(", fp);
30113 output_addr_const (fp, XVECEXP (x, 0, 0));
30114 fputc (')', fp);
30115 return TRUE;
30117 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30119 output_addr_const (fp, XVECEXP (x, 0, 0));
30120 if (GOT_PCREL)
30121 fputs ("+.", fp);
30122 fputs ("-(", fp);
30123 output_addr_const (fp, XVECEXP (x, 0, 1));
30124 fputc (')', fp);
30125 return TRUE;
30127 else if (GET_CODE (x) == CONST_VECTOR)
30128 return arm_emit_vector_const (fp, x);
30130 return FALSE;
30133 /* Output assembly for a shift instruction.
30134 SET_FLAGS determines how the instruction modifies the condition codes.
30135 0 - Do not set condition codes.
30136 1 - Set condition codes.
30137 2 - Use smallest instruction. */
30138 const char *
30139 arm_output_shift(rtx * operands, int set_flags)
30141 char pattern[100];
30142 static const char flag_chars[3] = {'?', '.', '!'};
30143 const char *shift;
30144 HOST_WIDE_INT val;
30145 char c;
30147 c = flag_chars[set_flags];
30148 shift = shift_op(operands[3], &val);
30149 if (shift)
30151 if (val != -1)
30152 operands[2] = GEN_INT(val);
30153 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30155 else
30156 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30158 output_asm_insn (pattern, operands);
30159 return "";
30162 /* Output assembly for a WMMX immediate shift instruction. */
30163 const char *
30164 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30166 int shift = INTVAL (operands[2]);
30167 char templ[50];
30168 machine_mode opmode = GET_MODE (operands[0]);
30170 gcc_assert (shift >= 0);
30172 /* Handle the case where the shift value is larger than 63 (for the D
30173 qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
30174 if (((opmode == V4HImode) && (shift > 15))
30175 || ((opmode == V2SImode) && (shift > 31))
30176 || ((opmode == DImode) && (shift > 63)))
30178 if (wror_or_wsra)
30180 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30181 output_asm_insn (templ, operands);
30182 if (opmode == DImode)
30184 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30185 output_asm_insn (templ, operands);
30188 else
30190 /* The destination register will contain all zeros. */
30191 sprintf (templ, "wzero\t%%0");
30192 output_asm_insn (templ, operands);
30194 return "";
30197 if ((opmode == DImode) && (shift > 32))
30199 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30200 output_asm_insn (templ, operands);
30201 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30202 output_asm_insn (templ, operands);
30204 else
30206 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30207 output_asm_insn (templ, operands);
30209 return "";
30212 /* Output assembly for a WMMX tinsr instruction. */
30213 const char *
30214 arm_output_iwmmxt_tinsr (rtx *operands)
30216 int mask = INTVAL (operands[3]);
30217 int i;
30218 char templ[50];
30219 int units = mode_nunits[GET_MODE (operands[0])];
30220 gcc_assert ((mask & (mask - 1)) == 0);
30221 for (i = 0; i < units; ++i)
30223 if ((mask & 0x01) == 1)
30225 break;
30227 mask >>= 1;
30229 gcc_assert (i < units);
30231 switch (GET_MODE (operands[0]))
30233 case E_V8QImode:
30234 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30235 break;
30236 case E_V4HImode:
30237 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30238 break;
30239 case E_V2SImode:
30240 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30241 break;
30242 default:
30243 gcc_unreachable ();
30244 break;
30246 output_asm_insn (templ, operands);
30248 return "";
30251 /* Output a Thumb-1 casesi dispatch sequence. */
30252 const char *
30253 thumb1_output_casesi (rtx *operands)
30255 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30257 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30259 switch (GET_MODE(diff_vec))
30261 case E_QImode:
30262 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30263 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30264 case E_HImode:
30265 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30266 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30267 case E_SImode:
30268 return "bl\t%___gnu_thumb1_case_si";
30269 default:
30270 gcc_unreachable ();
30274 /* Output a Thumb-2 casesi instruction. */
30275 const char *
30276 thumb2_output_casesi (rtx *operands)
30278 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30280 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30282 output_asm_insn ("cmp\t%0, %1", operands);
30283 output_asm_insn ("bhi\t%l3", operands);
30284 switch (GET_MODE(diff_vec))
30286 case E_QImode:
30287 return "tbb\t[%|pc, %0]";
30288 case E_HImode:
30289 return "tbh\t[%|pc, %0, lsl #1]";
30290 case E_SImode:
30291 if (flag_pic)
30293 output_asm_insn ("adr\t%4, %l2", operands);
30294 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30295 output_asm_insn ("add\t%4, %4, %5", operands);
30296 return "bx\t%4";
30298 else
30300 output_asm_insn ("adr\t%4, %l2", operands);
30301 return "ldr\t%|pc, [%4, %0, lsl #2]";
30303 default:
30304 gcc_unreachable ();
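/* Illustrative output (operand registers are examples only): for a
   QImode dispatch table this expands to
     cmp   r0, r1
     bhi   .Ldefault
     tbb   [pc, r0]
   while an SImode table without -fpic uses
     cmp   r0, r1
     bhi   .Ldefault
     adr   r4, .Ltable
     ldr   pc, [r4, r0, lsl #2]  */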
30308 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30309 per-core tuning structs. */
30310 static int
30311 arm_issue_rate (void)
30313 return current_tune->issue_rate;
30316 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30317 static int
30318 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30320 if (DEBUG_INSN_P (insn))
30321 return more;
30323 rtx_code code = GET_CODE (PATTERN (insn));
30324 if (code == USE || code == CLOBBER)
30325 return more;
30327 if (get_attr_type (insn) == TYPE_NO_INSN)
30328 return more;
30330 return more - 1;
30333 /* Return how many instructions the scheduler should look ahead to choose the
30334 best one. */
30335 static int
30336 arm_first_cycle_multipass_dfa_lookahead (void)
30338 int issue_rate = arm_issue_rate ();
30340 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30343 /* Enable modeling of L2 auto-prefetcher. */
30344 static int
30345 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30347 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30350 const char *
30351 arm_mangle_type (const_tree type)
30353 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30354 has to be mangled as if it is in the "std" namespace. */
30355 if (TARGET_AAPCS_BASED
30356 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30357 return "St9__va_list";
30359 /* Half-precision floating point types. */
30360 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30362 if (TYPE_MODE (type) == BFmode)
30363 return "u6__bf16";
30364 else
30365 return "Dh";
30368 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30369 builtin type. */
30370 if (TYPE_NAME (type) != NULL)
30371 return arm_mangle_builtin_type (type);
30373 /* Use the default mangling. */
30374 return NULL;
30377 /* Order of allocation of core registers for Thumb: this allocation is
30378 written over the corresponding initial entries of the array
30379 initialized with REG_ALLOC_ORDER. We allocate all low registers
30380 first. Saving and restoring a low register is usually cheaper than
30381 using a call-clobbered high register. */
30383 static const int thumb_core_reg_alloc_order[] =
30385 3, 2, 1, 0, 4, 5, 6, 7,
30386 12, 14, 8, 9, 10, 11
30389 /* Adjust register allocation order when compiling for Thumb. */
30391 void
30392 arm_order_regs_for_local_alloc (void)
30394 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30395 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30396 if (TARGET_THUMB)
30397 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30398 sizeof (thumb_core_reg_alloc_order));
30401 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30403 bool
30404 arm_frame_pointer_required (void)
30406 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30407 return true;
30409 /* If the function receives nonlocal gotos, it needs to save the frame
30410 pointer in the nonlocal_goto_save_area object. */
30411 if (cfun->has_nonlocal_label)
30412 return true;
30414 /* The frame pointer is required for non-leaf APCS frames. */
30415 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30416 return true;
30418 /* If we are probing the stack in the prologue, we will have a faulting
30419 instruction prior to the stack adjustment and this requires a frame
30420 pointer if we want to catch the exception using the EABI unwinder. */
30421 if (!IS_INTERRUPT (arm_current_func_type ())
30422 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30423 || flag_stack_clash_protection)
30424 && arm_except_unwind_info (&global_options) == UI_TARGET
30425 && cfun->can_throw_non_call_exceptions)
30427 HOST_WIDE_INT size = get_frame_size ();
30429 /* That's irrelevant if there is no stack adjustment. */
30430 if (size <= 0)
30431 return false;
30433 /* That's relevant only if there is a stack probe. */
30434 if (crtl->is_leaf && !cfun->calls_alloca)
30436 /* We don't have the final size of the frame so adjust. */
30437 size += 32 * UNITS_PER_WORD;
30438 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30439 return true;
30441 else
30442 return true;
30445 return false;
30448 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30449 All modes except THUMB1 have conditional execution.
30450 If we have conditional arithmetic, return false before reload to
30451 enable some ifcvt transformations. */
30452 static bool
30453 arm_have_conditional_execution (void)
30455 bool has_cond_exec, enable_ifcvt_trans;
30457 /* Only THUMB1 cannot support conditional execution. */
30458 has_cond_exec = !TARGET_THUMB1;
30460 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30461 before reload. */
30462 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30464 return has_cond_exec && !enable_ifcvt_trans;
30467 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30468 static HOST_WIDE_INT
30469 arm_vector_alignment (const_tree type)
30471 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30473 if (TARGET_AAPCS_BASED)
30474 align = MIN (align, 64);
30476 return align;
30479 static unsigned int
30480 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30482 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30484 modes->safe_push (V16QImode);
30485 modes->safe_push (V8QImode);
30487 return 0;
30490 static bool
30491 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30493 /* Vectors which aren't in packed structures will not be less aligned than
30494 the natural alignment of their element type, so this is safe. */
30495 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30496 return !is_packed;
30498 return default_builtin_vector_alignment_reachable (type, is_packed);
30501 static bool
30502 arm_builtin_support_vector_misalignment (machine_mode mode,
30503 const_tree type, int misalignment,
30504 bool is_packed)
30506 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30508 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30510 if (is_packed)
30511 return align == 1;
30513 /* If the misalignment is unknown, we should be able to handle the access
30514 so long as it is not to a member of a packed data structure. */
30515 if (misalignment == -1)
30516 return true;
30518 /* Return true if the misalignment is a multiple of the natural alignment
30519 of the vector's element type. This is probably always going to be
30520 true in practice, since we've already established that this isn't a
30521 packed access. */
30522 return ((misalignment % align) == 0);
30525 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30526 is_packed);
30529 static void
30530 arm_conditional_register_usage (void)
30532 int regno;
30534 if (TARGET_THUMB1 && optimize_size)
30536 /* When optimizing for size on Thumb-1, it's better not
30537 to use the HI regs, because of the overhead of
30538 stacking them. */
30539 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30540 fixed_regs[regno] = call_used_regs[regno] = 1;
30543 /* The link register can be clobbered by any branch insn,
30544 but we have no way to track that at present, so mark
30545 it as unavailable. */
30546 if (TARGET_THUMB1)
30547 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30549 if (TARGET_32BIT && TARGET_VFP_BASE)
30551 /* VFPv3 registers are disabled when earlier VFP
30552 versions are selected due to the definition of
30553 LAST_VFP_REGNUM. */
30554 for (regno = FIRST_VFP_REGNUM;
30555 regno <= LAST_VFP_REGNUM; ++ regno)
30557 fixed_regs[regno] = 0;
30558 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30559 || regno >= FIRST_VFP_REGNUM + 32;
30561 if (TARGET_HAVE_MVE)
30562 fixed_regs[VPR_REGNUM] = 0;
30565 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30567 regno = FIRST_IWMMXT_GR_REGNUM;
30568 /* The 2002/10/09 revision of the XScale ABI has wCG0
30569 and wCG1 as call-preserved registers. The 2002/11/21
30570 revision changed this so that all wCG registers are
30571 scratch registers. */
30572 for (regno = FIRST_IWMMXT_GR_REGNUM;
30573 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30574 fixed_regs[regno] = 0;
30575 /* The XScale ABI has wR0 - wR9 as scratch registers,
30576 the rest as call-preserved registers. */
30577 for (regno = FIRST_IWMMXT_REGNUM;
30578 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30580 fixed_regs[regno] = 0;
30581 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30585 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30587 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30588 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30590 else if (TARGET_APCS_STACK)
30592 fixed_regs[10] = 1;
30593 call_used_regs[10] = 1;
30595 /* -mcaller-super-interworking reserves r11 for calls to
30596 _interwork_r11_call_via_rN(). Making the register global
30597 is an easy way of ensuring that it remains valid for all
30598 calls. */
30599 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30600 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30602 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30603 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30604 if (TARGET_CALLER_INTERWORKING)
30605 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30608 /* The Q and GE bits are only accessed via special ACLE patterns. */
30609 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30610 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30612 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30615 static reg_class_t
30616 arm_preferred_rename_class (reg_class_t rclass)
30618 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30619 using GENERAL_REGS. During the register renaming pass we prefer LO_REGS,
30620 which can reduce code size. */
30621 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30622 return LO_REGS;
30623 else
30624 return NO_REGS;
30627 /* Compute the attribute "length" of insn "*push_multi".
30628 So this function MUST be kept in sync with that insn pattern. */
30630 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30632 int i, regno, hi_reg;
30633 int num_saves = XVECLEN (parallel_op, 0);
30635 /* ARM mode. */
30636 if (TARGET_ARM)
30637 return 4;
30638 /* Thumb1 mode. */
30639 if (TARGET_THUMB1)
30640 return 2;
30642 /* Thumb2 mode. */
30643 regno = REGNO (first_op);
30644 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
30645 list fits in 8 bits. Normally this means all registers in the list must be
30646 LO_REGS, that is (R0-R7). If any HI_REGS are used, we must use a 32-bit
30647 encoding. The one exception is PUSH, where LR in HI_REGS can still use the
30648 16-bit encoding. */
30649 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30650 for (i = 1; i < num_saves && !hi_reg; i++)
30652 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30653 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30656 if (!hi_reg)
30657 return 2;
30658 return 4;
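/* For example, under Thumb-2 "push {r0-r7, lr}" uses only low registers
   plus LR and therefore gets the 16-bit encoding (length 2), whereas
   "push {r4, r8}" contains a high register other than LR and needs the
   32-bit encoding (length 4).  */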
30661 /* Compute the attribute "length" of insn. Currently, this function is used
30662 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30663 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30664 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30665 true if OPERANDS contains an insn which explicitly updates the base register. */
30668 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30670 /* ARM mode. */
30671 if (TARGET_ARM)
30672 return 4;
30673 /* Thumb1 mode. */
30674 if (TARGET_THUMB1)
30675 return 2;
30677 rtx parallel_op = operands[0];
30679 /* Initialize to the number of elements in the PARALLEL. */
30679 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30681 /* Initialize to the base register's number. */
30681 unsigned regno = REGNO (operands[1]);
30682 /* Skip the return and write-back patterns;
30683 we only need the register pop patterns for later analysis. */
30684 unsigned first_indx = 0;
30685 first_indx += return_pc ? 1 : 0;
30686 first_indx += write_back_p ? 1 : 0;
30688 /* A pop operation can be done through LDM or POP. If the base register is SP
30689 and write back is used, then an LDM is an alias of POP. */
30690 bool pop_p = (regno == SP_REGNUM && write_back_p);
30691 bool ldm_p = !pop_p;
30693 /* Check base register for LDM. */
30694 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30695 return 4;
30697 /* Check each register in the list. */
30698 for (; indx >= first_indx; indx--)
30700 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30701 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30702 comment in arm_attr_length_push_multi. */
30703 if (REGNO_REG_CLASS (regno) == HI_REGS
30704 && (regno != PC_REGNUM || ldm_p))
30705 return 4;
30708 return 2;
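/* For instance, "pop {r4-r7, pc}" (SP base with write back) can still use
   the 16-bit POP encoding even though PC is a high register, while an LDM
   whose base register is a high register such as r8 always needs the
   32-bit encoding.  */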
30711 /* Compute the number of instructions emitted by output_move_double. */
30713 arm_count_output_move_double_insns (rtx *operands)
30715 int count;
30716 rtx ops[2];
30717 /* output_move_double may modify the operands array, so call it
30718 here on a copy of the array. */
30719 ops[0] = operands[0];
30720 ops[1] = operands[1];
30721 output_move_double (ops, false, &count);
30722 return count;
30725 /* Same as above, but operands are a register/memory pair in SImode.
30726 Assumes operands has the base register in position 0 and memory in position
30727 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30729 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30731 int count;
30732 rtx ops[2];
30733 int regnum, memnum;
30734 if (load)
30735 regnum = 0, memnum = 1;
30736 else
30737 regnum = 1, memnum = 0;
30738 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30739 ops[memnum] = adjust_address (operands[2], DImode, 0);
30740 output_move_double (ops, false, &count);
30741 return count;
30746 vfp3_const_double_for_fract_bits (rtx operand)
30748 REAL_VALUE_TYPE r0;
30750 if (!CONST_DOUBLE_P (operand))
30751 return 0;
30753 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30754 if (exact_real_inverse (DFmode, &r0)
30755 && !REAL_VALUE_NEGATIVE (r0))
30757 if (exact_real_truncate (DFmode, &r0))
30759 HOST_WIDE_INT value = real_to_integer (&r0);
30760 value = value & 0xffffffff;
30761 if ((value != 0) && ( (value & (value - 1)) == 0))
30763 int ret = exact_log2 (value);
30764 gcc_assert (IN_RANGE (ret, 0, 31));
30765 return ret;
30769 return 0;
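/* For example, a CONST_DOUBLE of 0.125 has the exact inverse 8.0, which is
   2^3, so this returns 3 (i.e. three fractional bits), while a value such
   as 0.75 returns 0 because its inverse is not an exact power of two.  */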
30772 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30773 log2 is in [1, 32], return that log2. Otherwise return -1.
30774 This is used in the patterns for vcvt.s32.f32 floating-point to
30775 fixed-point conversions. */
30778 vfp3_const_double_for_bits (rtx x)
30780 const REAL_VALUE_TYPE *r;
30782 if (!CONST_DOUBLE_P (x))
30783 return -1;
30785 r = CONST_DOUBLE_REAL_VALUE (x);
30787 if (REAL_VALUE_NEGATIVE (*r)
30788 || REAL_VALUE_ISNAN (*r)
30789 || REAL_VALUE_ISINF (*r)
30790 || !real_isinteger (r, SFmode))
30791 return -1;
30793 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30795 /* The exact_log2 above will have returned -1 if this is
30796 not an exact log2. */
30797 if (!IN_RANGE (hwint, 1, 32))
30798 return -1;
30800 return hwint;
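/* For example, 65536.0 is 2^16, so this returns 16, while 3.0 and 0.5
   both return -1 (their log2 is not an integer in the range [1, 32]).  */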
30804 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30806 static void
30807 arm_pre_atomic_barrier (enum memmodel model)
30809 if (need_atomic_barrier_p (model, true))
30810 emit_insn (gen_memory_barrier ());
30813 static void
30814 arm_post_atomic_barrier (enum memmodel model)
30816 if (need_atomic_barrier_p (model, false))
30817 emit_insn (gen_memory_barrier ());
30820 /* Emit the load-exclusive and store-exclusive instructions.
30821 Use acquire and release versions if necessary. */
30823 static void
30824 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30826 rtx (*gen) (rtx, rtx);
30828 if (acq)
30830 switch (mode)
30832 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30833 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30834 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30835 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30836 default:
30837 gcc_unreachable ();
30840 else
30842 switch (mode)
30844 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30845 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30846 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30847 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30848 default:
30849 gcc_unreachable ();
30853 emit_insn (gen (rval, mem));
30856 static void
30857 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30858 rtx mem, bool rel)
30860 rtx (*gen) (rtx, rtx, rtx);
30862 if (rel)
30864 switch (mode)
30866 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30867 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30868 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30869 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30870 default:
30871 gcc_unreachable ();
30874 else
30876 switch (mode)
30878 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30879 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30880 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30881 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30882 default:
30883 gcc_unreachable ();
30887 emit_insn (gen (bval, rval, mem));
30890 /* Mark the previous jump instruction as unlikely. */
30892 static void
30893 emit_unlikely_jump (rtx insn)
30895 rtx_insn *jump = emit_jump_insn (insn);
30896 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30899 /* Expand a compare and swap pattern. */
30901 void
30902 arm_expand_compare_and_swap (rtx operands[])
30904 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30905 machine_mode mode, cmp_mode;
30907 bval = operands[0];
30908 rval = operands[1];
30909 mem = operands[2];
30910 oldval = operands[3];
30911 newval = operands[4];
30912 is_weak = operands[5];
30913 mod_s = operands[6];
30914 mod_f = operands[7];
30915 mode = GET_MODE (mem);
30917 /* Normally the succ memory model must be stronger than fail, but in the
30918 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30919 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30921 if (TARGET_HAVE_LDACQ
30922 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30923 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30924 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30926 switch (mode)
30928 case E_QImode:
30929 case E_HImode:
30930 /* For narrow modes, we're going to perform the comparison in SImode,
30931 so do the zero-extension now. */
30932 rval = gen_reg_rtx (SImode);
30933 oldval = convert_modes (SImode, mode, oldval, true);
30934 /* FALLTHRU */
30936 case E_SImode:
30937 /* Force the value into a register if needed. We waited until after
30938 the zero-extension above to do this properly. */
30939 if (!arm_add_operand (oldval, SImode))
30940 oldval = force_reg (SImode, oldval);
30941 break;
30943 case E_DImode:
30944 if (!cmpdi_operand (oldval, mode))
30945 oldval = force_reg (mode, oldval);
30946 break;
30948 default:
30949 gcc_unreachable ();
30952 if (TARGET_THUMB1)
30953 cmp_mode = E_SImode;
30954 else
30955 cmp_mode = CC_Zmode;
30957 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30958 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30959 oldval, newval, is_weak, mod_s, mod_f));
30961 if (mode == QImode || mode == HImode)
30962 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30964 /* In all cases, we arrange for success to be signaled by Z set.
30965 This arrangement allows for the boolean result to be used directly
30966 in a subsequent branch, post optimization. For Thumb-1 targets, the
30967 boolean negation of the result is also stored in bval, because the Thumb-1
30968 backend lacks dependency tracking for the CC flag since flag-setting is not
30969 represented at the RTL level. */
30970 if (TARGET_THUMB1)
30971 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30972 else
30974 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30975 emit_insn (gen_rtx_SET (bval, x));
30979 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30980 another memory store between the load-exclusive and store-exclusive can
30981 reset the monitor from Exclusive to Open state. This means we must wait
30982 until after reload to split the pattern, lest we get a register spill in
30983 the middle of the atomic sequence. Success of the compare and swap is
30984 indicated by the Z flag being set for 32-bit targets and by neg_bval being
30985 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
30986 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30988 void
30989 arm_split_compare_and_swap (rtx operands[])
30991 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30992 machine_mode mode;
30993 enum memmodel mod_s, mod_f;
30994 bool is_weak;
30995 rtx_code_label *label1, *label2;
30996 rtx x, cond;
30998 rval = operands[1];
30999 mem = operands[2];
31000 oldval = operands[3];
31001 newval = operands[4];
31002 is_weak = (operands[5] != const0_rtx);
31003 mod_s_rtx = operands[6];
31004 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31005 mod_f = memmodel_from_int (INTVAL (operands[7]));
31006 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31007 mode = GET_MODE (mem);
31009 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31011 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31012 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31014 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31015 a full barrier is emitted after the store-release. */
31016 if (is_armv8_sync)
31017 use_acquire = false;
31019 /* Checks whether a barrier is needed and emits one accordingly. */
31020 if (!(use_acquire || use_release))
31021 arm_pre_atomic_barrier (mod_s);
31023 label1 = NULL;
31024 if (!is_weak)
31026 label1 = gen_label_rtx ();
31027 emit_label (label1);
31029 label2 = gen_label_rtx ();
31031 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31033 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
31034 as required to communicate with arm_expand_compare_and_swap. */
31035 if (TARGET_32BIT)
31037 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31038 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31039 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31040 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31041 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31043 else
31045 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31046 if (thumb1_cmpneg_operand (oldval, SImode))
31048 rtx src = rval;
31049 if (!satisfies_constraint_L (oldval))
31051 gcc_assert (satisfies_constraint_J (oldval));
31053 /* For such immediates, ADDS needs the source and destination regs
31054 to be the same.
31056 Normally this would be handled by RA, but this is all happening
31057 after RA. */
31058 emit_move_insn (neg_bval, rval);
31059 src = neg_bval;
31062 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31063 label2, cond));
31065 else
31067 emit_move_insn (neg_bval, const1_rtx);
31068 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31072 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31074 /* Weak or strong, we want EQ to be true for success, so that we
31075 match the flags that we got from the compare above. */
31076 if (TARGET_32BIT)
31078 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31079 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31080 emit_insn (gen_rtx_SET (cond, x));
31083 if (!is_weak)
31085 /* Z is set to boolean value of !neg_bval, as required to communicate
31086 with arm_expand_compare_and_swap. */
31087 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31088 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31091 if (!is_mm_relaxed (mod_f))
31092 emit_label (label2);
31094 /* Checks whether a barrier is needed and emits one accordingly. */
31095 if (is_armv8_sync
31096 || !(use_acquire || use_release))
31097 arm_post_atomic_barrier (mod_s);
31099 if (is_mm_relaxed (mod_f))
31100 emit_label (label2);
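/* For a strong SImode compare-and-swap with no acquire/release semantics
   the splitter above produces, roughly, the classic LL/SC loop
   (register names and labels are illustrative):
	.L1:	ldrex	Rval, [Rmem]
		cmp	Rval, Roldval
		bne	.L2
		strex	Rtmp, Rnewval, [Rmem]
		cmp	Rtmp, #0
		bne	.L1
	.L2:
   bracketed by memory barriers when the memory model requires them.  */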
31103 /* Split an atomic operation pattern. Operation is given by CODE and is one
31104 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31105 operation). Operation is performed on the content at MEM and on VALUE
31106 following the memory model MODEL_RTX. The content at MEM before and after
31107 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31108 success of the operation is returned in COND. Using a scratch register or
31109 an operand register for these determines what result is returned for that
31110 pattern. */
31112 void
31113 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31114 rtx value, rtx model_rtx, rtx cond)
31116 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31117 machine_mode mode = GET_MODE (mem);
31118 machine_mode wmode = (mode == DImode ? DImode : SImode);
31119 rtx_code_label *label;
31120 bool all_low_regs, bind_old_new;
31121 rtx x;
31123 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31125 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31126 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31128 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31129 a full barrier is emitted after the store-release. */
31130 if (is_armv8_sync)
31131 use_acquire = false;
31133 /* Checks whether a barrier is needed and emits one accordingly. */
31134 if (!(use_acquire || use_release))
31135 arm_pre_atomic_barrier (model);
31137 label = gen_label_rtx ();
31138 emit_label (label);
31140 if (new_out)
31141 new_out = gen_lowpart (wmode, new_out);
31142 if (old_out)
31143 old_out = gen_lowpart (wmode, old_out);
31144 else
31145 old_out = new_out;
31146 value = simplify_gen_subreg (wmode, value, mode, 0);
31148 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31150 /* Does the operation require the destination and the first operand to use the
31151 same register? This is decided by the register constraints of the relevant
31152 insn patterns in thumb1.md. */
31153 gcc_assert (!new_out || REG_P (new_out));
31154 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31155 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31156 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31157 bind_old_new =
31158 (TARGET_THUMB1
31159 && code != SET
31160 && code != MINUS
31161 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31163 /* We want to return the old value while putting the result of the operation
31164 in the same register as the old value so copy the old value over to the
31165 destination register and use that register for the operation. */
31166 if (old_out && bind_old_new)
31168 emit_move_insn (new_out, old_out);
31169 old_out = new_out;
31172 switch (code)
31174 case SET:
31175 new_out = value;
31176 break;
31178 case NOT:
31179 x = gen_rtx_AND (wmode, old_out, value);
31180 emit_insn (gen_rtx_SET (new_out, x));
31181 x = gen_rtx_NOT (wmode, new_out);
31182 emit_insn (gen_rtx_SET (new_out, x));
31183 break;
31185 case MINUS:
31186 if (CONST_INT_P (value))
31188 value = gen_int_mode (-INTVAL (value), wmode);
31189 code = PLUS;
31191 /* FALLTHRU */
31193 case PLUS:
31194 if (mode == DImode)
31196 /* DImode plus/minus need to clobber flags. */
31197 /* The adddi3 and subdi3 patterns are incorrectly written so that
31198 they require matching operands, even when we could easily support
31199 three operands. Thankfully, this can be fixed up post-splitting,
31200 as the individual add+adc patterns do accept three operands and
31201 post-reload cprop can make these moves go away. */
31202 emit_move_insn (new_out, old_out);
31203 if (code == PLUS)
31204 x = gen_adddi3 (new_out, new_out, value);
31205 else
31206 x = gen_subdi3 (new_out, new_out, value);
31207 emit_insn (x);
31208 break;
31210 /* FALLTHRU */
31212 default:
31213 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31214 emit_insn (gen_rtx_SET (new_out, x));
31215 break;
31218 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31219 use_release);
31221 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31222 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31224 /* Checks whether a barrier is needed and emits one accordingly. */
31225 if (is_armv8_sync
31226 || !(use_acquire || use_release))
31227 arm_post_atomic_barrier (model);
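/* The resulting sequence for, say, an SImode atomic add is essentially the
   classic load-exclusive/store-exclusive loop (register names are
   illustrative):
	.L1:	ldrex	Rold, [Rmem]
		add	Rnew, Rold, Rvalue
		strex	Rcond, Rnew, [Rmem]
		cmp	Rcond, #0
		bne	.L1
   again bracketed by barriers when the memory model requires them.  */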
31230 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31231 opt_machine_mode
31232 arm_mode_to_pred_mode (machine_mode mode)
31234 switch (GET_MODE_NUNITS (mode))
31236 case 16: return V16BImode;
31237 case 8: return V8BImode;
31238 case 4: return V4BImode;
31240 return opt_machine_mode ();
31243 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31244 If CAN_INVERT, store either the result or its inverse in TARGET
31245 and return true if TARGET contains the inverse. If !CAN_INVERT,
31246 always store the result in TARGET, never its inverse.
31248 Note that the handling of floating-point comparisons is not
31249 IEEE compliant. */
31251 bool
31252 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31253 bool can_invert)
31255 machine_mode cmp_result_mode = GET_MODE (target);
31256 machine_mode cmp_mode = GET_MODE (op0);
31258 bool inverted;
31260 /* MVE supports more comparisons than Neon. */
31261 if (TARGET_HAVE_MVE)
31262 inverted = false;
31263 else
31264 switch (code)
31266 /* For these we need to compute the inverse of the requested
31267 comparison. */
31268 case UNORDERED:
31269 case UNLT:
31270 case UNLE:
31271 case UNGT:
31272 case UNGE:
31273 case UNEQ:
31274 case NE:
31275 code = reverse_condition_maybe_unordered (code);
31276 if (!can_invert)
31278 /* Recursively emit the inverted comparison into a temporary
31279 and then store its inverse in TARGET. This avoids reusing
31280 TARGET (which for integer NE could be one of the inputs). */
31281 rtx tmp = gen_reg_rtx (cmp_result_mode);
31282 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31283 gcc_unreachable ();
31284 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31285 return false;
31287 inverted = true;
31288 break;
31290 default:
31291 inverted = false;
31292 break;
31295 switch (code)
31297 /* These are natively supported by Neon for zero comparisons, but otherwise
31298 require the operands to be swapped. For MVE, we can only compare
31299 registers. */
31300 case LE:
31301 case LT:
31302 if (!TARGET_HAVE_MVE)
31303 if (op1 != CONST0_RTX (cmp_mode))
31305 code = swap_condition (code);
31306 std::swap (op0, op1);
31308 /* Fall through. */
31310 /* These are natively supported by Neon for both register and zero
31311 operands. MVE supports registers only. */
31312 case EQ:
31313 case GE:
31314 case GT:
31315 case NE:
31316 if (TARGET_HAVE_MVE)
31318 switch (GET_MODE_CLASS (cmp_mode))
31320 case MODE_VECTOR_INT:
31321 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31322 op0, force_reg (cmp_mode, op1)));
31323 break;
31324 case MODE_VECTOR_FLOAT:
31325 if (TARGET_HAVE_MVE_FLOAT)
31326 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31327 op0, force_reg (cmp_mode, op1)));
31328 else
31329 gcc_unreachable ();
31330 break;
31331 default:
31332 gcc_unreachable ();
31335 else
31336 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31337 return inverted;
31339 /* These are natively supported for register operands only.
31340 Comparisons with zero aren't useful and should be folded
31341 or canonicalized by target-independent code. */
31342 case GEU:
31343 case GTU:
31344 if (TARGET_HAVE_MVE)
31345 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31346 op0, force_reg (cmp_mode, op1)));
31347 else
31348 emit_insn (gen_neon_vc (code, cmp_mode, target,
31349 op0, force_reg (cmp_mode, op1)));
31350 return inverted;
31352 /* These require the operands to be swapped and likewise do not
31353 support comparisons with zero. */
31354 case LEU:
31355 case LTU:
31356 if (TARGET_HAVE_MVE)
31357 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31358 force_reg (cmp_mode, op1), op0));
31359 else
31360 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31361 target, force_reg (cmp_mode, op1), op0));
31362 return inverted;
31364 /* These need a combination of two comparisons. */
31365 case LTGT:
31366 case ORDERED:
31368 /* Operands are LTGT iff (a > b || a < b).
31369 Operands are ORDERED iff (a > b || a <= b). */
31370 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31371 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31372 rtx_code alt_code = (code == LTGT ? LT : LE);
31373 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31374 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31375 gcc_unreachable ();
31376 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31377 gt_res, alt_res)));
31378 return inverted;
31381 default:
31382 gcc_unreachable ();
31386 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31387 CMP_RESULT_MODE is the mode of the comparison result. */
31389 void
31390 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31392 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31393 arm_expand_vector_compare, and another one here. */
31394 rtx mask;
31396 if (TARGET_HAVE_MVE)
31397 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31398 else
31399 mask = gen_reg_rtx (cmp_result_mode);
31401 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31402 operands[4], operands[5], true);
31403 if (inverted)
31404 std::swap (operands[1], operands[2]);
31405 if (TARGET_NEON)
31406 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31407 mask, operands[1], operands[2]));
31408 else
31410 machine_mode cmp_mode = GET_MODE (operands[0]);
31412 switch (GET_MODE_CLASS (cmp_mode))
31414 case MODE_VECTOR_INT:
31415 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31416 operands[1], operands[2], mask));
31417 break;
31418 case MODE_VECTOR_FLOAT:
31419 if (TARGET_HAVE_MVE_FLOAT)
31420 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31421 operands[1], operands[2], mask));
31422 else
31423 gcc_unreachable ();
31424 break;
31425 default:
31426 gcc_unreachable ();
31431 #define MAX_VECT_LEN 16
31433 struct expand_vec_perm_d
31435 rtx target, op0, op1;
31436 vec_perm_indices perm;
31437 machine_mode vmode;
31438 bool one_vector_p;
31439 bool testing_p;
31442 /* Generate a variable permutation. */
31444 static void
31445 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31447 machine_mode vmode = GET_MODE (target);
31448 bool one_vector_p = rtx_equal_p (op0, op1);
31450 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31451 gcc_checking_assert (GET_MODE (op0) == vmode);
31452 gcc_checking_assert (GET_MODE (op1) == vmode);
31453 gcc_checking_assert (GET_MODE (sel) == vmode);
31454 gcc_checking_assert (TARGET_NEON);
31456 if (one_vector_p)
31458 if (vmode == V8QImode)
31459 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31460 else
31461 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31463 else
31465 rtx pair;
31467 if (vmode == V8QImode)
31469 pair = gen_reg_rtx (V16QImode);
31470 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31471 pair = gen_lowpart (TImode, pair);
31472 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31474 else
31476 pair = gen_reg_rtx (OImode);
31477 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31478 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31483 void
31484 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31486 machine_mode vmode = GET_MODE (target);
31487 unsigned int nelt = GET_MODE_NUNITS (vmode);
31488 bool one_vector_p = rtx_equal_p (op0, op1);
31489 rtx mask;
31491 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31492 numbering of elements for big-endian, we must reverse the order. */
31493 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31495 /* The VTBL instruction does not use a modulo index, so we must take care
31496 of that ourselves. */
31497 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31498 mask = gen_const_vec_duplicate (vmode, mask);
31499 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31501 arm_expand_vec_perm_1 (target, op0, op1, sel);
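/* E.g. for a single-vector V8QImode permute the mask is 7, so a selector
   lane holding 9 is reduced to 1 before the VTBL, giving the modulo
   behavior that VEC_PERM_EXPR expects.  */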
31504 /* Map lane ordering between architectural lane order, and GCC lane order,
31505 taking into account ABI. See comment above output_move_neon for details. */
31507 static int
31508 neon_endian_lane_map (machine_mode mode, int lane)
31510 if (BYTES_BIG_ENDIAN)
31512 int nelems = GET_MODE_NUNITS (mode);
31513 /* Reverse lane order. */
31514 lane = (nelems - 1 - lane);
31515 /* Reverse D register order, to match ABI. */
31516 if (GET_MODE_SIZE (mode) == 16)
31517 lane = lane ^ (nelems / 2);
31519 return lane;
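/* For example, for V4SImode on a big-endian target lane 0 maps to 1,
   1 to 0, 2 to 3 and 3 to 2: the lane order is reversed and the two
   D-register halves of the Q register are then swapped back.  */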
31522 /* Some permutations index into pairs of vectors; this is a helper function
31523 to map indexes into those pairs of vectors. */
31525 static int
31526 neon_pair_endian_lane_map (machine_mode mode, int lane)
31528 int nelem = GET_MODE_NUNITS (mode);
31529 if (BYTES_BIG_ENDIAN)
31530 lane =
31531 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31532 return lane;
31535 /* Generate or test for an insn that supports a constant permutation. */
31537 /* Recognize patterns for the VUZP insns. */
31539 static bool
31540 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31542 unsigned int i, odd, mask, nelt = d->perm.length ();
31543 rtx out0, out1, in0, in1;
31544 int first_elem;
31545 int swap_nelt;
31547 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31548 return false;
31550 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31551 big-endian pattern on 64-bit vectors, so we correct for that. */
31552 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31553 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31555 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31557 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31558 odd = 0;
31559 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31560 odd = 1;
31561 else
31562 return false;
31563 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31565 for (i = 0; i < nelt; i++)
31567 unsigned elt =
31568 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31569 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31570 return false;
31573 /* Success! */
31574 if (d->testing_p)
31575 return true;
31577 in0 = d->op0;
31578 in1 = d->op1;
31579 if (swap_nelt != 0)
31580 std::swap (in0, in1);
31582 out0 = d->target;
31583 out1 = gen_reg_rtx (d->vmode);
31584 if (odd)
31585 std::swap (out0, out1);
31587 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31588 return true;
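/* For instance, on little-endian the two-operand V8HImode selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (all the even lanes) is matched here with
   odd == 0 and becomes a single VUZP, the even lanes ending up in
   d->target.  */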
31591 /* Recognize patterns for the VZIP insns. */
31593 static bool
31594 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31596 unsigned int i, high, mask, nelt = d->perm.length ();
31597 rtx out0, out1, in0, in1;
31598 int first_elem;
31599 bool is_swapped;
31601 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31602 return false;
31604 is_swapped = BYTES_BIG_ENDIAN;
31606 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31608 high = nelt / 2;
31609 if (first_elem == neon_endian_lane_map (d->vmode, high))
31611 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31612 high = 0;
31613 else
31614 return false;
31615 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31617 for (i = 0; i < nelt / 2; i++)
31619 unsigned elt =
31620 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31621 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31622 != elt)
31623 return false;
31624 elt =
31625 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31626 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31627 != elt)
31628 return false;
31631 /* Success! */
31632 if (d->testing_p)
31633 return true;
31635 in0 = d->op0;
31636 in1 = d->op1;
31637 if (is_swapped)
31638 std::swap (in0, in1);
31640 out0 = d->target;
31641 out1 = gen_reg_rtx (d->vmode);
31642 if (high)
31643 std::swap (out0, out1);
31645 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31646 return true;
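/* As an example, on little-endian the two-operand V8HImode selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two
   inputs; it is matched here with high == 0 and becomes a single VZIP.  */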
31649 /* Recognize patterns for the VREV insns. */
31650 static bool
31651 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31653 unsigned int i, j, diff, nelt = d->perm.length ();
31654 rtx (*gen) (machine_mode, rtx, rtx);
31656 if (!d->one_vector_p)
31657 return false;
31659 diff = d->perm[0];
31660 switch (diff)
31662 case 7:
31663 switch (d->vmode)
31665 case E_V16QImode:
31666 case E_V8QImode:
31667 gen = gen_neon_vrev64;
31668 break;
31669 default:
31670 return false;
31672 break;
31673 case 3:
31674 switch (d->vmode)
31676 case E_V16QImode:
31677 case E_V8QImode:
31678 gen = gen_neon_vrev32;
31679 break;
31680 case E_V8HImode:
31681 case E_V4HImode:
31682 case E_V8HFmode:
31683 case E_V4HFmode:
31684 gen = gen_neon_vrev64;
31685 break;
31686 default:
31687 return false;
31689 break;
31690 case 1:
31691 switch (d->vmode)
31693 case E_V16QImode:
31694 case E_V8QImode:
31695 gen = gen_neon_vrev16;
31696 break;
31697 case E_V8HImode:
31698 case E_V4HImode:
31699 gen = gen_neon_vrev32;
31700 break;
31701 case E_V4SImode:
31702 case E_V2SImode:
31703 case E_V4SFmode:
31704 case E_V2SFmode:
31705 gen = gen_neon_vrev64;
31706 break;
31707 default:
31708 return false;
31710 break;
31711 default:
31712 return false;
31715 for (i = 0; i < nelt ; i += diff + 1)
31716 for (j = 0; j <= diff; j += 1)
31718 /* This is guaranteed to be true as the value of diff
31719 is 7, 3 or 1 and we should have enough elements in the
31720 queue to generate this. Getting a vector mask with a
31721 value of diff other than these implies that
31722 something has gone wrong by the time we get here. */
31723 gcc_assert (i + j < nelt);
31724 if (d->perm[i + j] != i + diff - j)
31725 return false;
31728 /* Success! */
31729 if (d->testing_p)
31730 return true;
31732 emit_insn (gen (d->vmode, d->target, d->op0));
31733 return true;
31736 /* Recognize patterns for the VTRN insns. */
31738 static bool
31739 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31741 unsigned int i, odd, mask, nelt = d->perm.length ();
31742 rtx out0, out1, in0, in1;
31744 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31745 return false;
31747 /* Note that these are little-endian tests. Adjust for big-endian later. */
31748 if (d->perm[0] == 0)
31749 odd = 0;
31750 else if (d->perm[0] == 1)
31751 odd = 1;
31752 else
31753 return false;
31754 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31756 for (i = 0; i < nelt; i += 2)
31758 if (d->perm[i] != i + odd)
31759 return false;
31760 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31761 return false;
31764 /* Success! */
31765 if (d->testing_p)
31766 return true;
31768 in0 = d->op0;
31769 in1 = d->op1;
31770 if (BYTES_BIG_ENDIAN)
31772 std::swap (in0, in1);
31773 odd = !odd;
31776 out0 = d->target;
31777 out1 = gen_reg_rtx (d->vmode);
31778 if (odd)
31779 std::swap (out0, out1);
31781 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31782 return true;
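/* For example, the little-endian V4SImode selector { 0, 4, 2, 6 } is
   matched here with odd == 0 and becomes a single VTRN; the result is
   { a0, b0, a2, b2 } for inputs a and b.  */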
31785 /* Recognize patterns for the VEXT insns. */
31787 static bool
31788 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31790 unsigned int i, nelt = d->perm.length ();
31791 rtx offset;
31793 unsigned int location;
31795 unsigned int next = d->perm[0] + 1;
31797 /* TODO: Handle GCC's numbering of elements for big-endian. */
31798 if (BYTES_BIG_ENDIAN)
31799 return false;
31801 /* Check if the extracted indexes are increasing by one. */
31802 for (i = 1; i < nelt; next++, i++)
31804 /* If we hit the most significant element of the 2nd vector in
31805 the previous iteration, no need to test further. */
31806 if (next == 2 * nelt)
31807 return false;
31809 /* If we are operating on only one vector: it could be a
31810 rotation. If there are only two elements of size < 64, let
31811 arm_evpc_neon_vrev catch it. */
31812 if (d->one_vector_p && (next == nelt))
31814 if ((nelt == 2) && (d->vmode != V2DImode))
31815 return false;
31816 else
31817 next = 0;
31820 if (d->perm[i] != next)
31821 return false;
31824 location = d->perm[0];
31826 /* Success! */
31827 if (d->testing_p)
31828 return true;
31830 offset = GEN_INT (location);
31832 if(d->vmode == E_DImode)
31833 return false;
31835 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31836 return true;
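/* For example, the two-operand V4SImode selector { 1, 2, 3, 4 } extracts
   { a1, a2, a3, b0 } from inputs a and b and is emitted as a single VEXT
   with offset 1.  */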
31839 /* The NEON VTBL instruction is a fully variable permutation that's even
31840 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31841 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31842 can do slightly better by expanding this as a constant where we don't
31843 have to apply a mask. */
31845 static bool
31846 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31848 rtx rperm[MAX_VECT_LEN], sel;
31849 machine_mode vmode = d->vmode;
31850 unsigned int i, nelt = d->perm.length ();
31852 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31853 numbering of elements for big-endian, we must reverse the order. */
31854 if (BYTES_BIG_ENDIAN)
31855 return false;
31857 if (d->testing_p)
31858 return true;
31860 /* Generic code will try constant permutation twice: once with the
31861 original mode and again with the elements lowered to QImode.
31862 So wait, and don't do the selector expansion ourselves. */
31863 if (vmode != V8QImode && vmode != V16QImode)
31864 return false;
31866 for (i = 0; i < nelt; ++i)
31867 rperm[i] = GEN_INT (d->perm[i]);
31868 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31869 sel = force_reg (vmode, sel);
31871 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31872 return true;
31875 static bool
31876 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31878 /* Check if the input mask matches vext before reordering the
31879 operands. */
31880 if (TARGET_NEON)
31881 if (arm_evpc_neon_vext (d))
31882 return true;
31884 /* The pattern matching functions above are written to look for a small
31885 number to begin the sequence (0, 1, N/2). If we begin with an index
31886 from the second operand, we can swap the operands. */
31887 unsigned int nelt = d->perm.length ();
31888 if (d->perm[0] >= nelt)
31890 d->perm.rotate_inputs (1);
31891 std::swap (d->op0, d->op1);
31894 if (TARGET_NEON)
31896 if (arm_evpc_neon_vuzp (d))
31897 return true;
31898 if (arm_evpc_neon_vzip (d))
31899 return true;
31900 if (arm_evpc_neon_vrev (d))
31901 return true;
31902 if (arm_evpc_neon_vtrn (d))
31903 return true;
31904 return arm_evpc_neon_vtbl (d);
31906 return false;
31909 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31911 static bool
31912 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
31913 rtx target, rtx op0, rtx op1,
31914 const vec_perm_indices &sel)
31916 if (vmode != op_mode)
31917 return false;
31919 struct expand_vec_perm_d d;
31920 int i, nelt, which;
31922 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31923 return false;
31925 d.target = target;
31926 if (op0)
31928 rtx nop0 = force_reg (vmode, op0);
31929 if (op0 == op1)
31930 op1 = nop0;
31931 op0 = nop0;
31933 if (op1)
31934 op1 = force_reg (vmode, op1);
31935 d.op0 = op0;
31936 d.op1 = op1;
31938 d.vmode = vmode;
31939 gcc_assert (VECTOR_MODE_P (d.vmode));
31940 d.testing_p = !target;
31942 nelt = GET_MODE_NUNITS (d.vmode);
31943 for (i = which = 0; i < nelt; ++i)
31945 int ei = sel[i] & (2 * nelt - 1);
31946 which |= (ei < nelt ? 1 : 2);
31949 switch (which)
31951 default:
31952 gcc_unreachable();
31954 case 3:
31955 d.one_vector_p = false;
31956 if (d.testing_p || !rtx_equal_p (op0, op1))
31957 break;
31959 /* The elements of PERM do not suggest that only the first operand
31960 is used, but both operands are identical. Allow easier matching
31961 of the permutation by folding the permutation into the single
31962 input vector. */
31963 /* FALLTHRU */
31964 case 2:
31965 d.op0 = op1;
31966 d.one_vector_p = true;
31967 break;
31969 case 1:
31970 d.op1 = op0;
31971 d.one_vector_p = true;
31972 break;
31975 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31977 if (!d.testing_p)
31978 return arm_expand_vec_perm_const_1 (&d);
31980 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31981 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31982 if (!d.one_vector_p)
31983 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31985 start_sequence ();
31986 bool ret = arm_expand_vec_perm_const_1 (&d);
31987 end_sequence ();
31989 return ret;
31992 bool
31993 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31995 /* If we are soft float and either we have LDRD or the mode is no wider
31996 than a word, then all auto-increment forms are ok. */
31997 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31998 return true;
32000 switch (code)
32002 /* Post-increment and pre-decrement are supported for all
32003 instruction forms except for vector forms. */
32004 case ARM_POST_INC:
32005 case ARM_PRE_DEC:
32006 if (VECTOR_MODE_P (mode))
32008 if (code != ARM_PRE_DEC)
32009 return true;
32010 else
32011 return false;
32014 return true;
32016 case ARM_POST_DEC:
32017 case ARM_PRE_INC:
32018 /* Without LDRD, and with a mode size greater than the
32019 word size, there is no point in auto-incrementing
32020 because ldm and stm do not have these forms. */
32021 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32022 return false;
32024 /* Vector and floating point modes do not support
32025 these auto increment forms. */
32026 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32027 return false;
32029 return true;
32031 default:
32032 return false;
32036 return false;
32039 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
32040 on ARM, since we know that shifts by negative amounts are no-ops.
32041 Additionally, the default expansion code is not available or suitable
32042 for post-reload insn splits (this can occur when the register allocator
32043 chooses not to do a shift in NEON).
32045 This function is used in both initial expand and post-reload splits, and
32046 handles all kinds of 64-bit shifts.
32048 Input requirements:
32049 - It is safe for the input and output to be the same register, but
32050 early-clobber rules apply for the shift amount and scratch registers.
32051 - Shift by register requires both scratch registers. In all other cases
32052 the scratch registers may be NULL.
32053 - Ashiftrt by a register also clobbers the CC register. */
32054 void
32055 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32056 rtx amount, rtx scratch1, rtx scratch2)
32058 rtx out_high = gen_highpart (SImode, out);
32059 rtx out_low = gen_lowpart (SImode, out);
32060 rtx in_high = gen_highpart (SImode, in);
32061 rtx in_low = gen_lowpart (SImode, in);
32063 /* Terminology:
32064 in = the register pair containing the input value.
32065 out = the destination register pair.
32066 up = the high- or low-part of each pair.
32067 down = the opposite part to "up".
32068 In a shift, we can consider bits to shift from "up"-stream to
32069 "down"-stream, so in a left-shift "up" is the low-part and "down"
32070 is the high-part of each register pair. */
32072 rtx out_up = code == ASHIFT ? out_low : out_high;
32073 rtx out_down = code == ASHIFT ? out_high : out_low;
32074 rtx in_up = code == ASHIFT ? in_low : in_high;
32075 rtx in_down = code == ASHIFT ? in_high : in_low;
32077 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32078 gcc_assert (out
32079 && (REG_P (out) || SUBREG_P (out))
32080 && GET_MODE (out) == DImode);
32081 gcc_assert (in
32082 && (REG_P (in) || SUBREG_P (in))
32083 && GET_MODE (in) == DImode);
32084 gcc_assert (amount
32085 && (((REG_P (amount) || SUBREG_P (amount))
32086 && GET_MODE (amount) == SImode)
32087 || CONST_INT_P (amount)));
32088 gcc_assert (scratch1 == NULL
32089 || (GET_CODE (scratch1) == SCRATCH)
32090 || (GET_MODE (scratch1) == SImode
32091 && REG_P (scratch1)));
32092 gcc_assert (scratch2 == NULL
32093 || (GET_CODE (scratch2) == SCRATCH)
32094 || (GET_MODE (scratch2) == SImode
32095 && REG_P (scratch2)));
32096 gcc_assert (!REG_P (out) || !REG_P (amount)
32097 || !HARD_REGISTER_P (out)
32098 || (REGNO (out) != REGNO (amount)
32099 && REGNO (out) + 1 != REGNO (amount)));
32101 /* Macros to make following code more readable. */
32102 #define SUB_32(DEST,SRC) \
32103 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32104 #define RSB_32(DEST,SRC) \
32105 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32106 #define SUB_S_32(DEST,SRC) \
32107 gen_addsi3_compare0 ((DEST), (SRC), \
32108 GEN_INT (-32))
32109 #define SET(DEST,SRC) \
32110 gen_rtx_SET ((DEST), (SRC))
32111 #define SHIFT(CODE,SRC,AMOUNT) \
32112 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32113 #define LSHIFT(CODE,SRC,AMOUNT) \
32114 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32115 SImode, (SRC), (AMOUNT))
32116 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32117 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32118 SImode, (SRC), (AMOUNT))
32119 #define ORR(A,B) \
32120 gen_rtx_IOR (SImode, (A), (B))
32121 #define BRANCH(COND,LABEL) \
32122 gen_arm_cond_branch ((LABEL), \
32123 gen_rtx_ ## COND (CCmode, cc_reg, \
32124 const0_rtx), \
32125 cc_reg)
32127 /* Shifts by register and shifts by constant are handled separately. */
32128 if (CONST_INT_P (amount))
32130 /* We have a shift-by-constant. */
32132 /* First, handle out-of-range shift amounts.
32133 In both cases we try to match the result that an ARM instruction in a
32134 shift-by-register would give. This helps reduce execution
32135 differences between optimization levels, but it won't stop other
32136 parts of the compiler doing different things. This is "undefined"
32137 behavior, in any case. */
32138 if (INTVAL (amount) <= 0)
32139 emit_insn (gen_movdi (out, in));
32140 else if (INTVAL (amount) >= 64)
32142 if (code == ASHIFTRT)
32144 rtx const31_rtx = GEN_INT (31);
32145 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32146 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32148 else
32149 emit_insn (gen_movdi (out, const0_rtx));
32152 /* Now handle valid shifts. */
32153 else if (INTVAL (amount) < 32)
32155 /* Shifts by a constant less than 32. */
32156 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32158 /* Clearing the out register in DImode first avoids lots
32159 of spilling and results in less stack usage.
32160 Later this redundant insn is completely removed.
32161 Do that only if "in" and "out" are different registers. */
32162 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32163 emit_insn (SET (out, const0_rtx));
32164 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32165 emit_insn (SET (out_down,
32166 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32167 out_down)));
32168 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32170 else
32172 /* Shifts by a constant greater than 31. */
32173 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32175 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32176 emit_insn (SET (out, const0_rtx));
32177 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32178 if (code == ASHIFTRT)
32179 emit_insn (gen_ashrsi3 (out_up, in_up,
32180 GEN_INT (31)));
32181 else
32182 emit_insn (SET (out_up, const0_rtx));
32185 else
32187 /* We have a shift-by-register. */
32188 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32190 /* This alternative requires the scratch registers. */
32191 gcc_assert (scratch1 && REG_P (scratch1));
32192 gcc_assert (scratch2 && REG_P (scratch2));
32194 /* We will need the values "amount-32" and "32-amount" later.
32195 Swapping them around now allows the later code to be more general. */
32196 switch (code)
32198 case ASHIFT:
32199 emit_insn (SUB_32 (scratch1, amount));
32200 emit_insn (RSB_32 (scratch2, amount));
32201 break;
32202 case ASHIFTRT:
32203 emit_insn (RSB_32 (scratch1, amount));
32204 /* Also set CC = amount > 32. */
32205 emit_insn (SUB_S_32 (scratch2, amount));
32206 break;
32207 case LSHIFTRT:
32208 emit_insn (RSB_32 (scratch1, amount));
32209 emit_insn (SUB_32 (scratch2, amount));
32210 break;
32211 default:
32212 gcc_unreachable ();
32215 /* Emit code like this:
32217 arithmetic-left:
32218 out_down = in_down << amount;
32219 out_down = (in_up << (amount - 32)) | out_down;
32220 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32221 out_up = in_up << amount;
32223 arithmetic-right:
32224 out_down = in_down >> amount;
32225 out_down = (in_up << (32 - amount)) | out_down;
32226 if (amount < 32)
32227 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32228 out_up = in_up >> amount;
32230 logical-right:
32231 out_down = in_down >> amount;
32232 out_down = (in_up << (32 - amount)) | out_down;
32233 if (amount < 32)
32234 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32235 out_up = in_up >> amount;
32237 The ARM and Thumb2 variants are the same but implemented slightly
32238 differently. If this were only called during expand we could just
32239 use the Thumb2 case and let combine do the right thing, but this
32240 can also be called from post-reload splitters. */
32242 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32244 if (!TARGET_THUMB2)
32246 /* Emit code for ARM mode. */
32247 emit_insn (SET (out_down,
32248 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32249 if (code == ASHIFTRT)
32251 rtx_code_label *done_label = gen_label_rtx ();
32252 emit_jump_insn (BRANCH (LT, done_label));
32253 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32254 out_down)));
32255 emit_label (done_label);
32257 else
32258 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32259 out_down)));
32261 else
32263 /* Emit code for Thumb2 mode.
32264 Thumb2 can't do shift and or in one insn. */
32265 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32266 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32268 if (code == ASHIFTRT)
32270 rtx_code_label *done_label = gen_label_rtx ();
32271 emit_jump_insn (BRANCH (LT, done_label));
32272 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32273 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32274 emit_label (done_label);
32276 else
32278 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32279 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32283 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32286 #undef SUB_32
32287 #undef RSB_32
32288 #undef SUB_S_32
32289 #undef SET
32290 #undef SHIFT
32291 #undef LSHIFT
32292 #undef REV_LSHIFT
32293 #undef ORR
32294 #undef BRANCH
32297 /* Returns true if the pattern is a valid symbolic address, which is either a
32298 symbol_ref or (symbol_ref + addend).
32300 According to the ARM ELF ABI, the initial addend of REL-type relocations
32301 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32302 literal field of the instruction as a 16-bit signed value in the range
32303 -32768 <= A < 32768.
32305 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32306 unsigned range of 0 <= A < 256 as described in the AAELF32
32307 relocation handling documentation: REL-type relocations are encoded
32308 as unsigned in this case. */
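/* For example, (const (plus (symbol_ref "x") (const_int 4))) is accepted,
   while an addend of 0x8000 is rejected; in Thumb-1 without MOVT only
   addends in the range 0-255 are allowed. */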
32310 bool
32311 arm_valid_symbolic_address_p (rtx addr)
32313 rtx xop0, xop1 = NULL_RTX;
32314 rtx tmp = addr;
32316 if (target_word_relocations)
32317 return false;
32319 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32320 return true;
32322 /* (const (plus: symbol_ref const_int)) */
32323 if (GET_CODE (addr) == CONST)
32324 tmp = XEXP (addr, 0);
32326 if (GET_CODE (tmp) == PLUS)
32328 xop0 = XEXP (tmp, 0);
32329 xop1 = XEXP (tmp, 1);
32331 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32333 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32334 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32335 else
32336 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32340 return false;
32343 /* Return true if *COMPARISON is a valid comparison operation, and
32344 put the operands into a form that is valid. */
32345 bool
32346 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32348 enum rtx_code code = GET_CODE (*comparison);
32349 int code_int;
32350 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32351 ? GET_MODE (*op2) : GET_MODE (*op1);
32353 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32355 if (code == UNEQ || code == LTGT)
32356 return false;
32358 code_int = (int)code;
32359 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32360 PUT_CODE (*comparison, (enum rtx_code)code_int);
32362 switch (mode)
32364 case E_SImode:
32365 if (!arm_add_operand (*op1, mode))
32366 *op1 = force_reg (mode, *op1);
32367 if (!arm_add_operand (*op2, mode))
32368 *op2 = force_reg (mode, *op2);
32369 return true;
32371 case E_DImode:
32372 /* gen_compare_reg() will sort out any invalid operands. */
32373 return true;
32375 case E_HFmode:
32376 if (!TARGET_VFP_FP16INST)
32377 break;
32378 /* FP16 comparisons are done in SF mode. */
32379 mode = SFmode;
32380 *op1 = convert_to_mode (mode, *op1, 1);
32381 *op2 = convert_to_mode (mode, *op2, 1);
32382 /* Fall through. */
32383 case E_SFmode:
32384 case E_DFmode:
32385 if (!vfp_compare_operand (*op1, mode))
32386 *op1 = force_reg (mode, *op1);
32387 if (!vfp_compare_operand (*op2, mode))
32388 *op2 = force_reg (mode, *op2);
32389 return true;
32390 default:
32391 break;
32394 return false;
32398 /* Maximum number of instructions to set a block of memory. */
32399 static int
32400 arm_block_set_max_insns (void)
32402 if (optimize_function_for_size_p (cfun))
32403 return 4;
32404 else
32405 return current_tune->max_insns_inline_memset;
32408 /* Return TRUE if it's profitable to set a block of memory in the
32409 non-vectorized case. VAL is the value to set the memory
32410 with. LENGTH is the number of bytes to set. ALIGN is the
32411 alignment of the destination memory in bytes. UNALIGNED_P
32412 is TRUE if we can only set the memory with instructions
32413 meeting alignment requirements. USE_STRD_P is TRUE if we
32414 can use strd to set the memory. */
32415 static bool
32416 arm_block_set_non_vect_profit_p (rtx val,
32417 unsigned HOST_WIDE_INT length,
32418 unsigned HOST_WIDE_INT align,
32419 bool unaligned_p, bool use_strd_p)
32421 int num = 0;
32422 /* For leftovers of 0-7 bytes, we can set the memory block using
32423 strb/strh/str with the minimum number of instructions. */
32424 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
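/* E.g. 3 leftover bytes take an strh plus an strb (2 insns), while
   7 leftover bytes take str + strh + strb (3 insns). */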
32426 if (unaligned_p)
32428 num = arm_const_inline_cost (SET, val);
32429 num += length / align + length % align;
32431 else if (use_strd_p)
32433 num = arm_const_double_inline_cost (val);
32434 num += (length >> 3) + leftover[length & 7];
32436 else
32438 num = arm_const_inline_cost (SET, val);
32439 num += (length >> 2) + leftover[length & 3];
32442 /* We may be able to combine the last STRH/STRB pair into a single STR
32443 by shifting one byte back. */
32444 if (unaligned_access && length > 3 && (length & 3) == 3)
32445 num--;
32447 return (num <= arm_block_set_max_insns ());
32450 /* Return TRUE if it's profitable to set a block of memory in the
32451 vectorized case. LENGTH is the number of bytes to set.
32452 ALIGN is the alignment of destination memory in bytes.
32453 MODE is the vector mode used to set the memory. */
32454 static bool
32455 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32456 unsigned HOST_WIDE_INT align,
32457 machine_mode mode)
32459 int num;
32460 bool unaligned_p = ((align & 3) != 0);
32461 unsigned int nelt = GET_MODE_NUNITS (mode);
32463 /* Instruction loading constant value. */
32464 num = 1;
32465 /* Instructions storing the memory. */
32466 num += (length + nelt - 1) / nelt;
32467 /* Instructions adjusting the address expression. We only need to adjust
32468 the address expression if the destination is 4-byte aligned and the
32469 leftover bytes can only be stored by a misaligned store instruction. */
32470 if (!unaligned_p && (length & 3) != 0)
32471 num++;
32473 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32474 if (!unaligned_p && mode == V16QImode)
32475 num--;
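/* As a rough worked example, a 20-byte aligned set in V16QImode is
   estimated here at 1 (load constant) + 2 (stores) - 1 = 2 instructions. */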
32477 return (num <= arm_block_set_max_insns ());
32480 /* Set a block of memory using vectorization instructions for the
32481 unaligned case. We fill the first LENGTH bytes of the memory
32482 area starting from DSTBASE with byte constant VALUE. ALIGN is
32483 the alignment requirement of memory. Return TRUE if succeeded. */
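/* Leftover bytes at the end are handled by moving the final store back so
   that it overlaps the previous one: e.g. for 20 bytes, a 16-byte store
   covers bytes 0-15 and an 8-byte store then covers bytes 12-19. */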
32484 static bool
32485 arm_block_set_unaligned_vect (rtx dstbase,
32486 unsigned HOST_WIDE_INT length,
32487 unsigned HOST_WIDE_INT value,
32488 unsigned HOST_WIDE_INT align)
32490 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32491 rtx dst, mem;
32492 rtx val_vec, reg;
32493 rtx (*gen_func) (rtx, rtx);
32494 machine_mode mode;
32495 unsigned HOST_WIDE_INT v = value;
32496 unsigned int offset = 0;
32497 gcc_assert ((align & 0x3) != 0);
32498 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32499 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32500 if (length >= nelt_v16)
32502 mode = V16QImode;
32503 gen_func = gen_movmisalignv16qi;
32505 else
32507 mode = V8QImode;
32508 gen_func = gen_movmisalignv8qi;
32510 nelt_mode = GET_MODE_NUNITS (mode);
32511 gcc_assert (length >= nelt_mode);
32512 /* Skip if it isn't profitable. */
32513 if (!arm_block_set_vect_profit_p (length, align, mode))
32514 return false;
32516 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32517 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32519 v = sext_hwi (v, BITS_PER_WORD);
32521 reg = gen_reg_rtx (mode);
32522 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32523 /* Emit instruction loading the constant value. */
32524 emit_move_insn (reg, val_vec);
32526 /* Handle nelt_mode bytes in a vector. */
32527 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32529 emit_insn ((*gen_func) (mem, reg));
32530 if (i + 2 * nelt_mode <= length)
32532 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32533 offset += nelt_mode;
32534 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32538 /* If at least nelt_v8 bytes are left over, we must be in
32539 V16QI mode. */
32540 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32542 /* Handle (8, 16) bytes leftover. */
32543 if (i + nelt_v8 < length)
32545 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32546 offset += length - i;
32547 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32549 /* We are shifting bytes back, set the alignment accordingly. */
32550 if ((length & 1) != 0 && align >= 2)
32551 set_mem_align (mem, BITS_PER_UNIT);
32553 emit_insn (gen_movmisalignv16qi (mem, reg));
32555 /* Handle (0, 8] bytes leftover. */
32556 else if (i < length && i + nelt_v8 >= length)
32558 if (mode == V16QImode)
32559 reg = gen_lowpart (V8QImode, reg);
32561 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32562 + (nelt_mode - nelt_v8))));
32563 offset += (length - i) + (nelt_mode - nelt_v8);
32564 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32566 /* We are shifting bytes back, set the alignment accordingly. */
32567 if ((length & 1) != 0 && align >= 2)
32568 set_mem_align (mem, BITS_PER_UNIT);
32570 emit_insn (gen_movmisalignv8qi (mem, reg));
32573 return true;
32576 /* Set a block of memory using vectorization instructions for the
32577 aligned case. We fill the first LENGTH bytes of the memory area
32578 starting from DSTBASE with byte constant VALUE. ALIGN is the
32579 alignment requirement of memory. Return TRUE if succeeded. */
32580 static bool
32581 arm_block_set_aligned_vect (rtx dstbase,
32582 unsigned HOST_WIDE_INT length,
32583 unsigned HOST_WIDE_INT value,
32584 unsigned HOST_WIDE_INT align)
32586 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32587 rtx dst, addr, mem;
32588 rtx val_vec, reg;
32589 machine_mode mode;
32590 unsigned int offset = 0;
32592 gcc_assert ((align & 0x3) == 0);
32593 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32594 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32595 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32596 mode = V16QImode;
32597 else
32598 mode = V8QImode;
32600 nelt_mode = GET_MODE_NUNITS (mode);
32601 gcc_assert (length >= nelt_mode);
32602 /* Skip if it isn't profitable. */
32603 if (!arm_block_set_vect_profit_p (length, align, mode))
32604 return false;
32606 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32608 reg = gen_reg_rtx (mode);
32609 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32610 /* Emit instruction loading the constant value. */
32611 emit_move_insn (reg, val_vec);
32613 i = 0;
32614 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32615 if (mode == V16QImode)
32617 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32618 emit_insn (gen_movmisalignv16qi (mem, reg));
32619 i += nelt_mode;
32620 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32621 if (i + nelt_v8 < length && i + nelt_v16 > length)
32623 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32624 offset += length - nelt_mode;
32625 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32626 /* We are shifting bytes back, set the alignment accordingly. */
32627 if ((length & 0x3) == 0)
32628 set_mem_align (mem, BITS_PER_UNIT * 4);
32629 else if ((length & 0x1) == 0)
32630 set_mem_align (mem, BITS_PER_UNIT * 2);
32631 else
32632 set_mem_align (mem, BITS_PER_UNIT);
32634 emit_insn (gen_movmisalignv16qi (mem, reg));
32635 return true;
32637 /* Fall through for bytes leftover. */
32638 mode = V8QImode;
32639 nelt_mode = GET_MODE_NUNITS (mode);
32640 reg = gen_lowpart (V8QImode, reg);
32643 /* Handle 8 bytes in a vector. */
32644 for (; (i + nelt_mode <= length); i += nelt_mode)
32646 addr = plus_constant (Pmode, dst, i);
32647 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32648 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32649 emit_move_insn (mem, reg);
32650 else
32651 emit_insn (gen_unaligned_storev8qi (mem, reg));
32654 /* Handle single word leftover by shifting 4 bytes back. We can
32655 use aligned access for this case. */
32656 if (i + UNITS_PER_WORD == length)
32658 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32659 offset += i - UNITS_PER_WORD;
32660 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32661 /* We are shifting 4 bytes back, set the alignment accordingly. */
32662 if (align > UNITS_PER_WORD)
32663 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32665 emit_insn (gen_unaligned_storev8qi (mem, reg));
32667 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32668 We have to use unaligned access for this case. */
32669 else if (i < length)
32671 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32672 offset += length - nelt_mode;
32673 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32674 /* We are shifting bytes back, set the alignment accordingly. */
32675 if ((length & 1) == 0)
32676 set_mem_align (mem, BITS_PER_UNIT * 2);
32677 else
32678 set_mem_align (mem, BITS_PER_UNIT);
32680 emit_insn (gen_movmisalignv8qi (mem, reg));
32683 return true;
32686 /* Set a block of memory using plain strh/strb instructions, only
32687 using instructions allowed by ALIGN on the processor. We fill the
32688 first LENGTH bytes of the memory area starting from DSTBASE
32689 with byte constant VALUE. ALIGN is the alignment requirement
32690 of memory. */
32691 static bool
32692 arm_block_set_unaligned_non_vect (rtx dstbase,
32693 unsigned HOST_WIDE_INT length,
32694 unsigned HOST_WIDE_INT value,
32695 unsigned HOST_WIDE_INT align)
32697 unsigned int i;
32698 rtx dst, addr, mem;
32699 rtx val_exp, val_reg, reg;
32700 machine_mode mode;
32701 HOST_WIDE_INT v = value;
32703 gcc_assert (align == 1 || align == 2);
32705 if (align == 2)
32706 v |= (value << BITS_PER_UNIT);
32708 v = sext_hwi (v, BITS_PER_WORD);
32709 val_exp = GEN_INT (v);
32710 /* Skip if it isn't profitable. */
32711 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32712 align, true, false))
32713 return false;
32715 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32716 mode = (align == 2 ? HImode : QImode);
32717 val_reg = force_reg (SImode, val_exp);
32718 reg = gen_lowpart (mode, val_reg);
32720 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32722 addr = plus_constant (Pmode, dst, i);
32723 mem = adjust_automodify_address (dstbase, mode, addr, i);
32724 emit_move_insn (mem, reg);
32727 /* Handle single byte leftover. */
32728 if (i + 1 == length)
32730 reg = gen_lowpart (QImode, val_reg);
32731 addr = plus_constant (Pmode, dst, i);
32732 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32733 emit_move_insn (mem, reg);
32734 i++;
32737 gcc_assert (i == length);
32738 return true;
32741 /* Set a block of memory using plain strd/str/strh/strb instructions,
32742 to permit unaligned stores on processors which support unaligned
32743 semantics for those instructions. We fill the first LENGTH bytes
32744 of the memory area starting from DSTBASE with byte constant VALUE.
32745 ALIGN is the alignment requirement of memory. */
32746 static bool
32747 arm_block_set_aligned_non_vect (rtx dstbase,
32748 unsigned HOST_WIDE_INT length,
32749 unsigned HOST_WIDE_INT value,
32750 unsigned HOST_WIDE_INT align)
32752 unsigned int i;
32753 rtx dst, addr, mem;
32754 rtx val_exp, val_reg, reg;
32755 unsigned HOST_WIDE_INT v;
32756 bool use_strd_p;
32758 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32759 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32761 v = (value | (value << 8) | (value << 16) | (value << 24));
32762 if (length < UNITS_PER_WORD)
32763 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32765 if (use_strd_p)
32766 v |= (v << BITS_PER_WORD);
32767 else
32768 v = sext_hwi (v, BITS_PER_WORD);
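/* E.g. VALUE == 0xab replicates to the word pattern 0xabababab, doubled
   to 0xabababababababab when strd will be used. */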
32770 val_exp = GEN_INT (v);
32771 /* Skip if it isn't profitable. */
32772 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32773 align, false, use_strd_p))
32775 if (!use_strd_p)
32776 return false;
32778 /* Try without strd. */
32779 v = (v >> BITS_PER_WORD);
32780 v = sext_hwi (v, BITS_PER_WORD);
32781 val_exp = GEN_INT (v);
32782 use_strd_p = false;
32783 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32784 align, false, use_strd_p))
32785 return false;
32788 i = 0;
32789 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32790 /* Handle double words using strd if possible. */
32791 if (use_strd_p)
32793 val_reg = force_reg (DImode, val_exp);
32794 reg = val_reg;
32795 for (; (i + 8 <= length); i += 8)
32797 addr = plus_constant (Pmode, dst, i);
32798 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32799 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32800 emit_move_insn (mem, reg);
32801 else
32802 emit_insn (gen_unaligned_storedi (mem, reg));
32805 else
32806 val_reg = force_reg (SImode, val_exp);
32808 /* Handle words. */
32809 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32810 for (; (i + 4 <= length); i += 4)
32812 addr = plus_constant (Pmode, dst, i);
32813 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32814 if ((align & 3) == 0)
32815 emit_move_insn (mem, reg);
32816 else
32817 emit_insn (gen_unaligned_storesi (mem, reg));
32820 /* Merge last pair of STRH and STRB into a STR if possible. */
32821 if (unaligned_access && i > 0 && (i + 3) == length)
32823 addr = plus_constant (Pmode, dst, i - 1);
32824 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32825 /* We are shifting one byte back, set the alignment accordingly. */
32826 if ((align & 1) == 0)
32827 set_mem_align (mem, BITS_PER_UNIT);
32829 /* Most likely this is an unaligned access, and we can't tell at
32830 compilation time. */
32831 emit_insn (gen_unaligned_storesi (mem, reg));
32832 return true;
32835 /* Handle half word leftover. */
32836 if (i + 2 <= length)
32838 reg = gen_lowpart (HImode, val_reg);
32839 addr = plus_constant (Pmode, dst, i);
32840 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32841 if ((align & 1) == 0)
32842 emit_move_insn (mem, reg);
32843 else
32844 emit_insn (gen_unaligned_storehi (mem, reg));
32846 i += 2;
32849 /* Handle single byte leftover. */
32850 if (i + 1 == length)
32852 reg = gen_lowpart (QImode, val_reg);
32853 addr = plus_constant (Pmode, dst, i);
32854 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32855 emit_move_insn (mem, reg);
32858 return true;
32861 /* Set a block of memory using vectorization instructions for both
32862 aligned and unaligned cases. We fill the first LENGTH bytes of
32863 the memory area starting from DSTBASE with byte constant VALUE.
32864 ALIGN is the alignment requirement of memory. */
32865 static bool
32866 arm_block_set_vect (rtx dstbase,
32867 unsigned HOST_WIDE_INT length,
32868 unsigned HOST_WIDE_INT value,
32869 unsigned HOST_WIDE_INT align)
32871 /* Check whether we need to use unaligned store instruction. */
32872 if (((align & 3) != 0 || (length & 3) != 0)
32873 /* Check whether unaligned store instruction is available. */
32874 && (!unaligned_access || BYTES_BIG_ENDIAN))
32875 return false;
32877 if ((align & 3) == 0)
32878 return arm_block_set_aligned_vect (dstbase, length, value, align);
32879 else
32880 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32883 /* Expand a block memory set (setmem) operation. First we try to do that
32884 using vectorization instructions, then try ARM unaligned access and
32885 double-word stores if profitable. OPERANDS[0] is the destination,
32886 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32887 initialize the memory with, OPERANDS[3] is the known alignment of the
32888 destination. */
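/* For example, a 15-byte set of 0xab to a word-aligned destination is
   typically expanded as three word stores of 0xabababab followed by a
   halfword and a byte store, or by a final overlapping unaligned word
   store when unaligned access is available. */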
32889 bool
32890 arm_gen_setmem (rtx *operands)
32892 rtx dstbase = operands[0];
32893 unsigned HOST_WIDE_INT length;
32894 unsigned HOST_WIDE_INT value;
32895 unsigned HOST_WIDE_INT align;
32897 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32898 return false;
32900 length = UINTVAL (operands[1]);
32901 if (length > 64)
32902 return false;
32904 value = (UINTVAL (operands[2]) & 0xFF);
32905 align = UINTVAL (operands[3]);
32906 if (TARGET_NEON && length >= 8
32907 && current_tune->string_ops_prefer_neon
32908 && arm_block_set_vect (dstbase, length, value, align))
32909 return true;
32911 if (!unaligned_access && (align & 3) != 0)
32912 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32914 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32918 static bool
32919 arm_macro_fusion_p (void)
32921 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32924 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32925 for MOVW / MOVT macro fusion. */
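/* E.g. "movw r0, #0x1234" followed by "movt r0, #0x5678", which together
   set r0 to 0x56781234. */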
32927 static bool
32928 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32930 /* We are trying to fuse
32931 movw imm / movt imm
32932 instructions as a group that gets scheduled together. */
32934 rtx set_dest = SET_DEST (curr_set);
32936 if (GET_MODE (set_dest) != SImode)
32937 return false;
32939 /* We are trying to match:
32940 prev (movw) == (set (reg r0) (const_int imm16))
32941 curr (movt) == (set (zero_extract (reg r0)
32942 (const_int 16)
32943 (const_int 16))
32944 (const_int imm16_1))
32946 prev (movw) == (set (reg r1)
32947 (high (symbol_ref ("SYM"))))
32948 curr (movt) == (set (reg r0)
32949 (lo_sum (reg r1)
32950 (symbol_ref ("SYM")))) */
32952 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32954 if (CONST_INT_P (SET_SRC (curr_set))
32955 && CONST_INT_P (SET_SRC (prev_set))
32956 && REG_P (XEXP (set_dest, 0))
32957 && REG_P (SET_DEST (prev_set))
32958 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32959 return true;
32962 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32963 && REG_P (SET_DEST (curr_set))
32964 && REG_P (SET_DEST (prev_set))
32965 && GET_CODE (SET_SRC (prev_set)) == HIGH
32966 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32967 return true;
32969 return false;
32972 static bool
32973 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32975 rtx prev_set = single_set (prev);
32976 rtx curr_set = single_set (curr);
32978 if (!prev_set
32979 || !curr_set)
32980 return false;
32982 if (any_condjump_p (curr))
32983 return false;
32985 if (!arm_macro_fusion_p ())
32986 return false;
32988 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32989 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32990 return true;
32992 return false;
32995 /* Return true iff the instruction fusion described by OP is enabled. */
32996 bool
32997 arm_fusion_enabled_p (tune_params::fuse_ops op)
32999 return current_tune->fusible_ops & op;
33002 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33003 scheduled for speculative execution. Reject the long-running division
33004 and square-root instructions. */
33006 static bool
33007 arm_sched_can_speculate_insn (rtx_insn *insn)
33009 switch (get_attr_type (insn))
33011 case TYPE_SDIV:
33012 case TYPE_UDIV:
33013 case TYPE_FDIVS:
33014 case TYPE_FDIVD:
33015 case TYPE_FSQRTS:
33016 case TYPE_FSQRTD:
33017 case TYPE_NEON_FP_SQRT_S:
33018 case TYPE_NEON_FP_SQRT_D:
33019 case TYPE_NEON_FP_SQRT_S_Q:
33020 case TYPE_NEON_FP_SQRT_D_Q:
33021 case TYPE_NEON_FP_DIV_S:
33022 case TYPE_NEON_FP_DIV_D:
33023 case TYPE_NEON_FP_DIV_S_Q:
33024 case TYPE_NEON_FP_DIV_D_Q:
33025 return false;
33026 default:
33027 return true;
33031 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
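/* The offset is 1 << 29 (0x20000000); with the usual one-shadow-byte-per-
   eight-bytes mapping, the shadow for address A lives at (A >> 3) + 0x20000000. */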
33033 static unsigned HOST_WIDE_INT
33034 arm_asan_shadow_offset (void)
33036 return HOST_WIDE_INT_1U << 29;
33040 /* This is a temporary fix for PR60655. Ideally we need
33041 to handle most of these cases in the generic part but
33042 currently we reject minus (..) (sym_ref). We try to
33043 ameliorate the case with minus (sym_ref1) (sym_ref2)
33044 where they are in the same section. */
33046 static bool
33047 arm_const_not_ok_for_debug_p (rtx p)
33049 tree decl_op0 = NULL;
33050 tree decl_op1 = NULL;
33052 if (GET_CODE (p) == UNSPEC)
33053 return true;
33054 if (GET_CODE (p) == MINUS)
33056 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33058 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33059 if (decl_op1
33060 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33061 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33063 if ((VAR_P (decl_op1)
33064 || TREE_CODE (decl_op1) == CONST_DECL)
33065 && (VAR_P (decl_op0)
33066 || TREE_CODE (decl_op0) == CONST_DECL))
33067 return (get_variable_section (decl_op1, false)
33068 != get_variable_section (decl_op0, false));
33070 if (TREE_CODE (decl_op1) == LABEL_DECL
33071 && TREE_CODE (decl_op0) == LABEL_DECL)
33072 return (DECL_CONTEXT (decl_op1)
33073 != DECL_CONTEXT (decl_op0));
33076 return true;
33080 return false;
33083 /* Return TRUE if X is a reference to a value in a constant pool. */
33084 extern bool
33085 arm_is_constant_pool_ref (rtx x)
33087 return (MEM_P (x)
33088 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33089 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33092 /* Remember the last target of arm_set_current_function. */
33093 static GTY(()) tree arm_previous_fndecl;
33095 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33097 void
33098 save_restore_target_globals (tree new_tree)
33100 /* If we have a previous state, use it. */
33101 if (TREE_TARGET_GLOBALS (new_tree))
33102 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33103 else if (new_tree == target_option_default_node)
33104 restore_target_globals (&default_target_globals);
33105 else
33107 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33108 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33111 arm_option_params_internal ();
33114 /* Invalidate arm_previous_fndecl. */
33116 void
33117 arm_reset_previous_fndecl (void)
33119 arm_previous_fndecl = NULL_TREE;
33122 /* Establish appropriate back-end context for processing the function
33123 FNDECL. The argument might be NULL to indicate processing at top
33124 level, outside of any function scope. */
33126 static void
33127 arm_set_current_function (tree fndecl)
33129 if (!fndecl || fndecl == arm_previous_fndecl)
33130 return;
33132 tree old_tree = (arm_previous_fndecl
33133 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33134 : NULL_TREE);
33136 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33138 /* If current function has no attributes but previous one did,
33139 use the default node. */
33140 if (! new_tree && old_tree)
33141 new_tree = target_option_default_node;
33143 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33144 pop to the default have been handled by save_restore_target_globals
33145 from arm_pragma_target_parse. */
33146 if (old_tree == new_tree)
33147 return;
33149 arm_previous_fndecl = fndecl;
33151 /* First set the target options. */
33152 cl_target_option_restore (&global_options, &global_options_set,
33153 TREE_TARGET_OPTION (new_tree));
33155 save_restore_target_globals (new_tree);
33157 arm_override_options_after_change_1 (&global_options, &global_options_set);
33160 /* Implement TARGET_OPTION_PRINT. */
33162 static void
33163 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33165 int flags = ptr->x_target_flags;
33166 const char *fpu_name;
33168 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33169 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33171 fprintf (file, "%*sselected isa %s\n", indent, "",
33172 TARGET_THUMB2_P (flags) ? "thumb2" :
33173 TARGET_THUMB_P (flags) ? "thumb1" :
33174 "arm");
33176 if (ptr->x_arm_arch_string)
33177 fprintf (file, "%*sselected architecture %s\n", indent, "",
33178 ptr->x_arm_arch_string);
33180 if (ptr->x_arm_cpu_string)
33181 fprintf (file, "%*sselected CPU %s\n", indent, "",
33182 ptr->x_arm_cpu_string);
33184 if (ptr->x_arm_tune_string)
33185 fprintf (file, "%*sselected tune %s\n", indent, "",
33186 ptr->x_arm_tune_string);
33188 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33191 /* Hook to determine if one function can safely inline another. */
33193 static bool
33194 arm_can_inline_p (tree caller, tree callee)
33196 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33197 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33198 bool can_inline = true;
33200 struct cl_target_option *caller_opts
33201 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33202 : target_option_default_node);
33204 struct cl_target_option *callee_opts
33205 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33206 : target_option_default_node);
33208 if (callee_opts == caller_opts)
33209 return true;
33211 /* Callee's ISA features should be a subset of the caller's. */
33212 struct arm_build_target caller_target;
33213 struct arm_build_target callee_target;
33214 caller_target.isa = sbitmap_alloc (isa_num_bits);
33215 callee_target.isa = sbitmap_alloc (isa_num_bits);
33217 arm_configure_build_target (&caller_target, caller_opts, false);
33218 arm_configure_build_target (&callee_target, callee_opts, false);
33219 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33220 can_inline = false;
33222 sbitmap_free (caller_target.isa);
33223 sbitmap_free (callee_target.isa);
33225 /* OK to inline between different modes.
33226 Function with mode specific instructions, e.g using asm,
33227 must be explicitly protected with noinline. */
33228 return can_inline;
33231 /* Hook to fix function's alignment affected by target attribute. */
33233 static void
33234 arm_relayout_function (tree fndecl)
33236 if (DECL_USER_ALIGN (fndecl))
33237 return;
33239 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33241 if (!callee_tree)
33242 callee_tree = target_option_default_node;
33244 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33245 SET_DECL_ALIGN
33246 (fndecl,
33247 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33250 /* Inner function to process the attribute ((target (...))): take an argument
33251 and set the current options from that argument. If we have a list,
33252 recursively go over the list. */
33254 static bool
33255 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33257 if (TREE_CODE (args) == TREE_LIST)
33259 bool ret = true;
33261 for (; args; args = TREE_CHAIN (args))
33262 if (TREE_VALUE (args)
33263 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33264 ret = false;
33265 return ret;
33268 else if (TREE_CODE (args) != STRING_CST)
33270 error ("attribute %<target%> argument not a string");
33271 return false;
33274 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33275 char *q;
33277 while ((q = strtok (argstr, ",")) != NULL)
33279 argstr = NULL;
33280 if (!strcmp (q, "thumb"))
33282 opts->x_target_flags |= MASK_THUMB;
33283 if (TARGET_FDPIC && !arm_arch_thumb2)
33284 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33287 else if (!strcmp (q, "arm"))
33288 opts->x_target_flags &= ~MASK_THUMB;
33290 else if (!strcmp (q, "general-regs-only"))
33291 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33293 else if (startswith (q, "fpu="))
33295 int fpu_index;
33296 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33297 &fpu_index, CL_TARGET))
33299 error ("invalid fpu for target attribute or pragma %qs", q);
33300 return false;
33302 if (fpu_index == TARGET_FPU_auto)
33304 /* This doesn't really make sense until we support
33305 general dynamic selection of the architecture and all
33306 sub-features. */
33307 sorry ("auto fpu selection not currently permitted here");
33308 return false;
33310 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33312 else if (startswith (q, "arch="))
33314 char *arch = q + 5;
33315 const arch_option *arm_selected_arch
33316 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33318 if (!arm_selected_arch)
33320 error ("invalid architecture for target attribute or pragma %qs",
33322 return false;
33325 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33327 else if (q[0] == '+')
33329 opts->x_arm_arch_string
33330 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33332 else
33334 error ("unknown target attribute or pragma %qs", q);
33335 return false;
33339 return true;
33342 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33344 tree
33345 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33346 struct gcc_options *opts_set)
33348 struct cl_target_option cl_opts;
33350 if (!arm_valid_target_attribute_rec (args, opts))
33351 return NULL_TREE;
33353 cl_target_option_save (&cl_opts, opts, opts_set);
33354 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33355 arm_option_check_internal (opts);
33356 /* Do any overrides, such as global options arch=xxx.
33357 We do this since arm_active_target was overridden. */
33358 arm_option_reconfigure_globals ();
33359 arm_options_perform_arch_sanity_checks ();
33360 arm_option_override_internal (opts, opts_set);
33362 return build_target_option_node (opts, opts_set);
33365 static void
33366 add_attribute (const char * mode, tree *attributes)
33368 size_t len = strlen (mode);
33369 tree value = build_string (len, mode);
33371 TREE_TYPE (value) = build_array_type (char_type_node,
33372 build_index_type (size_int (len)));
33374 *attributes = tree_cons (get_identifier ("target"),
33375 build_tree_list (NULL_TREE, value),
33376 *attributes);
33379 /* For testing. Insert thumb or arm modes alternately on functions. */
33381 static void
33382 arm_insert_attributes (tree fndecl, tree * attributes)
33384 const char *mode;
33386 if (! TARGET_FLIP_THUMB)
33387 return;
33389 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33390 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33391 return;
33393 /* Nested definitions must inherit mode. */
33394 if (current_function_decl)
33396 mode = TARGET_THUMB ? "thumb" : "arm";
33397 add_attribute (mode, attributes);
33398 return;
33401 /* If there is already a setting don't change it. */
33402 if (lookup_attribute ("target", *attributes) != NULL)
33403 return;
33405 mode = thumb_flipper ? "thumb" : "arm";
33406 add_attribute (mode, attributes);
33408 thumb_flipper = !thumb_flipper;
33411 /* Hook to validate attribute((target("string"))). */
33413 static bool
33414 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33415 tree args, int ARG_UNUSED (flags))
33417 bool ret = true;
33418 struct gcc_options func_options, func_options_set;
33419 tree cur_tree, new_optimize;
33420 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33422 /* Get the optimization options of the current function. */
33423 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33425 /* If the function changed the optimization levels as well as setting target
33426 options, start with the optimizations specified. */
33427 if (!func_optimize)
33428 func_optimize = optimization_default_node;
33430 /* Init func_options. */
33431 memset (&func_options, 0, sizeof (func_options));
33432 init_options_struct (&func_options, NULL);
33433 lang_hooks.init_options_struct (&func_options);
33434 memset (&func_options_set, 0, sizeof (func_options_set));
33436 /* Initialize func_options to the defaults. */
33437 cl_optimization_restore (&func_options, &func_options_set,
33438 TREE_OPTIMIZATION (func_optimize));
33440 cl_target_option_restore (&func_options, &func_options_set,
33441 TREE_TARGET_OPTION (target_option_default_node));
33443 /* Set func_options flags with new target mode. */
33444 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33445 &func_options_set);
33447 if (cur_tree == NULL_TREE)
33448 ret = false;
33450 new_optimize = build_optimization_node (&func_options, &func_options_set);
33452 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33454 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33456 return ret;
33459 /* Match an ISA feature bitmap to a named FPU. We always use the
33460 first entry that exactly matches the feature set, so that we
33461 effectively canonicalize the FPU name for the assembler. */
33462 static const char*
33463 arm_identify_fpu_from_isa (sbitmap isa)
33465 auto_sbitmap fpubits (isa_num_bits);
33466 auto_sbitmap cand_fpubits (isa_num_bits);
33468 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33470 /* If there are no ISA feature bits relating to the FPU, we must be
33471 doing soft-float. */
33472 if (bitmap_empty_p (fpubits))
33473 return "softvfp";
33475 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33477 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33478 if (bitmap_equal_p (fpubits, cand_fpubits))
33479 return all_fpus[i].name;
33481 /* We must find an entry, or things have gone wrong. */
33482 gcc_unreachable ();
33485 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33486 by the function DECL. */
33487 void
33488 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33490 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33492 struct cl_target_option *targ_options;
33493 if (target_parts)
33494 targ_options = TREE_TARGET_OPTION (target_parts);
33495 else
33496 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33497 gcc_assert (targ_options);
33499 arm_print_asm_arch_directives (stream, targ_options);
33501 fprintf (stream, "\t.syntax unified\n");
33503 if (TARGET_THUMB)
33505 if (is_called_in_ARM_mode (decl)
33506 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33507 && cfun->is_thunk))
33508 fprintf (stream, "\t.code 32\n");
33509 else if (TARGET_THUMB1)
33510 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33511 else
33512 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33514 else
33515 fprintf (stream, "\t.arm\n");
33517 if (TARGET_POKE_FUNCTION_NAME)
33518 arm_poke_function_name (stream, (const char *) name);
33521 /* If MEM is in the form of [base+offset], extract the two parts
33522 of address and set to BASE and OFFSET, otherwise return false
33523 after clearing BASE and OFFSET. */
33525 static bool
33526 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33528 rtx addr;
33530 gcc_assert (MEM_P (mem));
33532 addr = XEXP (mem, 0);
33534 /* Strip off const from addresses like (const (addr)). */
33535 if (GET_CODE (addr) == CONST)
33536 addr = XEXP (addr, 0);
33538 if (REG_P (addr))
33540 *base = addr;
33541 *offset = const0_rtx;
33542 return true;
33545 if (GET_CODE (addr) == PLUS
33546 && GET_CODE (XEXP (addr, 0)) == REG
33547 && CONST_INT_P (XEXP (addr, 1)))
33549 *base = XEXP (addr, 0);
33550 *offset = XEXP (addr, 1);
33551 return true;
33554 *base = NULL_RTX;
33555 *offset = NULL_RTX;
33557 return false;
33560 /* If INSN is a load or store of address in the form of [base+offset],
33561 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33562 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33563 otherwise return FALSE. */
33565 static bool
33566 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33568 rtx x, dest, src;
33570 gcc_assert (INSN_P (insn));
33571 x = PATTERN (insn);
33572 if (GET_CODE (x) != SET)
33573 return false;
33575 src = SET_SRC (x);
33576 dest = SET_DEST (x);
33577 if (REG_P (src) && MEM_P (dest))
33579 *is_load = false;
33580 extract_base_offset_in_addr (dest, base, offset);
33582 else if (MEM_P (src) && REG_P (dest))
33584 *is_load = true;
33585 extract_base_offset_in_addr (src, base, offset);
33587 else
33588 return false;
33590 return (*base != NULL_RTX && *offset != NULL_RTX);
33593 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33595 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33596 and PRI are only calculated for these instructions. For other instructions,
33597 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33598 instruction fusion can be supported by returning different priorities.
33600 It's important that irrelevant instructions get the largest FUSION_PRI. */
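/* For example, a load from [r1, #8] gets *fusion_pri = max_pri - 2 and
   *pri = (max_pri - 1) / 2 - (1 << 20) - 8, so loads and stores get
   distinct fusion priorities and accesses are then ordered by base
   register and offset. */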
33602 static void
33603 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33604 int *fusion_pri, int *pri)
33606 int tmp, off_val;
33607 bool is_load;
33608 rtx base, offset;
33610 gcc_assert (INSN_P (insn));
33612 tmp = max_pri - 1;
33613 if (!fusion_load_store (insn, &base, &offset, &is_load))
33615 *pri = tmp;
33616 *fusion_pri = tmp;
33617 return;
33620 /* Load goes first. */
33621 if (is_load)
33622 *fusion_pri = tmp - 1;
33623 else
33624 *fusion_pri = tmp - 2;
33626 tmp /= 2;
33628 /* INSN with smaller base register goes first. */
33629 tmp -= ((REGNO (base) & 0xff) << 20);
33631 /* INSN with smaller offset goes first. */
33632 off_val = (int)(INTVAL (offset));
33633 if (off_val >= 0)
33634 tmp -= (off_val & 0xfffff);
33635 else
33636 tmp += ((- off_val) & 0xfffff);
33638 *pri = tmp;
33639 return;
33643 /* Construct and return a PARALLEL RTX vector with elements numbering the
33644 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33645 the vector - from the perspective of the architecture. This does not
33646 line up with GCC's perspective on lane numbers, so we end up with
33647 different masks depending on our target endian-ness. The diagram
33648 below may help. We must draw the distinction when building masks
33649 which select one half of the vector. An instruction selecting
33650 architectural low-lanes for a big-endian target, must be described using
33651 a mask selecting GCC high-lanes.
33653 Big-Endian Little-Endian
33655 GCC 0 1 2 3 3 2 1 0
33656 | x | x | x | x | | x | x | x | x |
33657 Architecture 3 2 1 0 3 2 1 0
33659 Low Mask: { 2, 3 } { 0, 1 }
33660 High Mask: { 0, 1 } { 2, 3 } */
33663 rtx
33664 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33666 int nunits = GET_MODE_NUNITS (mode);
33667 rtvec v = rtvec_alloc (nunits / 2);
33668 int high_base = nunits / 2;
33669 int low_base = 0;
33670 int base;
33671 rtx t1;
33672 int i;
33674 if (BYTES_BIG_ENDIAN)
33675 base = high ? low_base : high_base;
33676 else
33677 base = high ? high_base : low_base;
33679 for (i = 0; i < nunits / 2; i++)
33680 RTVEC_ELT (v, i) = GEN_INT (base + i);
33682 t1 = gen_rtx_PARALLEL (mode, v);
33683 return t1;
33686 /* Check OP for validity as a PARALLEL RTX vector with elements
33687 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33688 from the perspective of the architecture. See the diagram above
33689 arm_simd_vect_par_cnst_half for more details. */
33691 bool
33692 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33693 bool high)
33695 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33696 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33697 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33698 int i = 0;
33700 if (!VECTOR_MODE_P (mode))
33701 return false;
33703 if (count_op != count_ideal)
33704 return false;
33706 for (i = 0; i < count_ideal; i++)
33708 rtx elt_op = XVECEXP (op, 0, i);
33709 rtx elt_ideal = XVECEXP (ideal, 0, i);
33711 if (!CONST_INT_P (elt_op)
33712 || INTVAL (elt_ideal) != INTVAL (elt_op))
33713 return false;
33715 return true;
33718 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
33719 in Thumb1. */
33720 static bool
33721 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33722 const_tree)
33724 /* For now, we punt and not handle this for TARGET_THUMB1. */
33725 if (vcall_offset && TARGET_THUMB1)
33726 return false;
33728 /* Otherwise ok. */
33729 return true;
33732 /* Generate RTL for a conditional branch with rtx comparison CODE in
33733 mode CC_MODE. The destination of the unlikely conditional branch
33734 is LABEL_REF. */
33736 void
33737 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33738 rtx label_ref)
33740 rtx x;
33741 x = gen_rtx_fmt_ee (code, VOIDmode,
33742 gen_rtx_REG (cc_mode, CC_REGNUM),
33743 const0_rtx);
33745 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33746 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33747 pc_rtx);
33748 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33751 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33753 For pure-code sections there is no letter code for this attribute, so
33754 output all the section flags numerically when this is needed. */
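/* A typical pure-code executable section therefore gets 0x20000006,
   i.e. SHF_ARM_PURECODE | SHF_EXECINSTR | SHF_ALLOC. */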
33756 static bool
33757 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33760 if (flags & SECTION_ARM_PURECODE)
33762 *num = 0x20000000;
33764 if (!(flags & SECTION_DEBUG))
33765 *num |= 0x2;
33766 if (flags & SECTION_EXCLUDE)
33767 *num |= 0x80000000;
33768 if (flags & SECTION_WRITE)
33769 *num |= 0x1;
33770 if (flags & SECTION_CODE)
33771 *num |= 0x4;
33772 if (flags & SECTION_MERGE)
33773 *num |= 0x10;
33774 if (flags & SECTION_STRINGS)
33775 *num |= 0x20;
33776 if (flags & SECTION_TLS)
33777 *num |= 0x400;
33778 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33779 *num |= 0x200;
33781 return true;
33784 return false;
33787 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33789 If pure-code is passed as an option, make sure all functions are in
33790 sections that have the SHF_ARM_PURECODE attribute. */
33792 static section *
33793 arm_function_section (tree decl, enum node_frequency freq,
33794 bool startup, bool exit)
33796 const char * section_name;
33797 section * sec;
33799 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33800 return default_function_section (decl, freq, startup, exit);
33802 if (!target_pure_code)
33803 return default_function_section (decl, freq, startup, exit);
33806 section_name = DECL_SECTION_NAME (decl);
33808 /* If a function is not in a named section then it falls under the 'default'
33809 text section, also known as '.text'. We can preserve previous behavior as
33810 the default text section already has the SHF_ARM_PURECODE section
33811 attribute. */
33812 if (!section_name)
33814 section *default_sec = default_function_section (decl, freq, startup,
33815 exit);
33817 /* If default_sec is not null, then it must be a special section like for
33818 example .text.startup. We set the pure-code attribute and return the
33819 same section to preserve existing behavior. */
33820 if (default_sec)
33821 default_sec->common.flags |= SECTION_ARM_PURECODE;
33822 return default_sec;
33825 /* Otherwise look whether a section has already been created with
33826 'section_name'. */
33827 sec = get_named_section (decl, section_name, 0);
33828 if (!sec)
33829 /* If that is not the case passing NULL as the section's name to
33830 'get_named_section' will create a section with the declaration's
33831 section name. */
33832 sec = get_named_section (decl, NULL, 0);
33834 /* Set the SHF_ARM_PURECODE attribute. */
33835 sec->common.flags |= SECTION_ARM_PURECODE;
33837 return sec;
33840 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
33842 If DECL is a function declaration and pure-code is passed as an option
33843 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33844 section's name and RELOC indicates whether the declaration's initializer may
33845 contain runtime relocations. */
33847 static unsigned int
33848 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33850 unsigned int flags = default_section_type_flags (decl, name, reloc);
33852 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33853 flags |= SECTION_ARM_PURECODE;
33855 return flags;
33858 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33860 static void
33861 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33862 rtx op0, rtx op1,
33863 rtx *quot_p, rtx *rem_p)
33865 if (mode == SImode)
33866 gcc_assert (!TARGET_IDIV);
33868 scalar_int_mode libval_mode
33869 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33871 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33872 libval_mode, op0, mode, op1, mode);
33874 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33875 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33876 GET_MODE_SIZE (mode));
33878 gcc_assert (quotient);
33879 gcc_assert (remainder);
33881 *quot_p = quotient;
33882 *rem_p = remainder;
33885 /* This function checks for the availability of the coprocessor builtin passed
33886 in BUILTIN for the current target. Returns true if it is available and
33887 false otherwise. If a BUILTIN is passed for which this function has not
33888 been implemented, it will cause an internal compiler error. */
33890 bool
33891 arm_coproc_builtin_available (enum unspecv builtin)
33893 /* None of these builtins are available in Thumb mode if the target only
33894 supports Thumb-1. */
33895 if (TARGET_THUMB1)
33896 return false;
33898 switch (builtin)
33900 case VUNSPEC_CDP:
33901 case VUNSPEC_LDC:
33902 case VUNSPEC_LDCL:
33903 case VUNSPEC_STC:
33904 case VUNSPEC_STCL:
33905 case VUNSPEC_MCR:
33906 case VUNSPEC_MRC:
33907 if (arm_arch4)
33908 return true;
33909 break;
33910 case VUNSPEC_CDP2:
33911 case VUNSPEC_LDC2:
33912 case VUNSPEC_LDC2L:
33913 case VUNSPEC_STC2:
33914 case VUNSPEC_STC2L:
33915 case VUNSPEC_MCR2:
33916 case VUNSPEC_MRC2:
33917 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33918 ARMv8-{A,M}. */
33919 if (arm_arch5t)
33920 return true;
33921 break;
33922 case VUNSPEC_MCRR:
33923 case VUNSPEC_MRRC:
33924 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33925 ARMv8-{A,M}. */
33926 if (arm_arch6 || arm_arch5te)
33927 return true;
33928 break;
33929 case VUNSPEC_MCRR2:
33930 case VUNSPEC_MRRC2:
33931 if (arm_arch6)
33932 return true;
33933 break;
33934 default:
33935 gcc_unreachable ();
33937 return false;
33940 /* This function returns true if OP is a valid memory operand for the ldc and
33941 stc coprocessor instructions and false otherwise. */
33943 bool
33944 arm_coproc_ldc_stc_legitimate_address (rtx op)
33946 HOST_WIDE_INT range;
33947 /* Has to be a memory operand. */
33948 if (!MEM_P (op))
33949 return false;
33951 op = XEXP (op, 0);
33953 /* We accept registers. */
33954 if (REG_P (op))
33955 return true;
33957 switch GET_CODE (op)
33959 case PLUS:
33961 /* Or registers with an offset. */
33962 if (!REG_P (XEXP (op, 0)))
33963 return false;
33965 op = XEXP (op, 1);
33967 /* The offset must be an immediate though. */
33968 if (!CONST_INT_P (op))
33969 return false;
33971 range = INTVAL (op);
33973 /* Within the range of [-1020,1020]. */
33974 if (!IN_RANGE (range, -1020, 1020))
33975 return false;
33977 /* And a multiple of 4. */
33978 return (range % 4) == 0;
33980 case PRE_INC:
33981 case POST_INC:
33982 case PRE_DEC:
33983 case POST_DEC:
33984 return REG_P (XEXP (op, 0));
33985 default:
33986 gcc_unreachable ();
33988 return false;
33991 /* Return the diagnostic message string if conversion from FROMTYPE to
33992 TOTYPE is not allowed, NULL otherwise. */
33994 static const char *
33995 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33997 if (element_mode (fromtype) != element_mode (totype))
33999 /* Do not allow conversions to/from BFmode scalar types. */
34000 if (TYPE_MODE (fromtype) == BFmode)
34001 return N_("invalid conversion from type %<bfloat16_t%>");
34002 if (TYPE_MODE (totype) == BFmode)
34003 return N_("invalid conversion to type %<bfloat16_t%>");
34006 /* Conversion allowed. */
34007 return NULL;
34010 /* Return the diagnostic message string if the unary operation OP is
34011 not permitted on TYPE, NULL otherwise. */
34013 static const char *
34014 arm_invalid_unary_op (int op, const_tree type)
34016 /* Reject all single-operand operations on BFmode except for &. */
34017 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34018 return N_("operation not permitted on type %<bfloat16_t%>");
34020 /* Operation allowed. */
34021 return NULL;
34024 /* Return the diagnostic message string if the binary operation OP is
34025 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34027 static const char *
34028 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34029 const_tree type2)
34031 /* Reject all 2-operand operations on BFmode. */
34032 if (element_mode (type1) == BFmode
34033 || element_mode (type2) == BFmode)
34034 return N_("operation not permitted on type %<bfloat16_t%>");
34036 /* Operation allowed. */
34037 return NULL;
34040 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34042 In VFPv1, VFP registers could only be accessed in the mode they were
34043 set, so subregs would be invalid there. However, we don't support
34044 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34046 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34047 VFP registers in little-endian order. We can't describe that accurately to
34048 GCC, so avoid taking subregs of such values.
34050 The only exception is going from a 128-bit to a 64-bit type. In that
34051 case the data layout happens to be consistent for big-endian, so we
34052 explicitly allow that case. */
34054 static bool
34055 arm_can_change_mode_class (machine_mode from, machine_mode to,
34056 reg_class_t rclass)
34058 if (TARGET_BIG_END
34059 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34060 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34061 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34062 && reg_classes_intersect_p (VFP_REGS, rclass))
34063 return false;
34064 return true;
34067 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34068 strcpy from constants will be faster. */
34070 static HOST_WIDE_INT
34071 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34073 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34074 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34075 return MAX (align, BITS_PER_WORD * factor);
34076 return align;
34079 /* Emit a speculation barrier on target architectures that do not have
34080 DSB/ISB directly. Such systems probably don't need a barrier
34081 themselves, but if the code is ever run on a later architecture, it
34082 might become a problem. */
34083 void
34084 arm_emit_speculation_barrier_function ()
34086 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34089 /* Have we recorded an explicit access to the Q bit of APSR? */
34090 bool
34091 arm_q_bit_access (void)
34093 if (cfun && cfun->decl)
34094 return lookup_attribute ("acle qbit",
34095 DECL_ATTRIBUTES (cfun->decl));
34096 return true;
34099 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34100 bool
34101 arm_ge_bits_access (void)
34103 if (cfun && cfun->decl)
34104 return lookup_attribute ("acle gebits",
34105 DECL_ATTRIBUTES (cfun->decl));
34106 return true;
34109 /* Return NULL if INSN is valid within a low-overhead loop.
34110 Otherwise return a string describing why doloop cannot be applied. */
34112 static const char *
34113 arm_invalid_within_doloop (const rtx_insn *insn)
34115 if (!TARGET_HAVE_LOB)
34116 return default_invalid_within_doloop (insn);
34118 if (CALL_P (insn))
34119 return "Function call in the loop.";
34121 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34122 return "LR is used inside loop.";
34124 return NULL;
34127 bool
34128 arm_target_insn_ok_for_lob (rtx insn)
34130 basic_block bb = BLOCK_FOR_INSN (insn);
34131 /* Make sure the basic block of the target insn is a simple latch
34132 having as its single predecessor and successor the body of the loop
34133 itself. Only simple loops with a single basic block as the body are
34134 supported for 'low-overhead loop', making sure that the LE target is
34135 above LE itself in the generated code. */
34137 return single_succ_p (bb)
34138 && single_pred_p (bb)
34139 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34140 && contains_no_active_insn_p (bb);
34143 #if CHECKING_P
34144 namespace selftest {
34146 /* Scan the static data tables generated by parsecpu.awk looking for
34147 potential issues with the data. We primarily check for
34148 inconsistencies in the option extensions at present (extensions
34149 that duplicate others but aren't marked as aliases). Furthermore,
34150 for correct canonicalization later options must never be a subset
34151 of an earlier option. Any extension should also only specify other
34152 feature bits and never an architecture bit. The architecture is inferred
34153 from the declaration of the extension. */
34154 static void
34155 arm_test_cpu_arch_data (void)
34157 const arch_option *arch;
34158 const cpu_option *cpu;
34159 auto_sbitmap target_isa (isa_num_bits);
34160 auto_sbitmap isa1 (isa_num_bits);
34161 auto_sbitmap isa2 (isa_num_bits);
34163 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34165 const cpu_arch_extension *ext1, *ext2;
34167 if (arch->common.extensions == NULL)
34168 continue;
34170 arm_initialize_isa (target_isa, arch->common.isa_bits);
34172 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34174 if (ext1->alias)
34175 continue;
34177 arm_initialize_isa (isa1, ext1->isa_bits);
34178 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34180 if (ext2->alias || ext1->remove != ext2->remove)
34181 continue;
34183 arm_initialize_isa (isa2, ext2->isa_bits);
34184 /* If the option is a subset of the parent option, it doesn't
34185 add anything and so isn't useful. */
34186 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34188 /* If the extension specifies any architectural bits then
34189 disallow it. Extensions should only specify feature bits. */
34190 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34195 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34197 const cpu_arch_extension *ext1, *ext2;
34199 if (cpu->common.extensions == NULL)
34200 continue;
34202 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34204 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34206 if (ext1->alias)
34207 continue;
34209 arm_initialize_isa (isa1, ext1->isa_bits);
34210 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34212 if (ext2->alias || ext1->remove != ext2->remove)
34213 continue;
34215 arm_initialize_isa (isa2, ext2->isa_bits);
34216 /* If the option is a subset of the parent option, it doesn't
34217 add anything and so isn't useful. */
34218 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34220 /* If the extension specifies any architectural bits then
34221 disallow it. Extensions should only specify feature bits. */
34222 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
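/* Illustrative example of what the checks above catch (hypothetical option
   names, not from arm-cpus.in): if an entry declared an extension "+foo"
   enabling {bit_a, bit_b} and a later extension "+bar" enabling only
   {bit_a}, the subset assertion would fire because "+bar" adds nothing over
   "+foo"; and if "+bar" also set a bit that is part of the architecture's
   own ISA bits, the intersection assertion would fire, since extensions
   must only add feature bits.  */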
34228 /* Scan the static data tables generated by parsecpu.awk looking for
34229 potential issues with the data. Here we check for consistency between the
34230 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34231 a feature bit that is not defined by any FPU flag. */
34232 static void
34233 arm_test_fpu_data (void)
34235 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34236 auto_sbitmap fpubits (isa_num_bits);
34237 auto_sbitmap tmpset (isa_num_bits);
34239 static const enum isa_feature fpu_bitlist_internal[]
34240 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34241 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
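/* The loop below successively removes from ISA_ALL_FPU_INTERNAL the bits
   provided by each FPU entry (effectively all_fpubits &= ~fpubits); any
   bit still set after the loop is not provided by any FPU and triggers
   the error reported further down.  */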
34243 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34245 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34246 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34247 bitmap_clear (isa_all_fpubits_internal);
34248 bitmap_copy (isa_all_fpubits_internal, tmpset);
34251 if (!bitmap_empty_p (isa_all_fpubits_internal))
34253 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34254 " group that are not defined by any FPU.\n"
34255 " Check your arm-cpus.in.\n");
34256 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34260 static void
34261 arm_run_selftests (void)
34263 arm_test_cpu_arch_data ();
34264 arm_test_fpu_data ();
34266 } /* Namespace selftest. */
34268 #undef TARGET_RUN_TARGET_SELFTESTS
34269 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34270 #endif /* CHECKING_P */
34272 /* Implement TARGET_STACK_PROTECT_GUARD. When a global-variable-based
34273 guard is being used, return the default guard; otherwise
34274 return a null tree. */
34275 static tree
34276 arm_stack_protect_guard (void)
34278 if (arm_stack_protector_guard == SSP_GLOBAL)
34279 return default_stack_protect_guard ();
34281 return NULL_TREE;
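/* Usage note (an assumption, not from the original source): returning
   NULL_TREE here tells the middle end not to load the canary from a global
   object; the register-based guard configuration selected with the
   -mstack-protector-guard= option then relies on the target's
   stack-protector insn patterns to fetch the canary from the chosen
   system register and offset.  */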
34284 /* Worker function for TARGET_MD_ASM_ADJUST, used when in Thumb-1 mode.
34285 Unlike the Arm version, we do NOT implement asm flag outputs. */
34287 rtx_insn *
34288 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34289 vec<machine_mode> & /*input_modes*/,
34290 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34291 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34293 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34294 if (startswith (constraints[i], "=@cc"))
34296 sorry ("%<asm%> flags not supported in thumb1 mode");
34297 break;
34299 return NULL;
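/* Illustrative example of the rejected construct (hypothetical user code,
   not from the original source): an asm flag output uses a "=@cc<cond>"
   constraint, e.g.
     int eq;
     asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));
   which the Arm-mode hook accepts but which Thumb-1 reports via sorry ().  */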
34302 /* Generate code to enable conditional branches in functions over 1 MiB.
34303 Parameters are:
34304 operands: the operand list of the asm insn (see arm_cond_branch or
34305 arm_cond_branch_reversed).
34306 pos_label: an index into the operands array; operands[pos_label] is
34307 the asm label of the final jump destination.
34308 dest: a string used to generate the asm label of the intermediate
34309 destination.
34310 branch_format: a string denoting the format of the intermediate branch,
34311 e.g. "beq", "bne", etc. */
34313 const char *
34314 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34315 const char * branch_format)
34317 rtx_code_label * tmp_label = gen_label_rtx ();
34318 char label_buf[256];
34319 char buffer[128];
34320 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34321 CODE_LABEL_NUMBER (tmp_label));
34322 const char *label_ptr = arm_strip_name_encoding (label_buf);
34323 rtx dest_label = operands[pos_label];
34324 operands[pos_label] = tmp_label;
34326 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34327 output_asm_insn (buffer, operands);
34329 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34330 operands[pos_label] = dest_label;
34331 output_asm_insn (buffer, operands);
34332 return "";
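/* Illustrative example (label names are assumptions, not from the original
   source): a caller wanting a far "beq" would typically pass the inverted
   condition as BRANCH_FORMAT, so the emitted sequence looks roughly like
	bne	.Lbcond42	@ short-range conditional branch
	b	.Lfar_target	@ unconditional branch, much larger range
     .Lbcond42:
   i.e. the conditional branch only has to reach the local fall-through
   label while the unconditional branch covers the long distance.  */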
34335 /* Return the register class to use for the base register of a memory
34336 access in MODE; the narrow MVE modes are restricted to LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34337 enum reg_class
34338 arm_mode_base_reg_class (machine_mode mode)
34340 if (TARGET_HAVE_MVE
34341 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34342 return LO_REGS;
34344 return MODE_BASE_REG_REG_CLASS (mode);
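/* Illustrative note (an assumption, not from the original source): the
   modes listed above are the memory-side modes of MVE widening loads and
   narrowing stores, whose encodings are assumed here to accept only a low
   base register, hence the LO_REGS restriction; every other mode simply
   defers to MODE_BASE_REG_REG_CLASS.  */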
34347 struct gcc_target targetm = TARGET_INITIALIZER;
34349 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34351 opt_machine_mode
34352 arm_get_mask_mode (machine_mode mode)
34354 if (TARGET_HAVE_MVE)
34355 return arm_mode_to_pred_mode (mode);
34357 return default_get_mask_mode (mode);
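/* Illustrative note (an assumption, not from the original source): with MVE
   enabled a V4SImode vector would map to the four-lane predicate mode
   V4BImode (a per-lane view of the VPR.P0 predicate register), while
   without MVE the default_get_mask_mode fallback yields the usual
   integer-vector mask mode.  */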
34360 #include "gt-arm.h"