[ARM] Fix ICE in Armv8-M Security Extensions code
[official-gcc.git] / gcc / config / arm / arm.c
blob 106e3edce0d6f2518eb391c436c5213a78d1275b
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82   int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
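/* Example usage of the two CMSE attributes registered above (illustrative
   declarations only; the names secure_entry and ns_callback are placeholders),
   compiled with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int);
     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);

   cmse_nonsecure_entry marks a secure function that may be called from
   non-secure state; cmse_nonsecure_call marks calls made through a function
   pointer to non-secure code.  The handlers registered above validate the
   attributes; the register clearing and BXNS/BLXNS call/return sequences are
   generated elsewhere in the back end.  */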
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
622 #endif /* ARM_UNWIND_INFO */
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is -4088 + 1 + 4095 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
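/* Illustrative arithmetic for the comment above: offsets relative to an
   anchor range over [-4088, +4095], i.e. 4088 + 1 + 4095 = 8184 bytes per
   block, and 8184 = 8 * 1023, giving the stated divisibility by eight.  */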
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
811 extern FILE * asm_out_file;
813 /* True if we are currently building a constant table. */
814 int making_const_table;
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
837 /* Active target architecture and tuning. */
839 struct arm_build_target arm_active_target;
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
914 /* Nonzero if tuning for XScale.  */
915 int arm_tune_xscale = 0;
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
937 /* Nonzero if chip supports the integer division instruction, in ARM state and in Thumb state respectively. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
954 enum arm_pcs arm_pcs_default;
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
981 /* Nonzero if the core has a very small, high-latency multiply unit. */
981 int arm_m_profile_small_mul = 0;
983 /* The condition codes of the ARM, and the inverse function. */
984 static const char * const arm_condition_codes[] =
985 {
986   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
987   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
988 };
990 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
991 int arm_regs_in_sequence[] =
992 {
993   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
994 };
996 #define ARM_LSL_NAME "lsl"
997 #define streq(string1, string2) (strcmp (string1, string2) == 0)
999 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1000 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1001 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1003 /* Initialization code. */
1005 struct cpu_tune
1006 {
1007   enum processor_type scheduler;
1008   unsigned int tune_flags;
1009   const struct tune_params *tune;
1010 };
1012 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1013 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1014   {             \
1015     num_slots,  \
1016     l1_size,    \
1017     l1_line_size \
1018   }
1020 /* arm generic vectorizer costs. */
1021 static const
1022 struct cpu_vec_costs arm_default_vec_cost = {
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
1042 const struct cpu_cost_table cortexa9_extra_costs =
1044 /* ALU */
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1064 /* MULT SImode */
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1073 /* MULT DImode */
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1083 /* LD/ST */
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1106 /* FP SFmode */
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1122 /* FP DFmode */
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1139 /* Vector */
1141 COSTS_N_INSNS (1) /* alu. */
1145 const struct cpu_cost_table cortexa8_extra_costs =
1147 /* ALU */
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1167 /* MULT SImode */
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1176 /* MULT DImode */
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1186 /* LD/ST */
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1209 /* FP SFmode */
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1225 /* FP DFmode */
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1242 /* Vector */
1244 COSTS_N_INSNS (1) /* alu. */
1248 const struct cpu_cost_table cortexa5_extra_costs =
1250 /* ALU */
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1271 /* MULT SImode */
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1280 /* MULT DImode */
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1290 /* LD/ST */
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1313 /* FP SFmode */
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1329 /* FP DFmode */
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1346 /* Vector */
1348 COSTS_N_INSNS (1) /* alu. */
1353 const struct cpu_cost_table cortexa7_extra_costs =
1355 /* ALU */
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1376 /* MULT SImode */
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1385 /* MULT DImode */
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1395 /* LD/ST */
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1418 /* FP SFmode */
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1434 /* FP DFmode */
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1451 /* Vector */
1453 COSTS_N_INSNS (1) /* alu. */
1457 const struct cpu_cost_table cortexa12_extra_costs =
1459 /* ALU */
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1478 /* MULT SImode */
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1488 /* MULT DImode */
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1498 /* LD/ST */
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1521 /* FP SFmode */
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1537 /* FP DFmode */
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1554 /* Vector */
1556 COSTS_N_INSNS (1) /* alu. */
1560 const struct cpu_cost_table cortexa15_extra_costs =
1562 /* ALU */
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1581 /* MULT SImode */
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1591 /* MULT DImode */
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1601 /* LD/ST */
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1624 /* FP SFmode */
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1640 /* FP DFmode */
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1657 /* Vector */
1659 COSTS_N_INSNS (1) /* alu. */
1663 const struct cpu_cost_table v7m_extra_costs =
1665 /* ALU */
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1685 /* MULT SImode */
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1694 /* MULT DImode */
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1704 /* LD/ST */
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1727 /* FP SFmode */
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1743 /* FP DFmode */
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1760 /* Vector */
1762 COSTS_N_INSNS (1) /* alu. */
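/* A rough note on how the numbers above are consumed (assuming the
   usual rtl.h definition, COSTS_N_INSNS (N) == (N) * 4): each entry is
   an *extra* cost added on top of the base cost of one instruction, so
   the v7m "loadd" entry of COSTS_N_INSNS (3) reports a DFmode load as
   roughly three instructions (12 cost units) dearer than a simple ALU
   op, while a zero entry means "no dearer than the base cost".  */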
1766 const struct tune_params arm_slowmul_tune =
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1788 const struct tune_params arm_fastmul_tune =
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1813 const struct tune_params arm_strongarm_tune =
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1835 const struct tune_params arm_xscale_tune =
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1857 const struct tune_params arm_9e_tune =
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_marvell_pj4_tune =
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_v6t2_tune =
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1947 const struct tune_params arm_cortex_a8_tune =
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1969 const struct tune_params arm_cortex_a7_tune =
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1991 const struct tune_params arm_cortex_a15_tune =
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2013 const struct tune_params arm_cortex_a35_tune =
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2035 const struct tune_params arm_cortex_a53_tune =
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2057 const struct tune_params arm_cortex_a57_tune =
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2079 const struct tune_params arm_exynosm1_tune =
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_xgene1_tune =
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2126 const struct tune_params arm_cortex_a5_tune =
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2148 const struct tune_params arm_cortex_a9_tune =
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 const struct tune_params arm_cortex_a12_tune =
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2192 const struct tune_params arm_cortex_a73_tune =
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2215 cycle to execute each. An LDR from the constant pool also takes two cycles
2216 to execute, but mildly increases pipelining opportunity (consecutive
2217 loads/stores can be pipelined together, saving one cycle), and may also
2218 improve icache utilisation. Hence we prefer the constant pool for such
2219 processors. */
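/* Purely as an illustration of the trade-off described above (a sketch,
   not actual compiler output), loading the 32-bit constant 0x12345678
   into r0 takes either

       movw r0, #0x5678        @ 1 cycle
       movt r0, #0x1234        @ 1 cycle

   or a single literal-pool load

       ldr  r0, =0x12345678    @ 2 cycles (assembler literal-pool pseudo)

   so the decision rests on pipelining and icache behaviour rather than
   raw cycle counts.  */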
2221 const struct tune_params arm_v7m_tune =
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2243 /* Cortex-M7 tuning. */
2245 const struct tune_params arm_cortex_m7_tune =
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_fa726te_tune =
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2323 /* Supported TLS relocations. */
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2338 return size_p ? 1 : current_tune->constant_limit;
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2346 return emit_insn (gen_rtx_SET (x, y));
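/* For example, emit_set_insn (reg, const0_rtx) emits the single insn
   (set (reg) (const_int 0)); it is the caller's job to ensure that both
   operands are already legitimate for such a SET.  */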
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2353 unsigned long count = 0;
2355 while (value)
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
2361 return count;
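/* Worked example of the clearing step above: for value == 0b101100 the
   successive values are 0b101000, 0b100000 and 0, so the loop body runs
   three times and bit_count returns 3, one per set bit.  */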
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
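/* This is the sbitmap analogue of bit_count above; it is used later in
   arm_configure_build_target to count how many ISA bits a candidate CPU
   provides beyond the default CPU when searching for a best-fit core.  */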
2377 typedef struct
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2383 /* A small helper for setting fixed-point library libfuncs. */
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2390 char buffer[50];
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2397 set_optab_libfunc (optable, mode, buffer);
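/* For example (names derived from the format strings above, shown for
   illustration only): arm_set_fixed_optab_libfunc (add_optab, E_QQmode,
   "add", "qq", 3) registers "__gnu_addqq3", while a num_suffix of 0
   would drop the trailing digit and register "__gnu_addqq".  */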
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
2417 set_conv_libfunc (optable, to, from, buffer);
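/* For example (derived from the format string above): a "fract"
   conversion from QQmode to HQmode is named "__gnu_fractqqhq2", since
   both are signed fractional modes and therefore take the "2" suffix,
   whereas a conversion from QQmode to SImode is plain "__gnu_fractqqsi"
   because SImode is not a fixed-point mode.  */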
2420 /* Set up library functions unique to ARM. */
2422 static void
2423 arm_init_libfuncs (void)
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
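/* Seen from C, the Run-Time ABI describes the 32-bit variant roughly as
   (a sketch of the documented interface, not code from this file):

       typedef struct { int quot; int rem; } idiv_return;
       idiv_return __aeabi_idivmod (int numerator, int denominator);

   i.e. the quotient comes back in r0 and the remainder in r1, with the
   64-bit __aeabi_ldivmod/__aeabi_uldivmod variants using {r0, r1} and
   {r2, r3} in the same way.  */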
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting to SFmode. */
2533 switch (arm_fp16_format)
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2570 default:
2571 break;
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2625 unsigned int i, j;
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2716 tree va_list_name;
2717 tree ap_field;
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2725 struct __va_list
2727 void *__ap;
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2758 return va_list_type;
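/* Because the tag name is part of the ABI, it also shows up in C++
   mangled names: the ARM C++ ABI treats __va_list as if it were
   declared in namespace std, so (for illustration) a function
   void f (va_list) has its parameter mangled as "St9__va_list".  */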
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 va_list". */
2772 if (TARGET_AAPCS_BASED)
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2779 return valist;
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2803 int flags = opts->x_target_flags;
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2857 /* Recompute the global settings depending on target attribute options. */
2859 static void
2860 arm_option_params_internal (void)
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2874 else if (TARGET_THUMB2)
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2882 else
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
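/* As a quick check of the Thumb-2 numbers above: 248 + 1 + 4095 == 4344
   == 8 * 543, so the block covered by one anchor really is a multiple
   of eight bytes, as required for natural anchor spacing.  */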
2888 /* Increase the number of conditional instructions with -Os. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2911 /* Implement targetm.override_options_after_change. */
2913 static void
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2920 arm_override_options_after_change_1 (&global_options);
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2948 arm_override_options_after_change_1 (opts);
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, e.g., -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2976 /* We need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-based processors.
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2998 if (! opts_set->x_unaligned_access)
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
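/* For example (an illustrative combination rather than an exhaustive
   rule): with WARN_COMPATIBLE set, "-mcpu=cortex-m4 -march=armv7-m"
   should provoke the conflict warning below, because Cortex-M4 is an
   ARMv7E-M core and so carries ISA bits (the DSP extension) that plain
   ARMv7-M lacks; code generation then follows -march while the CPU
   still supplies the default tuning.  */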
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3070 if (opts_set->x_arm_arch_string)
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3078 if (opts_set->x_arm_cpu_string)
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3088 if (opts_set->x_arm_tune_string)
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3095 if (arm_selected_arch)
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3101 if (arm_selected_cpu)
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3115 if (!bitmap_empty_p (isa_delta))
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3129 else
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3140 else
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3148 else if (arm_selected_cpu)
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26
3191 from the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3220 if (sel->common.name == NULL)
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3243 unsigned count;
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3249 if (count < current_bit_count)
3251 best_fit = sel;
3252 current_bit_count = count;
3256 gcc_assert (best_fit);
3257 sel = best_fit;
3260 arm_selected_cpu = sel;
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3319 if (!global_options_set.x_arm_fpu_index)
3321 bool ok;
3322 int fpu_index;
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3338 /* Initialize boolean versions of the architectural flags, for use
3339 in the arm.md file and for enabling feature flags. */
3340 arm_option_reconfigure_globals ();
3342 arm_tune = arm_active_target.tune_core;
3343 tune_flags = arm_active_target.tune_flags;
3344 current_tune = arm_active_target.tune;
3346 /* TBD: Dwarf info for apcs frame is not handled yet. */
3347 if (TARGET_APCS_FRAME)
3348 flag_shrink_wrap = false;
3350 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3352 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3353 target_flags |= MASK_APCS_FRAME;
3356 if (TARGET_POKE_FUNCTION_NAME)
3357 target_flags |= MASK_APCS_FRAME;
3359 if (TARGET_APCS_REENT && flag_pic)
3360 error ("-fpic and -mapcs-reent are incompatible");
3362 if (TARGET_APCS_REENT)
3363 warning (0, "APCS reentrant code not supported. Ignored");
3365 /* Set up some tuning parameters. */
3366 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3367 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3368 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3369 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3370 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3371 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3373 /* For arm2/3 there is no need to do any scheduling if we are doing
3374 software floating-point. */
3375 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3376 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3378 /* Override the default structure alignment for AAPCS ABI. */
3379 if (!global_options_set.x_arm_structure_size_boundary)
3381 if (TARGET_AAPCS_BASED)
3382 arm_structure_size_boundary = 8;
3384 else
3386 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3388 if (arm_structure_size_boundary != 8
3389 && arm_structure_size_boundary != 32
3390 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3392 if (ARM_DOUBLEWORD_ALIGN)
3393 warning (0,
3394 "structure size boundary can only be set to 8, 32 or 64");
3395 else
3396 warning (0, "structure size boundary can only be set to 8 or 32");
3397 arm_structure_size_boundary
3398 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
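/* Illustrative example for the option handled above (assumption, not
   from the sources): with -mstructure-size-boundary=32 a structure
   containing a single char is rounded up to 4 bytes, whereas the
   AAPCS default of 8 leaves its size at 1 byte.  */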
3402 if (TARGET_VXWORKS_RTP)
3404 if (!global_options_set.x_arm_pic_data_is_text_relative)
3405 arm_pic_data_is_text_relative = 0;
3407 else if (flag_pic
3408 && !arm_pic_data_is_text_relative
3409 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3410 /* When text & data segments don't have a fixed displacement, the
3411 intended use is with a single, read only, pic base register.
3412 Unless the user explicitly requested not to do that, set
3413 it. */
3414 target_flags |= MASK_SINGLE_PIC_BASE;
3416 /* If stack checking is disabled, we can use r10 as the PIC register,
3417 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3418 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3420 if (TARGET_VXWORKS_RTP)
3421 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3422 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3425 if (flag_pic && TARGET_VXWORKS_RTP)
3426 arm_pic_register = 9;
3428 if (arm_pic_register_string != NULL)
3430 int pic_register = decode_reg_name (arm_pic_register_string);
3432 if (!flag_pic)
3433 warning (0, "-mpic-register= is useless without -fpic");
3435 /* Prevent the user from choosing an obviously stupid PIC register. */
3436 else if (pic_register < 0 || call_used_regs[pic_register]
3437 || pic_register == HARD_FRAME_POINTER_REGNUM
3438 || pic_register == STACK_POINTER_REGNUM
3439 || pic_register >= PC_REGNUM
3440 || (TARGET_VXWORKS_RTP
3441 && (unsigned int) pic_register != arm_pic_register))
3442 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3443 else
3444 arm_pic_register = pic_register;
3447 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3448 if (fix_cm3_ldrd == 2)
3450 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3451 fix_cm3_ldrd = 1;
3452 else
3453 fix_cm3_ldrd = 0;
3456 /* Hot/Cold partitioning is not currently supported, since we can't
3457 handle literal pool placement in that case. */
3458 if (flag_reorder_blocks_and_partition)
3460 inform (input_location,
3461 "-freorder-blocks-and-partition not supported on this architecture");
3462 flag_reorder_blocks_and_partition = 0;
3463 flag_reorder_blocks = 1;
3466 if (flag_pic)
3467 /* Hoisting PIC address calculations more aggressively provides a small,
3468 but measurable, size reduction for PIC code. Therefore, we decrease
3469 the bar for unrestricted expression hoisting to the cost of PIC address
3470 calculation, which is 2 instructions. */
3471 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3472 global_options.x_param_values,
3473 global_options_set.x_param_values);
3475 /* ARM EABI defaults to strict volatile bitfields. */
3476 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3477 && abi_version_at_least(2))
3478 flag_strict_volatile_bitfields = 1;
3480 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3481 for which we have deemed it beneficial (signified by setting
3482 prefetch.num_slots to 1 or more). */
3483 if (flag_prefetch_loop_arrays < 0
3484 && HAVE_prefetch
3485 && optimize >= 3
3486 && current_tune->prefetch.num_slots > 0)
3487 flag_prefetch_loop_arrays = 1;
3489 /* Set up parameters to be used in the prefetching algorithm. Do not
3490 override the defaults unless we are tuning for a core we have
3491 researched values for. */
3492 if (current_tune->prefetch.num_slots > 0)
3493 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3494 current_tune->prefetch.num_slots,
3495 global_options.x_param_values,
3496 global_options_set.x_param_values);
3497 if (current_tune->prefetch.l1_cache_line_size >= 0)
3498 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3499 current_tune->prefetch.l1_cache_line_size,
3500 global_options.x_param_values,
3501 global_options_set.x_param_values);
3502 if (current_tune->prefetch.l1_cache_size >= 0)
3503 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3504 current_tune->prefetch.l1_cache_size,
3505 global_options.x_param_values,
3506 global_options_set.x_param_values);
3508 /* Use Neon rather than core registers to perform 64-bit
3509 operations. */
3510 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3511 if (use_neon_for_64bits == 1)
3512 prefer_neon_for_64bits = true;
3514 /* Use the alternative scheduling-pressure algorithm by default. */
3515 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3516 global_options.x_param_values,
3517 global_options_set.x_param_values);
3519 /* Look through the ready list and all of the queue for instructions
3520 relevant to the L2 auto-prefetcher. */
3521 int param_sched_autopref_queue_depth;
3523 switch (current_tune->sched_autopref)
3525 case tune_params::SCHED_AUTOPREF_OFF:
3526 param_sched_autopref_queue_depth = -1;
3527 break;
3529 case tune_params::SCHED_AUTOPREF_RANK:
3530 param_sched_autopref_queue_depth = 0;
3531 break;
3533 case tune_params::SCHED_AUTOPREF_FULL:
3534 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3535 break;
3537 default:
3538 gcc_unreachable ();
3541 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3542 param_sched_autopref_queue_depth,
3543 global_options.x_param_values,
3544 global_options_set.x_param_values);
3546 /* Currently, for slow flash data, we just disable literal pools. We also
3547 disable them for pure-code. */
3548 if (target_slow_flash_data || target_pure_code)
3549 arm_disable_literal_pool = true;
3551 /* Disable scheduling fusion by default if the target is not an ARMv7
3552 processor or does not prefer ldrd/strd. */
3553 if (flag_schedule_fusion == 2
3554 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3555 flag_schedule_fusion = 0;
3557 /* Need to remember initial options before they are overridden. */
3558 init_optimize = build_optimization_node (&global_options);
3560 arm_options_perform_arch_sanity_checks ();
3561 arm_option_override_internal (&global_options, &global_options_set);
3562 arm_option_check_internal (&global_options);
3563 arm_option_params_internal ();
3565 /* Create the default target_options structure. */
3566 target_option_default_node = target_option_current_node
3567 = build_target_option_node (&global_options);
3569 /* Register global variables with the garbage collector. */
3570 arm_add_gc_roots ();
3572 /* Record the initial ARM/Thumb mode, for testing. */
3573 thumb_flipper = TARGET_THUMB;
3577 /* Reconfigure global status flags from the active_target.isa. */
3578 void
3579 arm_option_reconfigure_globals (void)
3581 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3582 arm_base_arch = arm_active_target.base_arch;
3584 /* Initialize boolean versions of the architectural flags, for use
3585 in the arm.md file. */
3586 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3587 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3588 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3589 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3590 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3591 arm_arch5te = arm_arch5e
3592 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3593 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3594 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3595 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3596 arm_arch6m = arm_arch6 && !arm_arch_notm;
3597 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3598 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3599 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3600 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3601 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3602 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3603 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3604 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3605 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3606 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3607 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3608 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3609 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3610 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3611 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3612 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3613 if (arm_fp16_inst)
3615 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3616 error ("selected fp16 options are incompatible");
3617 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3620 /* And finally, set up some quirks. */
3621 arm_arch_no_volatile_ce
3622 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3623 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3624 isa_bit_quirk_armv6kz);
3626 /* Use the cp15 method if it is available. */
3627 if (target_thread_pointer == TP_AUTO)
3629 if (arm_arch6k && !TARGET_THUMB1)
3630 target_thread_pointer = TP_CP15;
3631 else
3632 target_thread_pointer = TP_SOFT;
3636 /* Perform some validation between the desired architecture and the rest of the
3637 options. */
3638 void
3639 arm_options_perform_arch_sanity_checks (void)
3641 /* V5 code we generate is completely interworking capable, so we turn off
3642 TARGET_INTERWORK here to avoid many tests later on. */
3644 /* XXX However, we must pass the right pre-processor defines to CPP
3645 or GLD can get confused. This is a hack. */
3646 if (TARGET_INTERWORK)
3647 arm_cpp_interwork = 1;
3649 if (arm_arch5)
3650 target_flags &= ~MASK_INTERWORK;
3652 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3653 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3655 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3656 error ("iwmmxt abi requires an iwmmxt capable cpu");
3658 /* BPABI targets use linker tricks to allow interworking on cores
3659 without thumb support. */
3660 if (TARGET_INTERWORK
3661 && !TARGET_BPABI
3662 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3664 warning (0, "target CPU does not support interworking" );
3665 target_flags &= ~MASK_INTERWORK;
3668 /* If soft-float is specified then don't use FPU. */
3669 if (TARGET_SOFT_FLOAT)
3670 arm_fpu_attr = FPU_NONE;
3671 else
3672 arm_fpu_attr = FPU_VFP;
3674 if (TARGET_AAPCS_BASED)
3676 if (TARGET_CALLER_INTERWORKING)
3677 error ("AAPCS does not support -mcaller-super-interworking");
3678 else
3679 if (TARGET_CALLEE_INTERWORKING)
3680 error ("AAPCS does not support -mcallee-super-interworking");
3683 /* __fp16 support currently assumes the core has ldrh. */
3684 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3685 sorry ("__fp16 and no ldrh");
3687 if (use_cmse && !arm_arch_cmse)
3688 error ("target CPU does not support ARMv8-M Security Extensions");
3690 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3691 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3692 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3693 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3696 if (TARGET_AAPCS_BASED)
3698 if (arm_abi == ARM_ABI_IWMMXT)
3699 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3700 else if (TARGET_HARD_FLOAT_ABI)
3702 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3703 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3704 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3706 else
3707 arm_pcs_default = ARM_PCS_AAPCS;
3709 else
3711 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3712 sorry ("-mfloat-abi=hard and VFP");
3714 if (arm_abi == ARM_ABI_APCS)
3715 arm_pcs_default = ARM_PCS_APCS;
3716 else
3717 arm_pcs_default = ARM_PCS_ATPCS;
3721 static void
3722 arm_add_gc_roots (void)
3724 gcc_obstack_init(&minipool_obstack);
3725 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3728 /* A table of known ARM exception types.
3729 For use with the interrupt function attribute. */
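/* Usage sketch (illustrative, not part of this file):
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   maps, via the table below, onto ARM_FT_ISR.  */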
3731 typedef struct
3733 const char *const arg;
3734 const unsigned long return_value;
3736 isr_attribute_arg;
3738 static const isr_attribute_arg isr_attribute_args [] =
3740 { "IRQ", ARM_FT_ISR },
3741 { "irq", ARM_FT_ISR },
3742 { "FIQ", ARM_FT_FIQ },
3743 { "fiq", ARM_FT_FIQ },
3744 { "ABORT", ARM_FT_ISR },
3745 { "abort", ARM_FT_ISR },
3746 { "ABORT", ARM_FT_ISR },
3747 { "abort", ARM_FT_ISR },
3748 { "UNDEF", ARM_FT_EXCEPTION },
3749 { "undef", ARM_FT_EXCEPTION },
3750 { "SWI", ARM_FT_EXCEPTION },
3751 { "swi", ARM_FT_EXCEPTION },
3752 { NULL, ARM_FT_NORMAL }
3755 /* Returns the (interrupt) function type of the current
3756 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3758 static unsigned long
3759 arm_isr_value (tree argument)
3761 const isr_attribute_arg * ptr;
3762 const char * arg;
3764 if (!arm_arch_notm)
3765 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3767 /* No argument - default to IRQ. */
3768 if (argument == NULL_TREE)
3769 return ARM_FT_ISR;
3771 /* Get the value of the argument. */
3772 if (TREE_VALUE (argument) == NULL_TREE
3773 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3774 return ARM_FT_UNKNOWN;
3776 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3778 /* Check it against the list of known arguments. */
3779 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3780 if (streq (arg, ptr->arg))
3781 return ptr->return_value;
3783 /* An unrecognized interrupt type. */
3784 return ARM_FT_UNKNOWN;
3787 /* Computes the type of the current function. */
3789 static unsigned long
3790 arm_compute_func_type (void)
3792 unsigned long type = ARM_FT_UNKNOWN;
3793 tree a;
3794 tree attr;
3796 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3798 /* Decide if the current function is volatile. Such functions
3799 never return, and many memory cycles can be saved by not storing
3800 register values that will never be needed again. This optimization
3801 was added to speed up context switching in a kernel application. */
3802 if (optimize > 0
3803 && (TREE_NOTHROW (current_function_decl)
3804 || !(flag_unwind_tables
3805 || (flag_exceptions
3806 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3807 && TREE_THIS_VOLATILE (current_function_decl))
3808 type |= ARM_FT_VOLATILE;
3810 if (cfun->static_chain_decl != NULL)
3811 type |= ARM_FT_NESTED;
3813 attr = DECL_ATTRIBUTES (current_function_decl);
3815 a = lookup_attribute ("naked", attr);
3816 if (a != NULL_TREE)
3817 type |= ARM_FT_NAKED;
3819 a = lookup_attribute ("isr", attr);
3820 if (a == NULL_TREE)
3821 a = lookup_attribute ("interrupt", attr);
3823 if (a == NULL_TREE)
3824 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3825 else
3826 type |= arm_isr_value (TREE_VALUE (a));
3828 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3829 type |= ARM_FT_CMSE_ENTRY;
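/* Usage sketch for the attribute checked above (illustrative, not part
   of this file): compiling with -mcmse,
     int __attribute__ ((cmse_nonsecure_entry)) entry (int);
   marks ENTRY as callable from non-secure state, and ARM_FT_CMSE_ENTRY
   later makes the epilogue clear secret register state and return with
   bxns.  */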
3831 return type;
3834 /* Returns the type of the current function. */
3836 unsigned long
3837 arm_current_func_type (void)
3839 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3840 cfun->machine->func_type = arm_compute_func_type ();
3842 return cfun->machine->func_type;
3845 bool
3846 arm_allocate_stack_slots_for_args (void)
3848 /* Naked functions should not allocate stack slots for arguments. */
3849 return !IS_NAKED (arm_current_func_type ());
3852 static bool
3853 arm_warn_func_return (tree decl)
3855 /* Naked functions are implemented entirely in assembly, including the
3856 return sequence, so suppress warnings about this. */
3857 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
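/* Note on the "naked" attribute used above (usage sketch, not part of
   this file):
     void isr_stub (void) __attribute__ ((naked));
   asks GCC to emit no prologue or epilogue at all, so the body must
   provide its own return sequence; hence no stack slots for arguments
   and no missing-return warnings.  */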
3861 /* Output assembler code for a block containing the constant parts
3862 of a trampoline, leaving space for the variable parts.
3864 On the ARM, (if r8 is the static chain regnum, and remembering that
3865 referencing pc adds an offset of 8) the trampoline looks like:
3866 ldr r8, [pc, #0]
3867 ldr pc, [pc]
3868 .word static chain value
3869 .word function's address
3870 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
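/* Offset note (derived from arm_trampoline_init below, illustrative):
   in the 32-bit variants the static chain value is stored at offset 8
   and the function address at offset 12 from the start of the
   trampoline; the Thumb-1 stub uses offsets 12 and 16 instead.  */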
3872 static void
3873 arm_asm_trampoline_template (FILE *f)
3875 fprintf (f, "\t.syntax unified\n");
3877 if (TARGET_ARM)
3879 fprintf (f, "\t.arm\n");
3880 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3881 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3883 else if (TARGET_THUMB2)
3885 fprintf (f, "\t.thumb\n");
3886 /* The Thumb-2 trampoline is similar to the arm implementation.
3887 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3888 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3889 STATIC_CHAIN_REGNUM, PC_REGNUM);
3890 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3892 else
3894 ASM_OUTPUT_ALIGN (f, 2);
3895 fprintf (f, "\t.code\t16\n");
3896 fprintf (f, ".Ltrampoline_start:\n");
3897 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3898 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3899 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3900 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3901 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3902 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3904 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3905 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3908 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3910 static void
3911 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3913 rtx fnaddr, mem, a_tramp;
3915 emit_block_move (m_tramp, assemble_trampoline_template (),
3916 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3918 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3919 emit_move_insn (mem, chain_value);
3921 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3922 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3923 emit_move_insn (mem, fnaddr);
3925 a_tramp = XEXP (m_tramp, 0);
3926 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3927 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3928 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3931 /* Thumb trampolines should be entered in thumb mode, so set
3932 the bottom bit of the address. */
3934 static rtx
3935 arm_trampoline_adjust_address (rtx addr)
3937 if (TARGET_THUMB)
3938 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3939 NULL, 0, OPTAB_LIB_WIDEN);
3940 return addr;
3943 /* Return 1 if it is possible to return using a single instruction.
3944 If SIBLING is non-null, this is a test for a return before a sibling
3945 call. SIBLING is the call insn, so we can examine its register usage. */
3948 use_return_insn (int iscond, rtx sibling)
3950 int regno;
3951 unsigned int func_type;
3952 unsigned long saved_int_regs;
3953 unsigned HOST_WIDE_INT stack_adjust;
3954 arm_stack_offsets *offsets;
3956 /* Never use a return instruction before reload has run. */
3957 if (!reload_completed)
3958 return 0;
3960 func_type = arm_current_func_type ();
3962 /* Naked, volatile and stack alignment functions need special
3963 consideration. */
3964 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3965 return 0;
3967 /* So do interrupt functions that use the frame pointer and Thumb
3968 interrupt functions. */
3969 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3970 return 0;
3972 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3973 && !optimize_function_for_size_p (cfun))
3974 return 0;
3976 offsets = arm_get_frame_offsets ();
3977 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3979 /* As do variadic functions. */
3980 if (crtl->args.pretend_args_size
3981 || cfun->machine->uses_anonymous_args
3982 /* Or if the function calls __builtin_eh_return () */
3983 || crtl->calls_eh_return
3984 /* Or if the function calls alloca */
3985 || cfun->calls_alloca
3986 /* Or if there is a stack adjustment. However, if the stack pointer
3987 is saved on the stack, we can use a pre-incrementing stack load. */
3988 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3989 && stack_adjust == 4))
3990 /* Or if the static chain register was saved above the frame, under the
3991 assumption that the stack pointer isn't saved on the stack. */
3992 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3993 && arm_compute_static_chain_stack_bytes() != 0))
3994 return 0;
3996 saved_int_regs = offsets->saved_regs_mask;
3998 /* Unfortunately, the insn
4000 ldmib sp, {..., sp, ...}
4002 triggers a bug on most SA-110 based devices, such that the stack
4003 pointer won't be correctly restored if the instruction takes a
4004 page fault. We work around this problem by popping r3 along with
4005 the other registers, since that is never slower than executing
4006 another instruction.
4008 We test for !arm_arch5 here, because code for any architecture
4009 less than this could potentially be run on one of the buggy
4010 chips. */
4011 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4013 /* Validate that r3 is a call-clobbered register (always true in
4014 the default ABI) ... */
4015 if (!call_used_regs[3])
4016 return 0;
4018 /* ... that it isn't being used for a return value ... */
4019 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4020 return 0;
4022 /* ... or for a tail-call argument ... */
4023 if (sibling)
4025 gcc_assert (CALL_P (sibling));
4027 if (find_regno_fusage (sibling, USE, 3))
4028 return 0;
4031 /* ... and that there are no call-saved registers in r0-r2
4032 (always true in the default ABI). */
4033 if (saved_int_regs & 0x7)
4034 return 0;
4037 /* Can't be done if interworking with Thumb, and any registers have been
4038 stacked. */
4039 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4040 return 0;
4042 /* On StrongARM, conditional returns are expensive if they aren't
4043 taken and multiple registers have been stacked. */
4044 if (iscond && arm_tune_strongarm)
4046 /* Conditional return when just the LR is stored is a simple
4047 conditional-load instruction, that's not expensive. */
4048 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4049 return 0;
4051 if (flag_pic
4052 && arm_pic_register != INVALID_REGNUM
4053 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4054 return 0;
4057 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
4058 need several instructions if anything needs to be popped. */
4059 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4060 return 0;
4062 /* If there are saved registers but the LR isn't saved, then we need
4063 two instructions for the return. */
4064 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4065 return 0;
4067 /* Can't be done if any of the VFP regs are pushed,
4068 since this also requires an insn. */
4069 if (TARGET_HARD_FLOAT)
4070 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4071 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4072 return 0;
4074 if (TARGET_REALLY_IWMMXT)
4075 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4076 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4077 return 0;
4079 return 1;
4082 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4083 shrink-wrapping if possible. This is the case if we need to emit a
4084 prologue, which we can test by looking at the offsets. */
4085 bool
4086 use_simple_return_p (void)
4088 arm_stack_offsets *offsets;
4090 /* Note this function can be called before or after reload. */
4091 if (!reload_completed)
4092 arm_compute_frame_layout ();
4094 offsets = arm_get_frame_offsets ();
4095 return offsets->outgoing_args != 0;
4098 /* Return TRUE if int I is a valid immediate ARM constant. */
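/* Illustrative examples (not exhaustive): an ARM data-processing
   immediate is an 8-bit value rotated right by an even amount, so
   0xff000000 and 0x000003fc are encodable while 0x00000101 is not.  */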
4101 const_ok_for_arm (HOST_WIDE_INT i)
4103 int lowbit;
4105 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4106 be all zero, or all one. */
4107 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4108 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4109 != ((~(unsigned HOST_WIDE_INT) 0)
4110 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4111 return FALSE;
4113 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4115 /* Fast return for 0 and small values. We must do this for zero, since
4116 the code below can't handle that one case. */
4117 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4118 return TRUE;
4120 /* Get the number of trailing zeros. */
4121 lowbit = ffs((int) i) - 1;
4123 /* Only even shifts are allowed in ARM mode so round down to the
4124 nearest even number. */
4125 if (TARGET_ARM)
4126 lowbit &= ~1;
4128 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4129 return TRUE;
4131 if (TARGET_ARM)
4133 /* Allow rotated constants in ARM mode. */
4134 if (lowbit <= 4
4135 && ((i & ~0xc000003f) == 0
4136 || (i & ~0xf000000f) == 0
4137 || (i & ~0xfc000003) == 0))
4138 return TRUE;
4140 else if (TARGET_THUMB2)
4142 HOST_WIDE_INT v;
4144 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4145 v = i & 0xff;
4146 v |= v << 16;
4147 if (i == v || i == (v | (v << 8)))
4148 return TRUE;
4150 /* Allow repeated pattern 0xXY00XY00. */
4151 v = i & 0xff00;
4152 v |= v << 16;
4153 if (i == v)
4154 return TRUE;
4156 else if (TARGET_HAVE_MOVT)
4158 /* Thumb-1 Targets with MOVT. */
4159 if (i > 0xffff)
4160 return FALSE;
4161 else
4162 return TRUE;
4165 return FALSE;
4168 /* Return true if I is a valid constant for the operation CODE. */
4170 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4172 if (const_ok_for_arm (i))
4173 return 1;
4175 switch (code)
4177 case SET:
4178 /* See if we can use movw. */
4179 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4180 return 1;
4181 else
4182 /* Otherwise, try mvn. */
4183 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4185 case PLUS:
4186 /* See if we can use addw or subw. */
4187 if (TARGET_THUMB2
4188 && ((i & 0xfffff000) == 0
4189 || ((-i) & 0xfffff000) == 0))
4190 return 1;
4191 /* Fall through. */
4192 case COMPARE:
4193 case EQ:
4194 case NE:
4195 case GT:
4196 case LE:
4197 case LT:
4198 case GE:
4199 case GEU:
4200 case LTU:
4201 case GTU:
4202 case LEU:
4203 case UNORDERED:
4204 case ORDERED:
4205 case UNEQ:
4206 case UNGE:
4207 case UNLT:
4208 case UNGT:
4209 case UNLE:
4210 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4212 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4213 case XOR:
4214 return 0;
4216 case IOR:
4217 if (TARGET_THUMB2)
4218 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4219 return 0;
4221 case AND:
4222 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4224 default:
4225 gcc_unreachable ();
4229 /* Return true if I is a valid DImode constant for the operation CODE. */
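/* For example (illustrative): an IOR with 0xffffffff00000001 is
   accepted because each 32-bit half is either all ones or a valid
   SImode immediate, so the DImode operation can be split into two
   SImode operations.  */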
4231 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4233 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4234 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4235 rtx hi = GEN_INT (hi_val);
4236 rtx lo = GEN_INT (lo_val);
4238 if (TARGET_THUMB1)
4239 return 0;
4241 switch (code)
4243 case AND:
4244 case IOR:
4245 case XOR:
4246 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4247 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4248 case PLUS:
4249 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4251 default:
4252 return 0;
4256 /* Emit a sequence of insns to handle a large constant.
4257 CODE is the code of the operation required, it can be any of SET, PLUS,
4258 IOR, AND, XOR, MINUS;
4259 MODE is the mode in which the operation is being performed;
4260 VAL is the integer to operate on;
4261 SOURCE is the other operand (a register, or a null-pointer for SET);
4262 SUBTARGETS means it is safe to create scratch registers if that will
4263 either produce a simpler sequence, or we will want to cse the values.
4264 Return value is the number of insns emitted. */
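/* Worked example (illustrative, assuming ARM mode without MOVT):
   0x00f000ff is not a valid single immediate, but it splits into the
   valid immediates 0x00f00000 and 0x000000ff, so a SET can be emitted
   as
     mov  rd, #0x00f00000
     orr  rd, rd, #0x000000ff
   and the routines below would report 2 insns.  */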
4266 /* ??? Tweak this for thumb2. */
4268 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4269 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4271 rtx cond;
4273 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4274 cond = COND_EXEC_TEST (PATTERN (insn));
4275 else
4276 cond = NULL_RTX;
4278 if (subtargets || code == SET
4279 || (REG_P (target) && REG_P (source)
4280 && REGNO (target) != REGNO (source)))
4282 /* After arm_reorg has been called, we can't fix up expensive
4283 constants by pushing them into memory so we must synthesize
4284 them in-line, regardless of the cost. This is only likely to
4285 be more costly on chips that have load delay slots and we are
4286 compiling without running the scheduler (so no splitting
4287 occurred before the final instruction emission).
4289 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4291 if (!cfun->machine->after_arm_reorg
4292 && !cond
4293 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4294 1, 0)
4295 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4296 + (code != SET))))
4298 if (code == SET)
4300 /* Currently SET is the only monadic value for CODE; all
4301 the rest are dyadic. */
4302 if (TARGET_USE_MOVT)
4303 arm_emit_movpair (target, GEN_INT (val));
4304 else
4305 emit_set_insn (target, GEN_INT (val));
4307 return 1;
4309 else
4311 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4313 if (TARGET_USE_MOVT)
4314 arm_emit_movpair (temp, GEN_INT (val));
4315 else
4316 emit_set_insn (temp, GEN_INT (val));
4318 /* For MINUS, the constant is the value being subtracted from
4319 (VAL - SOURCE), since we never have subtraction of a constant. */
4320 if (code == MINUS)
4321 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4322 else
4323 emit_set_insn (target,
4324 gen_rtx_fmt_ee (code, mode, source, temp));
4325 return 2;
4330 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4334 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4335 ARM/Thumb-2 immediates and add up to VAL.
4336 The function return value gives the number of insns required. */
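/* For instance (illustrative): with VAL == 0x12345678 and CODE == SET,
   RETURN_SEQUENCE could hold { 0x12000000, 0x00340000, 0x00005600,
   0x00000078 }, each a valid 8-bit rotated immediate, and the function
   would return 4.  */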
4337 static int
4338 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4339 struct four_ints *return_sequence)
4341 int best_consecutive_zeros = 0;
4342 int i;
4343 int best_start = 0;
4344 int insns1, insns2;
4345 struct four_ints tmp_sequence;
4347 /* If we aren't targeting ARM, the best place to start is always at
4348 the bottom; otherwise look more closely. */
4349 if (TARGET_ARM)
4351 for (i = 0; i < 32; i += 2)
4353 int consecutive_zeros = 0;
4355 if (!(val & (3 << i)))
4357 while ((i < 32) && !(val & (3 << i)))
4359 consecutive_zeros += 2;
4360 i += 2;
4362 if (consecutive_zeros > best_consecutive_zeros)
4364 best_consecutive_zeros = consecutive_zeros;
4365 best_start = i - consecutive_zeros;
4367 i -= 2;
4372 /* So long as it won't require any more insns to do so, it's
4373 desirable to emit a small constant (in bits 0...9) in the last
4374 insn. This way there is more chance that it can be combined with
4375 a later addressing insn to form a pre-indexed load or store
4376 operation. Consider:
4378 *((volatile int *)0xe0000100) = 1;
4379 *((volatile int *)0xe0000110) = 2;
4381 We want this to wind up as:
4383 mov rA, #0xe0000000
4384 mov rB, #1
4385 str rB, [rA, #0x100]
4386 mov rB, #2
4387 str rB, [rA, #0x110]
4389 rather than having to synthesize both large constants from scratch.
4391 Therefore, we calculate how many insns would be required to emit
4392 the constant starting from `best_start', and also starting from
4393 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4394 yield a shorter sequence, we may as well use zero. */
4395 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4396 if (best_start != 0
4397 && ((HOST_WIDE_INT_1U << best_start) < val))
4399 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4400 if (insns2 <= insns1)
4402 *return_sequence = tmp_sequence;
4403 insns1 = insns2;
4407 return insns1;
4410 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4411 static int
4412 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4413 struct four_ints *return_sequence, int i)
4415 int remainder = val & 0xffffffff;
4416 int insns = 0;
4418 /* Try to find a way of doing the job in either two or three
4419 instructions.
4421 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4422 location. We start at position I. This may be the MSB, or
4423 optimal_immediate_sequence may have positioned it at the largest block
4424 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4425 wrapping around to the top of the word when we drop off the bottom.
4426 In the worst case this code should produce no more than four insns.
4428 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4429 constants, shifted to any arbitrary location. We should always start
4430 at the MSB. */
4433 int end;
4434 unsigned int b1, b2, b3, b4;
4435 unsigned HOST_WIDE_INT result;
4436 int loc;
4438 gcc_assert (insns < 4);
4440 if (i <= 0)
4441 i += 32;
4443 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4444 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4446 loc = i;
4447 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4448 /* We can use addw/subw for the last 12 bits. */
4449 result = remainder;
4450 else
4452 /* Use an 8-bit shifted/rotated immediate. */
4453 end = i - 8;
4454 if (end < 0)
4455 end += 32;
4456 result = remainder & ((0x0ff << end)
4457 | ((i < end) ? (0xff >> (32 - end))
4458 : 0));
4459 i -= 8;
4462 else
4464 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4465 arbitrary shifts. */
4466 i -= TARGET_ARM ? 2 : 1;
4467 continue;
4470 /* Next, see if we can do a better job with a thumb2 replicated
4471 constant.
4473 We do it this way around to catch the cases like 0x01F001E0 where
4474 two 8-bit immediates would work, but a replicated constant would
4475 make it worse.
4477 TODO: 16-bit constants that don't clear all the bits, but still win.
4478 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4479 if (TARGET_THUMB2)
4481 b1 = (remainder & 0xff000000) >> 24;
4482 b2 = (remainder & 0x00ff0000) >> 16;
4483 b3 = (remainder & 0x0000ff00) >> 8;
4484 b4 = remainder & 0xff;
4486 if (loc > 24)
4488 /* The 8-bit immediate already found clears b1 (and maybe b2),
4489 but must leave b3 and b4 alone. */
4491 /* First try to find a 32-bit replicated constant that clears
4492 almost everything. We can assume that we can't do it in one,
4493 or else we wouldn't be here. */
4494 unsigned int tmp = b1 & b2 & b3 & b4;
4495 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4496 + (tmp << 24);
4497 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4498 + (tmp == b3) + (tmp == b4);
4499 if (tmp
4500 && (matching_bytes >= 3
4501 || (matching_bytes == 2
4502 && const_ok_for_op (remainder & ~tmp2, code))))
4504 /* At least 3 of the bytes match, and the fourth has at
4505 least as many bits set, or two of the bytes match
4506 and it will only require one more insn to finish. */
4507 result = tmp2;
4508 i = tmp != b1 ? 32
4509 : tmp != b2 ? 24
4510 : tmp != b3 ? 16
4511 : 8;
4514 /* Second, try to find a 16-bit replicated constant that can
4515 leave three of the bytes clear. If b2 or b4 is already
4516 zero, then we can. If the 8-bit immediate from above would
4517 not clear b2 anyway, then we still win. */
4518 else if (b1 == b3 && (!b2 || !b4
4519 || (remainder & 0x00ff0000 & ~result)))
4521 result = remainder & 0xff00ff00;
4522 i = 24;
4525 else if (loc > 16)
4527 /* The 8-bit immediate already found clears b2 (and maybe b3)
4528 and we don't get here unless b1 is already clear, but it will
4529 leave b4 unchanged. */
4531 /* If we can clear b2 and b4 at once, then we win, since the
4532 8-bit immediate couldn't possibly reach that far. */
4533 if (b2 == b4)
4535 result = remainder & 0x00ff00ff;
4536 i = 16;
4541 return_sequence->i[insns++] = result;
4542 remainder &= ~result;
4544 if (code == SET || code == MINUS)
4545 code = PLUS;
4547 while (remainder);
4549 return insns;
4552 /* Emit an instruction with the indicated PATTERN. If COND is
4553 non-NULL, conditionalize the execution of the instruction on COND
4554 being true. */
4556 static void
4557 emit_constant_insn (rtx cond, rtx pattern)
4559 if (cond)
4560 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4561 emit_insn (pattern);
4564 /* As above, but extra parameter GENERATE which, if clear, suppresses
4565 RTL generation. */
4567 static int
4568 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4569 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4570 int subtargets, int generate)
4572 int can_invert = 0;
4573 int can_negate = 0;
4574 int final_invert = 0;
4575 int i;
4576 int set_sign_bit_copies = 0;
4577 int clear_sign_bit_copies = 0;
4578 int clear_zero_bit_copies = 0;
4579 int set_zero_bit_copies = 0;
4580 int insns = 0, neg_insns, inv_insns;
4581 unsigned HOST_WIDE_INT temp1, temp2;
4582 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4583 struct four_ints *immediates;
4584 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4586 /* Find out which operations are safe for a given CODE. Also do a quick
4587 check for degenerate cases; these can occur when DImode operations
4588 are split. */
4589 switch (code)
4591 case SET:
4592 can_invert = 1;
4593 break;
4595 case PLUS:
4596 can_negate = 1;
4597 break;
4599 case IOR:
4600 if (remainder == 0xffffffff)
4602 if (generate)
4603 emit_constant_insn (cond,
4604 gen_rtx_SET (target,
4605 GEN_INT (ARM_SIGN_EXTEND (val))));
4606 return 1;
4609 if (remainder == 0)
4611 if (reload_completed && rtx_equal_p (target, source))
4612 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4618 break;
4620 case AND:
4621 if (remainder == 0)
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4625 return 1;
4627 if (remainder == 0xffffffff)
4629 if (reload_completed && rtx_equal_p (target, source))
4630 return 0;
4631 if (generate)
4632 emit_constant_insn (cond, gen_rtx_SET (target, source));
4633 return 1;
4635 can_invert = 1;
4636 break;
4638 case XOR:
4639 if (remainder == 0)
4641 if (reload_completed && rtx_equal_p (target, source))
4642 return 0;
4643 if (generate)
4644 emit_constant_insn (cond, gen_rtx_SET (target, source));
4645 return 1;
4648 if (remainder == 0xffffffff)
4650 if (generate)
4651 emit_constant_insn (cond,
4652 gen_rtx_SET (target,
4653 gen_rtx_NOT (mode, source)));
4654 return 1;
4656 final_invert = 1;
4657 break;
4659 case MINUS:
4660 /* We treat MINUS as (val - source), since (source - val) is always
4661 passed as (source + (-val)). */
4662 if (remainder == 0)
4664 if (generate)
4665 emit_constant_insn (cond,
4666 gen_rtx_SET (target,
4667 gen_rtx_NEG (mode, source)));
4668 return 1;
4670 if (const_ok_for_arm (val))
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 gen_rtx_MINUS (mode, GEN_INT (val),
4676 source)));
4677 return 1;
4680 break;
4682 default:
4683 gcc_unreachable ();
4686 /* If we can do it in one insn, get out quickly. */
4687 if (const_ok_for_op (val, code))
4689 if (generate)
4690 emit_constant_insn (cond,
4691 gen_rtx_SET (target,
4692 (source
4693 ? gen_rtx_fmt_ee (code, mode, source,
4694 GEN_INT (val))
4695 : GEN_INT (val))));
4696 return 1;
4699 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4700 insn. */
4701 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4702 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4704 if (generate)
4706 if (mode == SImode && i == 16)
4707 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4708 smaller insn. */
4709 emit_constant_insn (cond,
4710 gen_zero_extendhisi2
4711 (target, gen_lowpart (HImode, source)));
4712 else
4713 /* Extz only supports SImode, but we can coerce the operands
4714 into that mode. */
4715 emit_constant_insn (cond,
4716 gen_extzv_t2 (gen_lowpart (SImode, target),
4717 gen_lowpart (SImode, source),
4718 GEN_INT (i), const0_rtx));
4721 return 1;
4724 /* Calculate a few attributes that may be useful for specific
4725 optimizations. */
4726 /* Count number of leading zeros. */
4727 for (i = 31; i >= 0; i--)
4729 if ((remainder & (1 << i)) == 0)
4730 clear_sign_bit_copies++;
4731 else
4732 break;
4735 /* Count number of leading 1's. */
4736 for (i = 31; i >= 0; i--)
4738 if ((remainder & (1 << i)) != 0)
4739 set_sign_bit_copies++;
4740 else
4741 break;
4744 /* Count number of trailing zeros. */
4745 for (i = 0; i <= 31; i++)
4747 if ((remainder & (1 << i)) == 0)
4748 clear_zero_bit_copies++;
4749 else
4750 break;
4753 /* Count number of trailing 1's. */
4754 for (i = 0; i <= 31; i++)
4756 if ((remainder & (1 << i)) != 0)
4757 set_zero_bit_copies++;
4758 else
4759 break;
4762 switch (code)
4764 case SET:
4765 /* See if we can do this by sign_extending a constant that is known
4766 to be negative. This is a good way of doing it, since the shift
4767 may well merge into a subsequent insn. */
4768 if (set_sign_bit_copies > 1)
4770 if (const_ok_for_arm
4771 (temp1 = ARM_SIGN_EXTEND (remainder
4772 << (set_sign_bit_copies - 1))))
4774 if (generate)
4776 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4777 emit_constant_insn (cond,
4778 gen_rtx_SET (new_src, GEN_INT (temp1)));
4779 emit_constant_insn (cond,
4780 gen_ashrsi3 (target, new_src,
4781 GEN_INT (set_sign_bit_copies - 1)));
4783 return 2;
4785 /* For an inverted constant, we will need to set the low bits;
4786 these will be shifted out of harm's way. */
4787 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4788 if (const_ok_for_arm (~temp1))
4790 if (generate)
4792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (new_src, GEN_INT (temp1)));
4795 emit_constant_insn (cond,
4796 gen_ashrsi3 (target, new_src,
4797 GEN_INT (set_sign_bit_copies - 1)));
4799 return 2;
4803 /* See if we can calculate the value as the difference between two
4804 valid immediates. */
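/* E.g. (illustrative): 0x000fffff is not a valid immediate, but it is
   0x00100000 - 1, so the code below can emit
     mov  rd, #0x00100000
     sub  rd, rd, #1
   which takes two insns.  */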
4805 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4807 int topshift = clear_sign_bit_copies & ~1;
4809 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4810 & (0xff000000 >> topshift));
4812 /* If temp1 is zero, then that means the 9 most significant
4813 bits of remainder were 1 and we've caused it to overflow.
4814 When topshift is 0 we don't need to do anything since we
4815 can borrow from 'bit 32'. */
4816 if (temp1 == 0 && topshift != 0)
4817 temp1 = 0x80000000 >> (topshift - 1);
4819 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4821 if (const_ok_for_arm (temp2))
4823 if (generate)
4825 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4826 emit_constant_insn (cond,
4827 gen_rtx_SET (new_src, GEN_INT (temp1)));
4828 emit_constant_insn (cond,
4829 gen_addsi3 (target, new_src,
4830 GEN_INT (-temp2)));
4833 return 2;
4837 /* See if we can generate this by setting the bottom (or the top)
4838 16 bits, and then shifting these into the other half of the
4839 word. We only look for the simplest cases, to do more would cost
4840 too much. Be careful, however, not to generate this when the
4841 alternative would take fewer insns. */
4842 if (val & 0xffff0000)
4844 temp1 = remainder & 0xffff0000;
4845 temp2 = remainder & 0x0000ffff;
4847 /* Overlaps outside this range are best done using other methods. */
4848 for (i = 9; i < 24; i++)
4850 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4851 && !const_ok_for_arm (temp2))
4853 rtx new_src = (subtargets
4854 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4855 : target);
4856 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4857 source, subtargets, generate);
4858 source = new_src;
4859 if (generate)
4860 emit_constant_insn
4861 (cond,
4862 gen_rtx_SET
4863 (target,
4864 gen_rtx_IOR (mode,
4865 gen_rtx_ASHIFT (mode, source,
4866 GEN_INT (i)),
4867 source)));
4868 return insns + 1;
4872 /* Don't duplicate cases already considered. */
4873 for (i = 17; i < 24; i++)
4875 if (((temp1 | (temp1 >> i)) == remainder)
4876 && !const_ok_for_arm (temp1))
4878 rtx new_src = (subtargets
4879 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4880 : target);
4881 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4882 source, subtargets, generate);
4883 source = new_src;
4884 if (generate)
4885 emit_constant_insn
4886 (cond,
4887 gen_rtx_SET (target,
4888 gen_rtx_IOR
4889 (mode,
4890 gen_rtx_LSHIFTRT (mode, source,
4891 GEN_INT (i)),
4892 source)));
4893 return insns + 1;
4897 break;
4899 case IOR:
4900 case XOR:
4901 /* If we have IOR or XOR, and the constant can be loaded in a
4902 single instruction, and we can find a temporary to put it in,
4903 then this can be done in two instructions instead of 3-4. */
4904 if (subtargets
4905 /* TARGET can't be NULL if SUBTARGETS is 0 */
4906 || (reload_completed && !reg_mentioned_p (target, source)))
4908 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4910 if (generate)
4912 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4914 emit_constant_insn (cond,
4915 gen_rtx_SET (sub, GEN_INT (val)));
4916 emit_constant_insn (cond,
4917 gen_rtx_SET (target,
4918 gen_rtx_fmt_ee (code, mode,
4919 source, sub)));
4921 return 2;
4925 if (code == XOR)
4926 break;
4928 /* Convert
4929 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4930 followed by 0s, e.g. 0xfff00000) into
4931 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4933 This can be done in 2 instructions by using shifts with mov or mvn.
4934 E.g. for
4935 x = x | 0xfff00000;
4936 we generate:
4937 mvn r0, r0, asl #12
4938 mvn r0, r0, lsr #12 */
4939 if (set_sign_bit_copies > 8
4940 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4942 if (generate)
4944 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4945 rtx shift = GEN_INT (set_sign_bit_copies);
4947 emit_constant_insn
4948 (cond,
4949 gen_rtx_SET (sub,
4950 gen_rtx_NOT (mode,
4951 gen_rtx_ASHIFT (mode,
4952 source,
4953 shift))));
4954 emit_constant_insn
4955 (cond,
4956 gen_rtx_SET (target,
4957 gen_rtx_NOT (mode,
4958 gen_rtx_LSHIFTRT (mode, sub,
4959 shift))));
4961 return 2;
4964 /* Convert
4965 x = y | constant (which has set_zero_bit_copies trailing 1s) into
4967 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4969 E.g. for r0 = r0 | 0xfff we generate:
4970 mvn r0, r0, lsr #12
4971 mvn r0, r0, asl #12
4974 if (set_zero_bit_copies > 8
4975 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4977 if (generate)
4979 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4980 rtx shift = GEN_INT (set_zero_bit_copies);
4982 emit_constant_insn
4983 (cond,
4984 gen_rtx_SET (sub,
4985 gen_rtx_NOT (mode,
4986 gen_rtx_LSHIFTRT (mode,
4987 source,
4988 shift))));
4989 emit_constant_insn
4990 (cond,
4991 gen_rtx_SET (target,
4992 gen_rtx_NOT (mode,
4993 gen_rtx_ASHIFT (mode, sub,
4994 shift))));
4996 return 2;
4999 /* This will never be reached for Thumb2 because orn is a valid
5000 instruction. This is for Thumb1 and the ARM 32 bit cases.
5002 x = y | constant (such that ~constant is a valid constant)
5003 Transform this to
5004 x = ~(~y & ~constant).
5006 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5008 if (generate)
5010 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5011 emit_constant_insn (cond,
5012 gen_rtx_SET (sub,
5013 gen_rtx_NOT (mode, source)));
5014 source = sub;
5015 if (subtargets)
5016 sub = gen_reg_rtx (mode);
5017 emit_constant_insn (cond,
5018 gen_rtx_SET (sub,
5019 gen_rtx_AND (mode, source,
5020 GEN_INT (temp1))));
5021 emit_constant_insn (cond,
5022 gen_rtx_SET (target,
5023 gen_rtx_NOT (mode, sub)));
5025 return 3;
5027 break;
5029 case AND:
5030 /* See if two shifts will do 2 or more insns' worth of work. */
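/* E.g. (illustrative, for targets without a single-insn zero-extend):
   x &= 0x0000ffff can be done as
     mov  rd, rs, asl #16
     mov  rd, rd, lsr #16
   clearing the top clear_sign_bit_copies bits with two shifts.  */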
5031 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5033 HOST_WIDE_INT shift_mask = ((0xffffffff
5034 << (32 - clear_sign_bit_copies))
5035 & 0xffffffff);
5037 if ((remainder | shift_mask) != 0xffffffff)
5039 HOST_WIDE_INT new_val
5040 = ARM_SIGN_EXTEND (remainder | shift_mask);
5042 if (generate)
5044 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5045 insns = arm_gen_constant (AND, SImode, cond, new_val,
5046 new_src, source, subtargets, 1);
5047 source = new_src;
5049 else
5051 rtx targ = subtargets ? NULL_RTX : target;
5052 insns = arm_gen_constant (AND, mode, cond, new_val,
5053 targ, source, subtargets, 0);
5057 if (generate)
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060 rtx shift = GEN_INT (clear_sign_bit_copies);
5062 emit_insn (gen_ashlsi3 (new_src, source, shift));
5063 emit_insn (gen_lshrsi3 (target, new_src, shift));
5066 return insns + 2;
5069 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5071 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5073 if ((remainder | shift_mask) != 0xffffffff)
5075 HOST_WIDE_INT new_val
5076 = ARM_SIGN_EXTEND (remainder | shift_mask);
5077 if (generate)
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5081 insns = arm_gen_constant (AND, mode, cond, new_val,
5082 new_src, source, subtargets, 1);
5083 source = new_src;
5085 else
5087 rtx targ = subtargets ? NULL_RTX : target;
5089 insns = arm_gen_constant (AND, mode, cond, new_val,
5090 targ, source, subtargets, 0);
5094 if (generate)
5096 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5097 rtx shift = GEN_INT (clear_zero_bit_copies);
5099 emit_insn (gen_lshrsi3 (new_src, source, shift));
5100 emit_insn (gen_ashlsi3 (target, new_src, shift));
5103 return insns + 2;
5106 break;
5108 default:
5109 break;
5112 /* Calculate what the instruction sequences would be if we generated it
5113 normally, negated, or inverted. */
5114 if (code == AND)
5115 /* AND cannot be split into multiple insns, so invert and use BIC. */
5116 insns = 99;
5117 else
5118 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5120 if (can_negate)
5121 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5122 &neg_immediates);
5123 else
5124 neg_insns = 99;
5126 if (can_invert || final_invert)
5127 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5128 &inv_immediates);
5129 else
5130 inv_insns = 99;
5132 immediates = &pos_immediates;
5134 /* Is the negated immediate sequence more efficient? */
5135 if (neg_insns < insns && neg_insns <= inv_insns)
5137 insns = neg_insns;
5138 immediates = &neg_immediates;
5140 else
5141 can_negate = 0;
5143 /* Is the inverted immediate sequence more efficient?
5144 We must allow for an extra NOT instruction for XOR operations, although
5145 there is some chance that the final 'mvn' will get optimized later. */
5146 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5148 insns = inv_insns;
5149 immediates = &inv_immediates;
5151 else
5153 can_invert = 0;
5154 final_invert = 0;
5157 /* Now output the chosen sequence as instructions. */
5158 if (generate)
5160 for (i = 0; i < insns; i++)
5162 rtx new_src, temp1_rtx;
5164 temp1 = immediates->i[i];
5166 if (code == SET || code == MINUS)
5167 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5168 else if ((final_invert || i < (insns - 1)) && subtargets)
5169 new_src = gen_reg_rtx (mode);
5170 else
5171 new_src = target;
5173 if (can_invert)
5174 temp1 = ~temp1;
5175 else if (can_negate)
5176 temp1 = -temp1;
5178 temp1 = trunc_int_for_mode (temp1, mode);
5179 temp1_rtx = GEN_INT (temp1);
5181 if (code == SET)
5183 else if (code == MINUS)
5184 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5185 else
5186 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5188 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5189 source = new_src;
5191 if (code == SET)
5193 can_negate = can_invert;
5194 can_invert = 0;
5195 code = PLUS;
5197 else if (code == MINUS)
5198 code = PLUS;
5202 if (final_invert)
5204 if (generate)
5205 emit_constant_insn (cond, gen_rtx_SET (target,
5206 gen_rtx_NOT (mode, source)));
5207 insns++;
5210 return insns;
5213 /* Canonicalize a comparison so that we are more likely to recognize it.
5214 This can be done for a few constant compares, where we can make the
5215 immediate value easier to load. */
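/* Example (illustrative): for SImode, "x <= 0x1ffff" uses an immediate
   that no single comparison can encode, so it is rewritten below as
   "x < 0x20000", where 0x20000 is a valid immediate.  */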
5217 static void
5218 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5219 bool op0_preserve_value)
5221 machine_mode mode;
5222 unsigned HOST_WIDE_INT i, maxval;
5224 mode = GET_MODE (*op0);
5225 if (mode == VOIDmode)
5226 mode = GET_MODE (*op1);
5228 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5230 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5231 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5232 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5233 for GTU/LEU in Thumb mode. */
5234 if (mode == DImode)
5237 if (*code == GT || *code == LE
5238 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5240 /* Missing comparison. First try to use an available
5241 comparison. */
5242 if (CONST_INT_P (*op1))
5244 i = INTVAL (*op1);
5245 switch (*code)
5247 case GT:
5248 case LE:
5249 if (i != maxval
5250 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5252 *op1 = GEN_INT (i + 1);
5253 *code = *code == GT ? GE : LT;
5254 return;
5256 break;
5257 case GTU:
5258 case LEU:
5259 if (i != ~((unsigned HOST_WIDE_INT) 0)
5260 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5262 *op1 = GEN_INT (i + 1);
5263 *code = *code == GTU ? GEU : LTU;
5264 return;
5266 break;
5267 default:
5268 gcc_unreachable ();
5272 /* If that did not work, reverse the condition. */
5273 if (!op0_preserve_value)
5275 std::swap (*op0, *op1);
5276 *code = (int)swap_condition ((enum rtx_code)*code);
5279 return;
5282 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5283 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5284 to facilitate possible combining with a cmp into 'ands'. */
5285 if (mode == SImode
5286 && GET_CODE (*op0) == ZERO_EXTEND
5287 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5288 && GET_MODE (XEXP (*op0, 0)) == QImode
5289 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5290 && subreg_lowpart_p (XEXP (*op0, 0))
5291 && *op1 == const0_rtx)
5292 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5293 GEN_INT (255));
5295 /* Comparisons smaller than DImode. Only adjust comparisons against
5296 an out-of-range constant. */
5297 if (!CONST_INT_P (*op1)
5298 || const_ok_for_arm (INTVAL (*op1))
5299 || const_ok_for_arm (- INTVAL (*op1)))
5300 return;
5302 i = INTVAL (*op1);
5304 switch (*code)
5306 case EQ:
5307 case NE:
5308 return;
5310 case GT:
5311 case LE:
5312 if (i != maxval
5313 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5315 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5316 *code = *code == GT ? GE : LT;
5317 return;
5319 break;
5321 case GE:
5322 case LT:
5323 if (i != ~maxval
5324 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5326 *op1 = GEN_INT (i - 1);
5327 *code = *code == GE ? GT : LE;
5328 return;
5330 break;
5332 case GTU:
5333 case LEU:
5334 if (i != ~((unsigned HOST_WIDE_INT) 0)
5335 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5337 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5338 *code = *code == GTU ? GEU : LTU;
5339 return;
5341 break;
5343 case GEU:
5344 case LTU:
5345 if (i != 0
5346 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5348 *op1 = GEN_INT (i - 1);
5349 *code = *code == GEU ? GTU : LEU;
5350 return;
5352 break;
5354 default:
5355 gcc_unreachable ();
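/* Worked example (illustrative, editor-supplied): 0x00ffffff is not a
   valid ARM immediate, and neither is its negation, but 0x01000000 is,
   so a GT comparison against it is adjusted to GE against the larger
   constant, which then needs only a single cmp.  */
#if 0
int
example_canonical_compare (int x)
{
  return x > 0x00ffffff;	/* handled as x >= 0x01000000 */
}
#endif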
5360 /* Define how to find the value returned by a function. */
5362 static rtx
5363 arm_function_value(const_tree type, const_tree func,
5364 bool outgoing ATTRIBUTE_UNUSED)
5366 machine_mode mode;
5367 int unsignedp ATTRIBUTE_UNUSED;
5368 rtx r ATTRIBUTE_UNUSED;
5370 mode = TYPE_MODE (type);
5372 if (TARGET_AAPCS_BASED)
5373 return aapcs_allocate_return_reg (mode, type, func);
5375 /* Promote integer types. */
5376 if (INTEGRAL_TYPE_P (type))
5377 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5379 /* Promotes small structs returned in a register to full-word size
5380 for big-endian AAPCS. */
5381 if (arm_return_in_msb (type))
5383 HOST_WIDE_INT size = int_size_in_bytes (type);
5384 if (size % UNITS_PER_WORD != 0)
5386 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5387 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5391 return arm_libcall_value_1 (mode);
5394 /* libcall hashtable helpers. */
5396 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5398 static inline hashval_t hash (const rtx_def *);
5399 static inline bool equal (const rtx_def *, const rtx_def *);
5400 static inline void remove (rtx_def *);
5403 inline bool
5404 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5406 return rtx_equal_p (p1, p2);
5409 inline hashval_t
5410 libcall_hasher::hash (const rtx_def *p1)
5412 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5415 typedef hash_table<libcall_hasher> libcall_table_type;
5417 static void
5418 add_libcall (libcall_table_type *htab, rtx libcall)
5420 *htab->find_slot (libcall, INSERT) = libcall;
5423 static bool
5424 arm_libcall_uses_aapcs_base (const_rtx libcall)
5426 static bool init_done = false;
5427 static libcall_table_type *libcall_htab = NULL;
5429 if (!init_done)
5431 init_done = true;
5433 libcall_htab = new libcall_table_type (31);
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5452 add_libcall (libcall_htab,
5453 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5454 add_libcall (libcall_htab,
5455 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5456 add_libcall (libcall_htab,
5457 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5458 add_libcall (libcall_htab,
5459 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5460 add_libcall (libcall_htab,
5461 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5462 add_libcall (libcall_htab,
5463 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5464 add_libcall (libcall_htab,
5465 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5466 add_libcall (libcall_htab,
5467 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5469 /* Values from double-precision helper functions are returned in core
5470 registers if the selected core only supports single-precision
5471 arithmetic, even if we are using the hard-float ABI. The same is
5472 true for single-precision helpers, but we will never be using the
5473 hard-float ABI on a CPU which doesn't support single-precision
5474 operations in hardware. */
5475 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5476 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5477 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5478 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5479 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5480 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5481 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5482 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5483 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5484 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5485 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5486 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5487 SFmode));
5488 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5489 DFmode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5494 return libcall && libcall_htab->find (libcall) != NULL;
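/* For instance, on ARM EABI targets optab_libfunc (add_optab, DFmode)
   resolves to the RTABI helper __aeabi_dadd; because that entry is in
   the table above, its DFmode result is expected in core registers
   (r0/r1) under the base PCS, as described in the comment above.  The
   exact helper names depend on the selected ABI.  */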
5497 static rtx
5498 arm_libcall_value_1 (machine_mode mode)
5500 if (TARGET_AAPCS_BASED)
5501 return aapcs_libcall_value (mode);
5502 else if (TARGET_IWMMXT_ABI
5503 && arm_vector_mode_supported_p (mode))
5504 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5505 else
5506 return gen_rtx_REG (mode, ARG_REGISTER (1));
5509 /* Define how to find the value returned by a library function
5510 assuming the value has mode MODE. */
5512 static rtx
5513 arm_libcall_value (machine_mode mode, const_rtx libcall)
5515 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5516 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5518 /* The following libcalls return their result in integer registers,
5519 even though they return a floating point value. */
5520 if (arm_libcall_uses_aapcs_base (libcall))
5521 return gen_rtx_REG (mode, ARG_REGISTER(1));
5525 return arm_libcall_value_1 (mode);
5528 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5530 static bool
5531 arm_function_value_regno_p (const unsigned int regno)
5533 if (regno == ARG_REGISTER (1)
5534 || (TARGET_32BIT
5535 && TARGET_AAPCS_BASED
5536 && TARGET_HARD_FLOAT
5537 && regno == FIRST_VFP_REGNUM)
5538 || (TARGET_IWMMXT_ABI
5539 && regno == FIRST_IWMMXT_REGNUM))
5540 return true;
5542 return false;
5545 /* Determine the amount of memory needed to store the possible return
5546 registers of an untyped call. */
5548 arm_apply_result_size (void)
5550 int size = 16;
5552 if (TARGET_32BIT)
5554 if (TARGET_HARD_FLOAT_ABI)
5555 size += 32;
5556 if (TARGET_IWMMXT_ABI)
5557 size += 8;
5560 return size;
5563 /* Decide whether TYPE should be returned in memory (true)
5564 or in a register (false). FNTYPE is the type of the function making
5565 the call. */
5566 static bool
5567 arm_return_in_memory (const_tree type, const_tree fntype)
5569 HOST_WIDE_INT size;
5571 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5573 if (TARGET_AAPCS_BASED)
5575 /* Simple, non-aggregate types (i.e. not including vectors and
5576 complex) are always returned in a register (or registers).
5577 We don't care about which register here, so we can short-cut
5578 some of the detail. */
5579 if (!AGGREGATE_TYPE_P (type)
5580 && TREE_CODE (type) != VECTOR_TYPE
5581 && TREE_CODE (type) != COMPLEX_TYPE)
5582 return false;
5584 /* Any return value that is no larger than one word can be
5585 returned in r0. */
5586 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5587 return false;
5589 /* Check any available co-processors to see if they accept the
5590 type as a register candidate (VFP, for example, can return
5591 some aggregates in consecutive registers). These aren't
5592 available if the call is variadic. */
5593 if (aapcs_select_return_coproc (type, fntype) >= 0)
5594 return false;
5596 /* Vector values should be returned using ARM registers, not
5597 memory (unless they're over 16 bytes, which will break since
5598 we only have four call-clobbered registers to play with). */
5599 if (TREE_CODE (type) == VECTOR_TYPE)
5600 return (size < 0 || size > (4 * UNITS_PER_WORD));
5602 /* The rest go in memory. */
5603 return true;
5606 if (TREE_CODE (type) == VECTOR_TYPE)
5607 return (size < 0 || size > (4 * UNITS_PER_WORD));
5609 if (!AGGREGATE_TYPE_P (type) &&
5610 (TREE_CODE (type) != VECTOR_TYPE))
5611 /* All simple types are returned in registers. */
5612 return false;
5614 if (arm_abi != ARM_ABI_APCS)
5616 /* ATPCS and later return aggregate types in memory only if they are
5617 larger than a word (or are variable size). */
5618 return (size < 0 || size > UNITS_PER_WORD);
5621 /* For the arm-wince targets we choose to be compatible with Microsoft's
5622 ARM and Thumb compilers, which always return aggregates in memory. */
5623 #ifndef ARM_WINCE
5624 /* All structures/unions bigger than one word are returned in memory.
5625 Also catch the case where int_size_in_bytes returns -1. In this case
5626 the aggregate is either huge or of variable size, and in either case
5627 we will want to return it via memory and not in a register. */
5628 if (size < 0 || size > UNITS_PER_WORD)
5629 return true;
5631 if (TREE_CODE (type) == RECORD_TYPE)
5633 tree field;
5635 /* For a struct the APCS says that we only return in a register
5636 if the type is 'integer like' and every addressable element
5637 has an offset of zero. For practical purposes this means
5638 that the structure can have at most one non bit-field element
5639 and that this element must be the first one in the structure. */
5641 /* Find the first field, ignoring non FIELD_DECL things which will
5642 have been created by C++. */
5643 for (field = TYPE_FIELDS (type);
5644 field && TREE_CODE (field) != FIELD_DECL;
5645 field = DECL_CHAIN (field))
5646 continue;
5648 if (field == NULL)
5649 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5651 /* Check that the first field is valid for returning in a register. */
5653 /* ... Floats are not allowed */
5654 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5655 return true;
5657 /* ... Aggregates that are not themselves valid for returning in
5658 a register are not allowed. */
5659 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5660 return true;
5662 /* Now check the remaining fields, if any. Only bitfields are allowed,
5663 since they are not addressable. */
5664 for (field = DECL_CHAIN (field);
5665 field;
5666 field = DECL_CHAIN (field))
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5671 if (!DECL_BIT_FIELD_TYPE (field))
5672 return true;
5675 return false;
5678 if (TREE_CODE (type) == UNION_TYPE)
5680 tree field;
5682 /* Unions can be returned in registers if every element is
5683 integral, or can be returned in an integer register. */
5684 for (field = TYPE_FIELDS (type);
5685 field;
5686 field = DECL_CHAIN (field))
5688 if (TREE_CODE (field) != FIELD_DECL)
5689 continue;
5691 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5692 return true;
5694 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5695 return true;
5698 return false;
5700 #endif /* not ARM_WINCE */
5702 /* Return all other types in memory. */
5703 return true;
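/* Illustrative examples for the rules above (editor-supplied type
   names):  */
#if 0
struct one_word  { int x; };	 /* one word: returned in r0		*/
struct two_words { int a, b; };	 /* larger than a word and not a VFP
				    candidate: returned in memory	*/
#endif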
5706 const struct pcs_attribute_arg
5708 const char *arg;
5709 enum arm_pcs value;
5710 } pcs_attribute_args[] =
5712 {"aapcs", ARM_PCS_AAPCS},
5713 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5714 #if 0
5715 /* We could recognize these, but changes would be needed elsewhere
5716 * to implement them. */
5717 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5718 {"atpcs", ARM_PCS_ATPCS},
5719 {"apcs", ARM_PCS_APCS},
5720 #endif
5721 {NULL, ARM_PCS_UNKNOWN}
5724 static enum arm_pcs
5725 arm_pcs_from_attribute (tree attr)
5727 const struct pcs_attribute_arg *ptr;
5728 const char *arg;
5730 /* Get the value of the argument. */
5731 if (TREE_VALUE (attr) == NULL_TREE
5732 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5733 return ARM_PCS_UNKNOWN;
5735 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5737 /* Check it against the list of known arguments. */
5738 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5739 if (streq (arg, ptr->arg))
5740 return ptr->value;
5742 /* An unrecognized PCS variant. */
5743 return ARM_PCS_UNKNOWN;
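/* Illustrative use of the attribute strings recognized above
   (editor-supplied declaration):  */
#if 0
/* Force the base (integer) variant for one function even when the rest
   of the unit uses the VFP variant of the AAPCS.  */
double sum2 (double a, double b) __attribute__ ((pcs ("aapcs")));
#endif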
5746 /* Get the PCS variant to use for this call. TYPE is the function's type
5747 specification, DECL is the specific declaration. DECL may be null if
5748 the call could be indirect or if this is a library call. */
5749 static enum arm_pcs
5750 arm_get_pcs_model (const_tree type, const_tree decl)
5752 bool user_convention = false;
5753 enum arm_pcs user_pcs = arm_pcs_default;
5754 tree attr;
5756 gcc_assert (type);
5758 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5759 if (attr)
5761 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5762 user_convention = true;
5765 if (TARGET_AAPCS_BASED)
5767 /* Detect varargs functions. These always use the base rules
5768 (no argument is ever a candidate for a co-processor
5769 register). */
5770 bool base_rules = stdarg_p (type);
5772 if (user_convention)
5774 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5775 sorry ("non-AAPCS derived PCS variant");
5776 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5777 error ("variadic functions must use the base AAPCS variant");
5780 if (base_rules)
5781 return ARM_PCS_AAPCS;
5782 else if (user_convention)
5783 return user_pcs;
5784 else if (decl && flag_unit_at_a_time)
5786 /* Local functions never leak outside this compilation unit,
5787 so we are free to use whatever conventions are
5788 appropriate. */
5789 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5790 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5791 if (i && i->local)
5792 return ARM_PCS_AAPCS_LOCAL;
5795 else if (user_convention && user_pcs != arm_pcs_default)
5796 sorry ("PCS variant");
5798 /* For everything else we use the target's default. */
5799 return arm_pcs_default;
5803 static void
5804 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5805 const_tree fntype ATTRIBUTE_UNUSED,
5806 rtx libcall ATTRIBUTE_UNUSED,
5807 const_tree fndecl ATTRIBUTE_UNUSED)
5809 /* Record the unallocated VFP registers. */
5810 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5811 pcum->aapcs_vfp_reg_alloc = 0;
5814 /* Walk down the type tree of TYPE counting consecutive base elements.
5815 If *MODEP is VOIDmode, then set it to the first valid floating point
5816 type. If a non-floating point type is found, or if a floating point
5817 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5818 otherwise return the count in the sub-tree. */
5819 static int
5820 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5822 machine_mode mode;
5823 HOST_WIDE_INT size;
5825 switch (TREE_CODE (type))
5827 case REAL_TYPE:
5828 mode = TYPE_MODE (type);
5829 if (mode != DFmode && mode != SFmode && mode != HFmode)
5830 return -1;
5832 if (*modep == VOIDmode)
5833 *modep = mode;
5835 if (*modep == mode)
5836 return 1;
5838 break;
5840 case COMPLEX_TYPE:
5841 mode = TYPE_MODE (TREE_TYPE (type));
5842 if (mode != DFmode && mode != SFmode)
5843 return -1;
5845 if (*modep == VOIDmode)
5846 *modep = mode;
5848 if (*modep == mode)
5849 return 2;
5851 break;
5853 case VECTOR_TYPE:
5854 /* Use V2SImode and V4SImode as representatives of all 64-bit
5855 and 128-bit vector types, whether or not those modes are
5856 supported with the present options. */
5857 size = int_size_in_bytes (type);
5858 switch (size)
5860 case 8:
5861 mode = V2SImode;
5862 break;
5863 case 16:
5864 mode = V4SImode;
5865 break;
5866 default:
5867 return -1;
5870 if (*modep == VOIDmode)
5871 *modep = mode;
5873 /* Vector modes are considered to be opaque: two vectors are
5874 equivalent for the purposes of being homogeneous aggregates
5875 if they are the same size. */
5876 if (*modep == mode)
5877 return 1;
5879 break;
5881 case ARRAY_TYPE:
5883 int count;
5884 tree index = TYPE_DOMAIN (type);
5886 /* Can't handle incomplete types nor sizes that are not
5887 fixed. */
5888 if (!COMPLETE_TYPE_P (type)
5889 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5890 return -1;
5892 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5893 if (count == -1
5894 || !index
5895 || !TYPE_MAX_VALUE (index)
5896 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5897 || !TYPE_MIN_VALUE (index)
5898 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5899 || count < 0)
5900 return -1;
5902 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5903 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5905 /* There must be no padding. */
5906 if (wi::to_wide (TYPE_SIZE (type))
5907 != count * GET_MODE_BITSIZE (*modep))
5908 return -1;
5910 return count;
5913 case RECORD_TYPE:
5915 int count = 0;
5916 int sub_count;
5917 tree field;
5919 /* Can't handle incomplete types nor sizes that are not
5920 fixed. */
5921 if (!COMPLETE_TYPE_P (type)
5922 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5923 return -1;
5925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5927 if (TREE_CODE (field) != FIELD_DECL)
5928 continue;
5930 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5931 if (sub_count < 0)
5932 return -1;
5933 count += sub_count;
5936 /* There must be no padding. */
5937 if (wi::to_wide (TYPE_SIZE (type))
5938 != count * GET_MODE_BITSIZE (*modep))
5939 return -1;
5941 return count;
5944 case UNION_TYPE:
5945 case QUAL_UNION_TYPE:
5947 /* These aren't very interesting except in a degenerate case. */
5948 int count = 0;
5949 int sub_count;
5950 tree field;
5952 /* Can't handle incomplete types nor sizes that are not
5953 fixed. */
5954 if (!COMPLETE_TYPE_P (type)
5955 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5956 return -1;
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) != FIELD_DECL)
5961 continue;
5963 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5964 if (sub_count < 0)
5965 return -1;
5966 count = count > sub_count ? count : sub_count;
5969 /* There must be no padding. */
5970 if (wi::to_wide (TYPE_SIZE (type))
5971 != count * GET_MODE_BITSIZE (*modep))
5972 return -1;
5974 return count;
5977 default:
5978 break;
5981 return -1;
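/* Illustrative candidates for the walker above (editor-supplied type
   names):  */
#if 0
struct hfa3  { float x, y, z; };   /* count 3, *modep == SFmode	*/
struct hda2  { double re, im; };   /* count 2, *modep == DFmode	*/
struct mixed { float f; int i; };  /* non-FP member: result is -1	*/
#endif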
5984 /* Return true if PCS_VARIANT should use VFP registers. */
5985 static bool
5986 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5988 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5990 static bool seen_thumb1_vfp = false;
5992 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5994 sorry ("Thumb-1 hard-float VFP ABI");
5995 /* sorry() is not immediately fatal, so only display this once. */
5996 seen_thumb1_vfp = true;
5999 return true;
6002 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6003 return false;
6005 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6006 (TARGET_VFP_DOUBLE || !is_double));
6009 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6010 suitable for passing or returning in VFP registers for the PCS
6011 variant selected. If it is, then *BASE_MODE is updated to contain
6012 a machine mode describing each element of the argument's type and
6013 *COUNT to hold the number of such elements. */
6014 static bool
6015 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6016 machine_mode mode, const_tree type,
6017 machine_mode *base_mode, int *count)
6019 machine_mode new_mode = VOIDmode;
6021 /* If we have the type information, prefer that to working things
6022 out from the mode. */
6023 if (type)
6025 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6027 if (ag_count > 0 && ag_count <= 4)
6028 *count = ag_count;
6029 else
6030 return false;
6032 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6033 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6034 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6036 *count = 1;
6037 new_mode = mode;
6039 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6041 *count = 2;
6042 new_mode = (mode == DCmode ? DFmode : SFmode);
6044 else
6045 return false;
6048 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6049 return false;
6051 *base_mode = new_mode;
6052 return true;
6055 static bool
6056 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6057 machine_mode mode, const_tree type)
6059 int count ATTRIBUTE_UNUSED;
6060 machine_mode ag_mode ATTRIBUTE_UNUSED;
6062 if (!use_vfp_abi (pcs_variant, false))
6063 return false;
6064 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6065 &ag_mode, &count);
6068 static bool
6069 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6070 const_tree type)
6072 if (!use_vfp_abi (pcum->pcs_variant, false))
6073 return false;
6075 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6076 &pcum->aapcs_vfp_rmode,
6077 &pcum->aapcs_vfp_rcount);
6080 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6081 for the behaviour of this function. */
6083 static bool
6084 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6085 const_tree type ATTRIBUTE_UNUSED)
6087 int rmode_size
6088 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6089 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6090 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6091 int regno;
6093 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6094 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6096 pcum->aapcs_vfp_reg_alloc = mask << regno;
6097 if (mode == BLKmode
6098 || (mode == TImode && ! TARGET_NEON)
6099 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6101 int i;
6102 int rcount = pcum->aapcs_vfp_rcount;
6103 int rshift = shift;
6104 machine_mode rmode = pcum->aapcs_vfp_rmode;
6105 rtx par;
6106 if (!TARGET_NEON)
6108 /* Avoid using unsupported vector modes. */
6109 if (rmode == V2SImode)
6110 rmode = DImode;
6111 else if (rmode == V4SImode)
6113 rmode = DImode;
6114 rcount *= 2;
6115 rshift /= 2;
6118 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6119 for (i = 0; i < rcount; i++)
6121 rtx tmp = gen_rtx_REG (rmode,
6122 FIRST_VFP_REGNUM + regno + i * rshift);
6123 tmp = gen_rtx_EXPR_LIST
6124 (VOIDmode, tmp,
6125 GEN_INT (i * GET_MODE_SIZE (rmode)));
6126 XVECEXP (par, 0, i) = tmp;
6129 pcum->aapcs_reg = par;
6131 else
6132 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6133 return true;
6135 return false;
6138 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6139 comment there for the behaviour of this function. */
6141 static rtx
6142 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6143 machine_mode mode,
6144 const_tree type ATTRIBUTE_UNUSED)
6146 if (!use_vfp_abi (pcs_variant, false))
6147 return NULL;
6149 if (mode == BLKmode
6150 || (GET_MODE_CLASS (mode) == MODE_INT
6151 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6152 && !TARGET_NEON))
6154 int count;
6155 machine_mode ag_mode;
6156 int i;
6157 rtx par;
6158 int shift;
6160 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6161 &ag_mode, &count);
6163 if (!TARGET_NEON)
6165 if (ag_mode == V2SImode)
6166 ag_mode = DImode;
6167 else if (ag_mode == V4SImode)
6169 ag_mode = DImode;
6170 count *= 2;
6173 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6174 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6175 for (i = 0; i < count; i++)
6177 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6178 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6179 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6180 XVECEXP (par, 0, i) = tmp;
6183 return par;
6186 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6189 static void
6190 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6191 machine_mode mode ATTRIBUTE_UNUSED,
6192 const_tree type ATTRIBUTE_UNUSED)
6194 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6195 pcum->aapcs_vfp_reg_alloc = 0;
6196 return;
6199 #define AAPCS_CP(X) \
6201 aapcs_ ## X ## _cum_init, \
6202 aapcs_ ## X ## _is_call_candidate, \
6203 aapcs_ ## X ## _allocate, \
6204 aapcs_ ## X ## _is_return_candidate, \
6205 aapcs_ ## X ## _allocate_return_reg, \
6206 aapcs_ ## X ## _advance \
6209 /* Table of co-processors that can be used to pass arguments in
6210 registers. Ideally no argument should be a candidate for more than
6211 one co-processor table entry, but the table is processed in order
6212 and stops after the first match. If that entry then fails to put
6213 the argument into a co-processor register, the argument will go on
6214 the stack. */
6215 static struct
6217 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6218 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6220 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6221 BLKmode) is a candidate for this co-processor's registers; this
6222 function should ignore any position-dependent state in
6223 CUMULATIVE_ARGS and only use call-type dependent information. */
6224 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 /* Return true if the argument does get a co-processor register; it
6227 should set aapcs_reg to an RTX of the register allocated as is
6228 required for a return from FUNCTION_ARG. */
6229 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6231 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6232 be returned in this co-processor's registers. */
6233 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6235 /* Allocate and return an RTX element to hold the return type of a call. This
6236 routine must not fail and will only be called if is_return_candidate
6237 returned true with the same parameters. */
6238 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6240 /* Finish processing this argument and prepare to start processing
6241 the next one. */
6242 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6243 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6245 AAPCS_CP(vfp)
6248 #undef AAPCS_CP
6250 static int
6251 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6252 const_tree type)
6254 int i;
6256 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6257 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6258 return i;
6260 return -1;
6263 static int
6264 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant;
6271 if (fntype)
6273 const_tree fndecl = NULL_TREE;
6275 if (TREE_CODE (fntype) == FUNCTION_DECL)
6277 fndecl = fntype;
6278 fntype = TREE_TYPE (fntype);
6281 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6283 else
6284 pcs_variant = arm_pcs_default;
6286 if (pcs_variant != ARM_PCS_AAPCS)
6288 int i;
6290 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6291 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6292 TYPE_MODE (type),
6293 type))
6294 return i;
6296 return -1;
6299 static rtx
6300 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6301 const_tree fntype)
6303 /* We aren't passed a decl, so we can't check that a call is local.
6304 However, it isn't clear that that would be a win anyway, since it
6305 might limit some tail-calling opportunities. */
6306 enum arm_pcs pcs_variant;
6307 int unsignedp ATTRIBUTE_UNUSED;
6309 if (fntype)
6311 const_tree fndecl = NULL_TREE;
6313 if (TREE_CODE (fntype) == FUNCTION_DECL)
6315 fndecl = fntype;
6316 fntype = TREE_TYPE (fntype);
6319 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6321 else
6322 pcs_variant = arm_pcs_default;
6324 /* Promote integer types. */
6325 if (type && INTEGRAL_TYPE_P (type))
6326 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6328 if (pcs_variant != ARM_PCS_AAPCS)
6330 int i;
6332 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6333 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6334 type))
6335 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6336 mode, type);
6339 /* Promotes small structs returned in a register to full-word size
6340 for big-endian AAPCS. */
6341 if (type && arm_return_in_msb (type))
6343 HOST_WIDE_INT size = int_size_in_bytes (type);
6344 if (size % UNITS_PER_WORD != 0)
6346 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6347 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6351 return gen_rtx_REG (mode, R0_REGNUM);
6354 static rtx
6355 aapcs_libcall_value (machine_mode mode)
6357 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6358 && GET_MODE_SIZE (mode) <= 4)
6359 mode = SImode;
6361 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6364 /* Lay out a function argument using the AAPCS rules. The rule
6365 numbers referred to here are those in the AAPCS. */
6366 static void
6367 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6368 const_tree type, bool named)
6370 int nregs, nregs2;
6371 int ncrn;
6373 /* We only need to do this once per argument. */
6374 if (pcum->aapcs_arg_processed)
6375 return;
6377 pcum->aapcs_arg_processed = true;
6379 /* Special case: if named is false then we are handling an incoming
6380 anonymous argument which is on the stack. */
6381 if (!named)
6382 return;
6384 /* Is this a potential co-processor register candidate? */
6385 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6387 int slot = aapcs_select_call_coproc (pcum, mode, type);
6388 pcum->aapcs_cprc_slot = slot;
6390 /* We don't have to apply any of the rules from part B of the
6391 preparation phase, these are handled elsewhere in the
6392 compiler. */
6394 if (slot >= 0)
6396 /* A Co-processor register candidate goes either in its own
6397 class of registers or on the stack. */
6398 if (!pcum->aapcs_cprc_failed[slot])
6400 /* C1.cp - Try to allocate the argument to co-processor
6401 registers. */
6402 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6403 return;
6405 /* C2.cp - Put the argument on the stack and note that we
6406 can't assign any more candidates in this slot. We also
6407 need to note that we have allocated stack space, so that
6408 we won't later try to split a non-cprc candidate between
6409 core registers and the stack. */
6410 pcum->aapcs_cprc_failed[slot] = true;
6411 pcum->can_split = false;
6414 /* We didn't get a register, so this argument goes on the
6415 stack. */
6416 gcc_assert (pcum->can_split == false);
6417 return;
6421 /* C3 - For double-word aligned arguments, round the NCRN up to the
6422 next even number. */
6423 ncrn = pcum->aapcs_ncrn;
6424 if (ncrn & 1)
6426 int res = arm_needs_doubleword_align (mode, type);
6427 /* Only warn during RTL expansion of call stmts, otherwise we would
6428 warn e.g. during gimplification even on functions that will
6429 always be inlined, and we'd warn multiple times. Don't warn when
6430 called in expand_function_start either, as we warn instead in
6431 arm_function_arg_boundary in that case. */
6432 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6433 inform (input_location, "parameter passing for argument of type "
6434 "%qT changed in GCC 7.1", type);
6435 else if (res > 0)
6436 ncrn++;
6439 nregs = ARM_NUM_REGS2(mode, type);
6441 /* Sigh, this test should really assert that nregs > 0, but a GCC
6442 extension allows empty structs and then gives them empty size; it
6443 then allows such a structure to be passed by value. For some of
6444 the code below we have to pretend that such an argument has
6445 non-zero size so that we 'locate' it correctly either in
6446 registers or on the stack. */
6447 gcc_assert (nregs >= 0);
6449 nregs2 = nregs ? nregs : 1;
6451 /* C4 - Argument fits entirely in core registers. */
6452 if (ncrn + nregs2 <= NUM_ARG_REGS)
6454 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6455 pcum->aapcs_next_ncrn = ncrn + nregs;
6456 return;
6459 /* C5 - Some core registers left and there are no arguments already
6460 on the stack: split this argument between the remaining core
6461 registers and the stack. */
6462 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6464 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6465 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6466 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6467 return;
6470 /* C6 - NCRN is set to 4. */
6471 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6473 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6474 return;
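/* Illustrative layouts under the rules above (editor-supplied
   prototypes, base AAPCS assumed):  */
#if 0
void f (int a, int b, int c, int d, int e);
		/* a..d in r0-r3 (C4); e on the stack (C7/C8).  */
void g (int a, long long b, int c);
		/* a in r0; C3 skips r1; b in r2/r3; c on the stack.  */
#endif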
6477 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6478 for a call to a function whose data type is FNTYPE.
6479 For a library call, FNTYPE is NULL. */
6480 void
6481 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6482 rtx libname,
6483 tree fndecl ATTRIBUTE_UNUSED)
6485 /* Long call handling. */
6486 if (fntype)
6487 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6488 else
6489 pcum->pcs_variant = arm_pcs_default;
6491 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6493 if (arm_libcall_uses_aapcs_base (libname))
6494 pcum->pcs_variant = ARM_PCS_AAPCS;
6496 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6497 pcum->aapcs_reg = NULL_RTX;
6498 pcum->aapcs_partial = 0;
6499 pcum->aapcs_arg_processed = false;
6500 pcum->aapcs_cprc_slot = -1;
6501 pcum->can_split = true;
6503 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6505 int i;
6507 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6509 pcum->aapcs_cprc_failed[i] = false;
6510 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6513 return;
6516 /* Legacy ABIs */
6518 /* On the ARM, the offset starts at 0. */
6519 pcum->nregs = 0;
6520 pcum->iwmmxt_nregs = 0;
6521 pcum->can_split = true;
6523 /* Varargs vectors are treated the same as long long.
6524 named_count avoids having to change the way arm handles 'named' */
6525 pcum->named_count = 0;
6526 pcum->nargs = 0;
6528 if (TARGET_REALLY_IWMMXT && fntype)
6530 tree fn_arg;
6532 for (fn_arg = TYPE_ARG_TYPES (fntype);
6533 fn_arg;
6534 fn_arg = TREE_CHAIN (fn_arg))
6535 pcum->named_count += 1;
6537 if (! pcum->named_count)
6538 pcum->named_count = INT_MAX;
6542 /* Return 1 if double word alignment is required for argument passing.
6543 Return -1 if double word alignment used to be required for argument
6544 passing before PR77728 ABI fix, but is not required anymore.
6545 Return 0 if double word alignment is not required and wasn't required
6546 before either. */
6547 static int
6548 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6550 if (!type)
6551 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6553 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6554 if (!AGGREGATE_TYPE_P (type))
6555 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6557 /* Array types: Use member alignment of element type. */
6558 if (TREE_CODE (type) == ARRAY_TYPE)
6559 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6561 int ret = 0;
6562 /* Record/aggregate types: Use greatest member alignment of any member. */
6563 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6564 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6566 if (TREE_CODE (field) == FIELD_DECL)
6567 return 1;
6568 else
6569 /* Before PR77728 fix, we were incorrectly considering also
6570 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6571 Make sure we can warn about that with -Wpsabi. */
6572 ret = -1;
6575 return ret;
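/* Illustration (editor-supplied): a 64-bit aligned member such as
   'long long' or 'double' exceeds PARM_BOUNDARY, so the aggregate gets
   doubleword alignment for argument passing; over-aligned non-FIELD_DECL
   entries (e.g. C++ static data members) now only trigger the -Wpsabi
   note via the -1 return value.  */
#if 0
struct needs_dword { long long x; };	/* arm_needs_doubleword_align: 1 */
struct plain	   { int a, b; };	/* arm_needs_doubleword_align: 0 */
#endif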
6579 /* Determine where to put an argument to a function.
6580 Value is zero to push the argument on the stack,
6581 or a hard register in which to store the argument.
6583 MODE is the argument's machine mode.
6584 TYPE is the data type of the argument (as a tree).
6585 This is null for libcalls where that information may
6586 not be available.
6587 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6588 the preceding args and about the function being called.
6589 NAMED is nonzero if this argument is a named parameter
6590 (otherwise it is an extra parameter matching an ellipsis).
6592 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6593 other arguments are passed on the stack. If (NAMED == 0) (which happens
6594 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6595 defined), say it is passed in the stack (function_prologue will
6596 indeed make it pass in the stack if necessary). */
6598 static rtx
6599 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6600 const_tree type, bool named)
6602 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6603 int nregs;
6605 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6606 a call insn (op3 of a call_value insn). */
6607 if (mode == VOIDmode)
6608 return const0_rtx;
6610 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6612 aapcs_layout_arg (pcum, mode, type, named);
6613 return pcum->aapcs_reg;
6616 /* Varargs vectors are treated the same as long long.
6617 named_count avoids having to change the way arm handles 'named' */
6618 if (TARGET_IWMMXT_ABI
6619 && arm_vector_mode_supported_p (mode)
6620 && pcum->named_count > pcum->nargs + 1)
6622 if (pcum->iwmmxt_nregs <= 9)
6623 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6624 else
6626 pcum->can_split = false;
6627 return NULL_RTX;
6631 /* Put doubleword aligned quantities in even register pairs. */
6632 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6634 int res = arm_needs_doubleword_align (mode, type);
6635 if (res < 0 && warn_psabi)
6636 inform (input_location, "parameter passing for argument of type "
6637 "%qT changed in GCC 7.1", type);
6638 else if (res > 0)
6639 pcum->nregs++;
6642 /* Only allow splitting an arg between regs and memory if all preceding
6643 args were allocated to regs. For args passed by reference we only count
6644 the reference pointer. */
6645 if (pcum->can_split)
6646 nregs = 1;
6647 else
6648 nregs = ARM_NUM_REGS2 (mode, type);
6650 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6651 return NULL_RTX;
6653 return gen_rtx_REG (mode, pcum->nregs);
6656 static unsigned int
6657 arm_function_arg_boundary (machine_mode mode, const_tree type)
6659 if (!ARM_DOUBLEWORD_ALIGN)
6660 return PARM_BOUNDARY;
6662 int res = arm_needs_doubleword_align (mode, type);
6663 if (res < 0 && warn_psabi)
6664 inform (input_location, "parameter passing for argument of type %qT "
6665 "changed in GCC 7.1", type);
6667 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6670 static int
6671 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6672 tree type, bool named)
6674 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6675 int nregs = pcum->nregs;
6677 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6679 aapcs_layout_arg (pcum, mode, type, named);
6680 return pcum->aapcs_partial;
6683 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6684 return 0;
6686 if (NUM_ARG_REGS > nregs
6687 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6688 && pcum->can_split)
6689 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6691 return 0;
6694 /* Update the data in PCUM to advance over an argument
6695 of mode MODE and data type TYPE.
6696 (TYPE is null for libcalls where that information may not be available.) */
6698 static void
6699 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6700 const_tree type, bool named)
6702 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6704 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6706 aapcs_layout_arg (pcum, mode, type, named);
6708 if (pcum->aapcs_cprc_slot >= 0)
6710 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6711 type);
6712 pcum->aapcs_cprc_slot = -1;
6715 /* Generic stuff. */
6716 pcum->aapcs_arg_processed = false;
6717 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6718 pcum->aapcs_reg = NULL_RTX;
6719 pcum->aapcs_partial = 0;
6721 else
6723 pcum->nargs += 1;
6724 if (arm_vector_mode_supported_p (mode)
6725 && pcum->named_count > pcum->nargs
6726 && TARGET_IWMMXT_ABI)
6727 pcum->iwmmxt_nregs += 1;
6728 else
6729 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6733 /* Variable sized types are passed by reference. This is a GCC
6734 extension to the ARM ABI. */
6736 static bool
6737 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6738 machine_mode mode ATTRIBUTE_UNUSED,
6739 const_tree type, bool named ATTRIBUTE_UNUSED)
6741 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6744 /* Encode the current state of the #pragma [no_]long_calls. */
6745 typedef enum
6747 OFF, /* No #pragma [no_]long_calls is in effect. */
6748 LONG, /* #pragma long_calls is in effect. */
6749 SHORT /* #pragma no_long_calls is in effect. */
6750 } arm_pragma_enum;
6752 static arm_pragma_enum arm_pragma_long_calls = OFF;
6754 void
6755 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6757 arm_pragma_long_calls = LONG;
6760 void
6761 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6763 arm_pragma_long_calls = SHORT;
6766 void
6767 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6769 arm_pragma_long_calls = OFF;
6772 /* Handle an attribute requiring a FUNCTION_DECL;
6773 arguments as in struct attribute_spec.handler. */
6774 static tree
6775 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6776 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6778 if (TREE_CODE (*node) != FUNCTION_DECL)
6780 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6781 name);
6782 *no_add_attrs = true;
6785 return NULL_TREE;
6788 /* Handle an "interrupt" or "isr" attribute;
6789 arguments as in struct attribute_spec.handler. */
6790 static tree
6791 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6792 bool *no_add_attrs)
6794 if (DECL_P (*node))
6796 if (TREE_CODE (*node) != FUNCTION_DECL)
6798 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6799 name);
6800 *no_add_attrs = true;
6802 /* FIXME: the argument if any is checked for type attributes;
6803 should it be checked for decl ones? */
6805 else
6807 if (TREE_CODE (*node) == FUNCTION_TYPE
6808 || TREE_CODE (*node) == METHOD_TYPE)
6810 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6812 warning (OPT_Wattributes, "%qE attribute ignored",
6813 name);
6814 *no_add_attrs = true;
6817 else if (TREE_CODE (*node) == POINTER_TYPE
6818 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6819 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6820 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6822 *node = build_variant_type_copy (*node);
6823 TREE_TYPE (*node) = build_type_attribute_variant
6824 (TREE_TYPE (*node),
6825 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6826 *no_add_attrs = true;
6828 else
6830 /* Possibly pass this attribute on from the type to a decl. */
6831 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6832 | (int) ATTR_FLAG_FUNCTION_NEXT
6833 | (int) ATTR_FLAG_ARRAY_NEXT))
6835 *no_add_attrs = true;
6836 return tree_cons (name, args, NULL_TREE);
6838 else
6840 warning (OPT_Wattributes, "%qE attribute ignored",
6841 name);
6846 return NULL_TREE;
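/* Illustrative use of the attribute handled above (editor-supplied
   declarations; the argument strings are among those accepted by
   arm_isr_value):  */
#if 0
void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
void fiq_handler (void) __attribute__ ((isr ("FIQ")));
#endif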
6849 /* Handle a "pcs" attribute; arguments as in struct
6850 attribute_spec.handler. */
6851 static tree
6852 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6853 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6855 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6857 warning (OPT_Wattributes, "%qE attribute ignored", name);
6858 *no_add_attrs = true;
6860 return NULL_TREE;
6863 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6864 /* Handle the "notshared" attribute. This attribute is another way of
6865 requesting hidden visibility. ARM's compiler supports
6866 "__declspec(notshared)"; we support the same thing via an
6867 attribute. */
6869 static tree
6870 arm_handle_notshared_attribute (tree *node,
6871 tree name ATTRIBUTE_UNUSED,
6872 tree args ATTRIBUTE_UNUSED,
6873 int flags ATTRIBUTE_UNUSED,
6874 bool *no_add_attrs)
6876 tree decl = TYPE_NAME (*node);
6878 if (decl)
6880 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6881 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6882 *no_add_attrs = false;
6884 return NULL_TREE;
6886 #endif
6888 /* This function returns true if a function with declaration FNDECL and type
6889 FNTYPE uses the stack to pass arguments or return variables and false
6890 otherwise. This is used for functions with the attributes
6891 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6892 diagnostic messages if the stack is used. NAME is the name of the attribute
6893 used. */
6895 static bool
6896 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6898 function_args_iterator args_iter;
6899 CUMULATIVE_ARGS args_so_far_v;
6900 cumulative_args_t args_so_far;
6901 bool first_param = true;
6902 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6904 /* Error out if any argument is passed on the stack. */
6905 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6906 args_so_far = pack_cumulative_args (&args_so_far_v);
6907 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6909 rtx arg_rtx;
6910 machine_mode arg_mode = TYPE_MODE (arg_type);
6912 prev_arg_type = arg_type;
6913 if (VOID_TYPE_P (arg_type))
6914 continue;
6916 if (!first_param)
6917 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6918 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6919 if (!arg_rtx
6920 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6922 error ("%qE attribute not available to functions with arguments "
6923 "passed on the stack", name);
6924 return true;
6926 first_param = false;
6929 /* Error out for variadic functions since we cannot control how many
6930 arguments will be passed and thus stack could be used. stdarg_p () is not
6931 used for the checking to avoid browsing arguments twice. */
6932 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6934 error ("%qE attribute not available to functions with variable number "
6935 "of arguments", name);
6936 return true;
6939 /* Error out if return value is passed on the stack. */
6940 ret_type = TREE_TYPE (fntype);
6941 if (arm_return_in_memory (ret_type, fntype))
6943 error ("%qE attribute not available to functions that return value on "
6944 "the stack", name);
6945 return true;
6947 return false;
6950 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6951 function will check whether the attribute is allowed here and will add the
6952 attribute to the function declaration tree or otherwise issue a warning. */
6954 static tree
6955 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6956 tree /* args */,
6957 int /* flags */,
6958 bool *no_add_attrs)
6960 tree fndecl;
6962 if (!use_cmse)
6964 *no_add_attrs = true;
6965 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6966 name);
6967 return NULL_TREE;
6970 /* Ignore attribute for function types. */
6971 if (TREE_CODE (*node) != FUNCTION_DECL)
6973 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6974 name);
6975 *no_add_attrs = true;
6976 return NULL_TREE;
6979 fndecl = *node;
6981 /* Warn for static linkage functions. */
6982 if (!TREE_PUBLIC (fndecl))
6984 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6985 "with static linkage", name);
6986 *no_add_attrs = true;
6987 return NULL_TREE;
6990 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6991 TREE_TYPE (fndecl));
6992 return NULL_TREE;
6996 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6997 function will check whether the attribute is allowed here and will add the
6998 attribute to the function type tree or otherwise issue a diagnostic. The
6999 reason we check this at declaration time is to only allow the use of the
7000 attribute with declarations of function pointers and not function
7001 declarations. This function checks NODE is of the expected type and issues
7002 diagnostics otherwise using NAME. If it is not of the expected type
7003 *NO_ADD_ATTRS will be set to true. */
7005 static tree
7006 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7007 tree /* args */,
7008 int /* flags */,
7009 bool *no_add_attrs)
7011 tree decl = NULL_TREE, fntype = NULL_TREE;
7012 tree type;
7014 if (!use_cmse)
7016 *no_add_attrs = true;
7017 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7018 name);
7019 return NULL_TREE;
7022 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7024 decl = *node;
7025 fntype = TREE_TYPE (decl);
7028 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7029 fntype = TREE_TYPE (fntype);
7031 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7033 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7034 "function pointer", name);
7035 *no_add_attrs = true;
7036 return NULL_TREE;
7039 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7041 if (*no_add_attrs)
7042 return NULL_TREE;
7044 /* Prevent trees being shared among function types with and without
7045 cmse_nonsecure_call attribute. */
7046 type = TREE_TYPE (decl);
7048 type = build_distinct_type_copy (type);
7049 TREE_TYPE (decl) = type;
7050 fntype = type;
7052 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7054 type = fntype;
7055 fntype = TREE_TYPE (fntype);
7056 fntype = build_distinct_type_copy (fntype);
7057 TREE_TYPE (type) = fntype;
7060 /* Construct a type attribute and add it to the function type. */
7061 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7062 TYPE_ATTRIBUTES (fntype));
7063 TYPE_ATTRIBUTES (fntype) = attrs;
7064 return NULL_TREE;
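/* Illustrative use of the two CMSE attributes handled above
   (editor-supplied declarations; both require -mcmse):  */
#if 0
/* Secure entry point callable from non-secure state.  */
int __attribute__ ((cmse_nonsecure_entry)) get_counter (void);

/* Pointer to a non-secure function; the attribute applies to the
   function type reached through the pointer, not to a function
   declaration.  */
int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);
#endif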
7067 /* Return 0 if the attributes for two types are incompatible, 1 if they
7068 are compatible, and 2 if they are nearly compatible (which causes a
7069 warning to be generated). */
7070 static int
7071 arm_comp_type_attributes (const_tree type1, const_tree type2)
7073 int l1, l2, s1, s2;
7075 /* Check for mismatch of non-default calling convention. */
7076 if (TREE_CODE (type1) != FUNCTION_TYPE)
7077 return 1;
7079 /* Check for mismatched call attributes. */
7080 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7081 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7082 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7083 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7085 /* Only bother to check if an attribute is defined. */
7086 if (l1 | l2 | s1 | s2)
7088 /* If one type has an attribute, the other must have the same attribute. */
7089 if ((l1 != l2) || (s1 != s2))
7090 return 0;
7092 /* Disallow mixed attributes. */
7093 if ((l1 & s2) || (l2 & s1))
7094 return 0;
7097 /* Check for mismatched ISR attribute. */
7098 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7099 if (! l1)
7100 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7101 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7102 if (! l2)
7103 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7104 if (l1 != l2)
7105 return 0;
7107 l1 = lookup_attribute ("cmse_nonsecure_call",
7108 TYPE_ATTRIBUTES (type1)) != NULL;
7109 l2 = lookup_attribute ("cmse_nonsecure_call",
7110 TYPE_ATTRIBUTES (type2)) != NULL;
7112 if (l1 != l2)
7113 return 0;
7115 return 1;
7118 /* Assigns default attributes to newly defined type. This is used to
7119 set short_call/long_call attributes for function types of
7120 functions defined inside corresponding #pragma scopes. */
7121 static void
7122 arm_set_default_type_attributes (tree type)
7124 /* Add __attribute__ ((long_call)) to all functions, when
7125 inside #pragma long_calls or __attribute__ ((short_call)),
7126 when inside #pragma no_long_calls. */
7127 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7129 tree type_attr_list, attr_name;
7130 type_attr_list = TYPE_ATTRIBUTES (type);
7132 if (arm_pragma_long_calls == LONG)
7133 attr_name = get_identifier ("long_call");
7134 else if (arm_pragma_long_calls == SHORT)
7135 attr_name = get_identifier ("short_call");
7136 else
7137 return;
7139 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7140 TYPE_ATTRIBUTES (type) = type_attr_list;
7144 /* Return true if DECL is known to be linked into section SECTION. */
7146 static bool
7147 arm_function_in_section_p (tree decl, section *section)
7149 /* We can only be certain about the prevailing symbol definition. */
7150 if (!decl_binds_to_current_def_p (decl))
7151 return false;
7153 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7154 if (!DECL_SECTION_NAME (decl))
7156 /* Make sure that we will not create a unique section for DECL. */
7157 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7158 return false;
7161 return function_section (decl) == section;
7164 /* Return nonzero if a 32-bit "long_call" should be generated for
7165 a call from the current function to DECL. We generate a long_call
7166 if the function:
7168 a. has an __attribute__((long call))
7169 or b. is within the scope of a #pragma long_calls
7170 or c. the -mlong-calls command line switch has been specified
7172 However we do not generate a long call if the function:
7174 d. has an __attribute__ ((short_call))
7175 or e. is inside the scope of a #pragma no_long_calls
7176 or f. is defined in the same section as the current function. */
7178 bool
7179 arm_is_long_call_p (tree decl)
7181 tree attrs;
7183 if (!decl)
7184 return TARGET_LONG_CALLS;
7186 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7187 if (lookup_attribute ("short_call", attrs))
7188 return false;
7190 /* For "f", be conservative, and only cater for cases in which the
7191 whole of the current function is placed in the same section. */
7192 if (!flag_reorder_blocks_and_partition
7193 && TREE_CODE (decl) == FUNCTION_DECL
7194 && arm_function_in_section_p (decl, current_function_section ()))
7195 return false;
7197 if (lookup_attribute ("long_call", attrs))
7198 return true;
7200 return TARGET_LONG_CALLS;
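/* Illustrative ways of triggering cases (a), (b) and (d)/(e) above
   (editor-supplied declarations):  */
#if 0
void far_away (void) __attribute__ ((long_call));	/* case (a) */
#pragma long_calls			/* case (b) for what follows */
void also_far (void);
#pragma long_calls_off
void nearby (void) __attribute__ ((short_call));	/* case (d) */
#endif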
7203 /* Return nonzero if it is ok to make a tail-call to DECL. */
7204 static bool
7205 arm_function_ok_for_sibcall (tree decl, tree exp)
7207 unsigned long func_type;
7209 if (cfun->machine->sibcall_blocked)
7210 return false;
7212 /* Never tailcall something if we are generating code for Thumb-1. */
7213 if (TARGET_THUMB1)
7214 return false;
7216 /* The PIC register is live on entry to VxWorks PLT entries, so we
7217 must make the call before restoring the PIC register. */
7218 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7219 return false;
7221 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7222 may be used both as target of the call and base register for restoring
7223 the VFP registers */
7224 if (TARGET_APCS_FRAME && TARGET_ARM
7225 && TARGET_HARD_FLOAT
7226 && decl && arm_is_long_call_p (decl))
7227 return false;
7229 /* If we are interworking and the function is not declared static
7230 then we can't tail-call it unless we know that it exists in this
7231 compilation unit (since it might be a Thumb routine). */
7232 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7233 && !TREE_ASM_WRITTEN (decl))
7234 return false;
7236 func_type = arm_current_func_type ();
7237 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7238 if (IS_INTERRUPT (func_type))
7239 return false;
7241 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7242 generated for entry functions themselves. */
7243 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7244 return false;
7246 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7247 this would complicate matters for later code generation. */
7248 if (TREE_CODE (exp) == CALL_EXPR)
7250 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7251 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7252 return false;
7255 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7257 /* Check that the return value locations are the same. For
7258 example, that we aren't returning a value from the sibling in
7259 a VFP register but then need to transfer it to a core
7260 register. */
7261 rtx a, b;
7262 tree decl_or_type = decl;
7264 /* If it is an indirect function pointer, get the function type. */
7265 if (!decl)
7266 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7268 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7269 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7270 cfun->decl, false);
7271 if (!rtx_equal_p (a, b))
7272 return false;
7275 /* Never tailcall if the function may be called with a misaligned SP. */
7276 if (IS_STACKALIGN (func_type))
7277 return false;
7279 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7280 references should become a NOP. Don't convert such calls into
7281 sibling calls. */
7282 if (TARGET_AAPCS_BASED
7283 && arm_abi == ARM_ABI_AAPCS
7284 && decl
7285 && DECL_WEAK (decl))
7286 return false;
7288 /* We cannot do a tailcall for an indirect call by descriptor if all the
7289 argument registers are used because the only register left to load the
7290 address is IP and it will already contain the static chain. */
7291 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7293 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7294 CUMULATIVE_ARGS cum;
7295 cumulative_args_t cum_v;
7297 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7298 cum_v = pack_cumulative_args (&cum);
7300 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7302 tree type = TREE_VALUE (t);
7303 if (!VOID_TYPE_P (type))
7304 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7307 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7308 return false;
7311 /* Everything else is ok. */
7312 return true;
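/* Illustrative sketch (not from the original sources) of the two CMSE
   situations rejected above; the type and function names are invented,
   the attribute spellings are the ones this file checks for:

     int __attribute__ ((cmse_nonsecure_entry))
     entry_fn (int x)
     {
       return helper (x);      <- must stay a normal call, since entry_fn
     }                            has to return through BXNS itself.

     typedef int __attribute__ ((cmse_nonsecure_call)) ns_call_t (int);

     int
     via_ns_pointer (ns_call_t *fp, int x)
     {
       return fp (x);          <- a cmse_nonsecure_call; never sibcalled.
     }  */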
7316 /* Addressing mode support functions. */
7318 /* Return nonzero if X is a legitimate immediate operand when compiling
7319 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7321 legitimate_pic_operand_p (rtx x)
7323 if (GET_CODE (x) == SYMBOL_REF
7324 || (GET_CODE (x) == CONST
7325 && GET_CODE (XEXP (x, 0)) == PLUS
7326 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7327 return 0;
7329 return 1;
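/* For example (an illustrative sketch in RTL notation): (const_int 42)
   remains a legitimate immediate here, whereas (symbol_ref ("foo")) and
   (const (plus (symbol_ref ("foo")) (const_int 4))) are rejected and must
   first be rewritten via legitimize_pic_address below.  */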
7332 /* Record that the current function needs a PIC register. Initialize
7333 cfun->machine->pic_reg if we have not already done so. */
7335 static void
7336 require_pic_register (void)
7338 /* A lot of the logic here is made obscure by the fact that this
7339 routine gets called as part of the rtx cost estimation process.
7340 We don't want those calls to affect any assumptions about the real
7341 function; and further, we can't call entry_of_function() until we
7342 start the real expansion process. */
7343 if (!crtl->uses_pic_offset_table)
7345 gcc_assert (can_create_pseudo_p ());
7346 if (arm_pic_register != INVALID_REGNUM
7347 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7349 if (!cfun->machine->pic_reg)
7350 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7352 /* Play games to avoid marking the function as needing pic
7353 if we are being called as part of the cost-estimation
7354 process. */
7355 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7356 crtl->uses_pic_offset_table = 1;
7358 else
7360 rtx_insn *seq, *insn;
7362 if (!cfun->machine->pic_reg)
7363 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7365 /* Play games to avoid marking the function as needing pic
7366 if we are being called as part of the cost-estimation
7367 process. */
7368 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7370 crtl->uses_pic_offset_table = 1;
7371 start_sequence ();
7373 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7374 && arm_pic_register > LAST_LO_REGNUM)
7375 emit_move_insn (cfun->machine->pic_reg,
7376 gen_rtx_REG (Pmode, arm_pic_register));
7377 else
7378 arm_load_pic_register (0UL);
7380 seq = get_insns ();
7381 end_sequence ();
7383 for (insn = seq; insn; insn = NEXT_INSN (insn))
7384 if (INSN_P (insn))
7385 INSN_LOCATION (insn) = prologue_location;
7387 /* We can be called during expansion of PHI nodes, where
7388 we can't yet emit instructions directly in the final
7389 insn stream. Queue the insns on the entry edge; they will
7390 be committed after everything else is expanded. */
7391 insert_insn_on_edge (seq,
7392 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7399 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7401 if (GET_CODE (orig) == SYMBOL_REF
7402 || GET_CODE (orig) == LABEL_REF)
7404 if (reg == 0)
7406 gcc_assert (can_create_pseudo_p ());
7407 reg = gen_reg_rtx (Pmode);
7410 /* VxWorks does not impose a fixed gap between segments; the run-time
7411 gap can be different from the object-file gap. We therefore can't
7412 use GOTOFF unless we are absolutely sure that the symbol is in the
7413 same segment as the GOT. Unfortunately, the flexibility of linker
7414 scripts means that we can't be sure of that in general, so assume
7415 that GOTOFF is never valid on VxWorks. */
7416 /* References to weak symbols cannot be resolved locally: they
7417 may be overridden by a non-weak definition at link time. */
7418 rtx_insn *insn;
7419 if ((GET_CODE (orig) == LABEL_REF
7420 || (GET_CODE (orig) == SYMBOL_REF
7421 && SYMBOL_REF_LOCAL_P (orig)
7422 && (SYMBOL_REF_DECL (orig)
7423 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7424 && NEED_GOT_RELOC
7425 && arm_pic_data_is_text_relative)
7426 insn = arm_pic_static_addr (orig, reg);
7427 else
7429 rtx pat;
7430 rtx mem;
7432 /* If this function doesn't have a pic register, create one now. */
7433 require_pic_register ();
7435 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7437 /* Make the MEM as close to a constant as possible. */
7438 mem = SET_SRC (pat);
7439 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7440 MEM_READONLY_P (mem) = 1;
7441 MEM_NOTRAP_P (mem) = 1;
7443 insn = emit_insn (pat);
7446 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7447 by the loop optimizer. */
7448 set_unique_reg_note (insn, REG_EQUAL, orig);
7450 return reg;
7452 else if (GET_CODE (orig) == CONST)
7454 rtx base, offset;
7456 if (GET_CODE (XEXP (orig, 0)) == PLUS
7457 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7458 return orig;
7460 /* Handle the case where we have: const (UNSPEC_TLS). */
7461 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7462 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7463 return orig;
7465 /* Handle the case where we have:
7466 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7467 CONST_INT. */
7468 if (GET_CODE (XEXP (orig, 0)) == PLUS
7469 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7470 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7472 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7473 return orig;
7476 if (reg == 0)
7478 gcc_assert (can_create_pseudo_p ());
7479 reg = gen_reg_rtx (Pmode);
7482 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7484 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7485 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7486 base == reg ? 0 : reg);
7488 if (CONST_INT_P (offset))
7490 /* The base register doesn't really matter; we only want to
7491 test the index for the appropriate mode. */
7492 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7494 gcc_assert (can_create_pseudo_p ());
7495 offset = force_reg (Pmode, offset);
7498 if (CONST_INT_P (offset))
7499 return plus_constant (Pmode, base, INTVAL (offset));
7502 if (GET_MODE_SIZE (mode) > 4
7503 && (GET_MODE_CLASS (mode) == MODE_INT
7504 || TARGET_SOFT_FLOAT))
7506 emit_insn (gen_addsi3 (reg, base, offset));
7507 return reg;
7510 return gen_rtx_PLUS (Pmode, base, offset);
7513 return orig;
7517 /* Find a spare register to use during the prolog of a function. */
7519 static int
7520 thumb_find_work_register (unsigned long pushed_regs_mask)
7522 int reg;
7524 /* Check the argument registers first as these are call-used. The
7525 register allocation order means that sometimes r3 might be used
7526 but earlier argument registers might not, so check them all. */
7527 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7528 if (!df_regs_ever_live_p (reg))
7529 return reg;
7531 /* Before going on to check the call-saved registers we can try a couple
7532 more ways of deducing that r3 is available. The first is when we are
7533 pushing anonymous arguments onto the stack and we have fewer than 4
7534 registers' worth of fixed arguments (*). In this case r3 will be part of
7535 the variable argument list and so we can be sure that it will be
7536 pushed right at the start of the function. Hence it will be available
7537 for the rest of the prologue.
7538 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7539 if (cfun->machine->uses_anonymous_args
7540 && crtl->args.pretend_args_size > 0)
7541 return LAST_ARG_REGNUM;
7543 /* The other case is when we have fixed arguments but fewer than 4 registers'
7544 worth. In this case r3 might be used in the body of the function, but
7545 it is not being used to convey an argument into the function. In theory
7546 we could just check crtl->args.size to see how many bytes are
7547 being passed in argument registers, but it seems that it is unreliable.
7548 Sometimes it will have the value 0 when in fact arguments are being
7549 passed. (See testcase execute/20021111-1.c for an example). So we also
7550 check the args_info.nregs field as well. The problem with this field is
7551 that it makes no allowances for arguments that are passed to the
7552 function but which are not used. Hence we could miss an opportunity
7553 when a function has an unused argument in r3. But it is better to be
7554 safe than sorry. */
7555 if (! cfun->machine->uses_anonymous_args
7556 && crtl->args.size >= 0
7557 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7558 && (TARGET_AAPCS_BASED
7559 ? crtl->args.info.aapcs_ncrn < 4
7560 : crtl->args.info.nregs < 4))
7561 return LAST_ARG_REGNUM;
7563 /* Otherwise look for a call-saved register that is going to be pushed. */
7564 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7565 if (pushed_regs_mask & (1 << reg))
7566 return reg;
7568 if (TARGET_THUMB2)
7570 /* Thumb-2 can use high regs. */
7571 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7572 if (pushed_regs_mask & (1 << reg))
7573 return reg;
7575 /* Something went wrong - thumb_compute_save_reg_mask()
7576 should have arranged for a suitable register to be pushed. */
7577 gcc_unreachable ();
7580 static GTY(()) int pic_labelno;
7582 /* Generate code to load the PIC register. In Thumb-1 mode a low scratch
7583 register is found via thumb_find_work_register (SAVED_REGS). */
7585 void
7586 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7588 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7590 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7591 return;
7593 gcc_assert (flag_pic);
7595 pic_reg = cfun->machine->pic_reg;
7596 if (TARGET_VXWORKS_RTP)
7598 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7599 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7600 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7602 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7604 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7605 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7607 else
7609 /* We use an UNSPEC rather than a LABEL_REF because this label
7610 never appears in the code stream. */
7612 labelno = GEN_INT (pic_labelno++);
7613 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7614 l1 = gen_rtx_CONST (VOIDmode, l1);
7616 /* On the ARM the PC register contains 'dot + 8' at the time of the
7617 addition; on the Thumb it is 'dot + 4'. */
7618 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7619 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7620 UNSPEC_GOTSYM_OFF);
7621 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7623 if (TARGET_32BIT)
7625 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7627 else /* TARGET_THUMB1 */
7629 if (arm_pic_register != INVALID_REGNUM
7630 && REGNO (pic_reg) > LAST_LO_REGNUM)
7632 /* We will have pushed the pic register, so we should always be
7633 able to find a work register. */
7634 pic_tmp = gen_rtx_REG (SImode,
7635 thumb_find_work_register (saved_regs));
7636 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7637 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7638 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7640 else if (arm_pic_register != INVALID_REGNUM
7641 && arm_pic_register > LAST_LO_REGNUM
7642 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7644 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7645 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7646 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7648 else
7649 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7653 /* Need to emit this whether or not we obey regdecls,
7654 since setjmp/longjmp can cause life info to screw up. */
7655 emit_use (pic_reg);
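/* A worked sketch of the arithmetic behind the non-VxWorks path above
   (label names invented): the UNSPEC_GOTSYM_OFF constant that is loaded
   works out as

       _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)         (ARM; Thumb uses + 4)

   and the pc, which reads as .LPICn + 8 at the point of the addition, is
   then added to it, so the PIC register ends up holding

       (.LPICn + 8) + (_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8))
         = &_GLOBAL_OFFSET_TABLE_.  */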
7658 /* Generate code to load the address of a static var when flag_pic is set. */
7659 static rtx_insn *
7660 arm_pic_static_addr (rtx orig, rtx reg)
7662 rtx l1, labelno, offset_rtx;
7664 gcc_assert (flag_pic);
7666 /* We use an UNSPEC rather than a LABEL_REF because this label
7667 never appears in the code stream. */
7668 labelno = GEN_INT (pic_labelno++);
7669 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7670 l1 = gen_rtx_CONST (VOIDmode, l1);
7672 /* On the ARM the PC register contains 'dot + 8' at the time of the
7673 addition; on the Thumb it is 'dot + 4'. */
7674 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7675 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7676 UNSPEC_SYMBOL_OFFSET);
7677 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7679 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7682 /* Return nonzero if X is valid as an ARM state addressing register. */
7683 static int
7684 arm_address_register_rtx_p (rtx x, int strict_p)
7686 int regno;
7688 if (!REG_P (x))
7689 return 0;
7691 regno = REGNO (x);
7693 if (strict_p)
7694 return ARM_REGNO_OK_FOR_BASE_P (regno);
7696 return (regno <= LAST_ARM_REGNUM
7697 || regno >= FIRST_PSEUDO_REGISTER
7698 || regno == FRAME_POINTER_REGNUM
7699 || regno == ARG_POINTER_REGNUM);
7702 /* Return TRUE if this rtx is the difference of a symbol and a label,
7703 and will reduce to a PC-relative relocation in the object file.
7704 Expressions like this can be left alone when generating PIC, rather
7705 than forced through the GOT. */
7706 static int
7707 pcrel_constant_p (rtx x)
7709 if (GET_CODE (x) == MINUS)
7710 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7712 return FALSE;
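/* E.g. (an illustrative sketch): (minus (symbol_ref ("sym")) (label_ref L))
   satisfies this test; the difference resolves to a PC-relative relocation
   and so needs no GOT entry.  */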
7715 /* Return true if X will surely end up in an index register after the next
7716 splitting pass. */
7717 static bool
7718 will_be_in_index_register (const_rtx x)
7720 /* arm.md: calculate_pic_address will split this into a register. */
7721 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7724 /* Return nonzero if X is a valid ARM state address operand. */
7726 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7727 int strict_p)
7729 bool use_ldrd;
7730 enum rtx_code code = GET_CODE (x);
7732 if (arm_address_register_rtx_p (x, strict_p))
7733 return 1;
7735 use_ldrd = (TARGET_LDRD
7736 && (mode == DImode || mode == DFmode));
7738 if (code == POST_INC || code == PRE_DEC
7739 || ((code == PRE_INC || code == POST_DEC)
7740 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7741 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7743 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7744 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7745 && GET_CODE (XEXP (x, 1)) == PLUS
7746 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7748 rtx addend = XEXP (XEXP (x, 1), 1);
7750 /* Don't allow ldrd post-increment by register because it's hard
7751 to fix up invalid register choices. */
7752 if (use_ldrd
7753 && GET_CODE (x) == POST_MODIFY
7754 && REG_P (addend))
7755 return 0;
7757 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7758 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7761 /* After reload, constants split into minipools will have addresses
7762 from a LABEL_REF. */
7763 else if (reload_completed
7764 && (code == LABEL_REF
7765 || (code == CONST
7766 && GET_CODE (XEXP (x, 0)) == PLUS
7767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7768 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7769 return 1;
7771 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7772 return 0;
7774 else if (code == PLUS)
7776 rtx xop0 = XEXP (x, 0);
7777 rtx xop1 = XEXP (x, 1);
7779 return ((arm_address_register_rtx_p (xop0, strict_p)
7780 && ((CONST_INT_P (xop1)
7781 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7782 || (!strict_p && will_be_in_index_register (xop1))))
7783 || (arm_address_register_rtx_p (xop1, strict_p)
7784 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7787 #if 0
7788 /* Reload currently can't handle MINUS, so disable this for now */
7789 else if (GET_CODE (x) == MINUS)
7791 rtx xop0 = XEXP (x, 0);
7792 rtx xop1 = XEXP (x, 1);
7794 return (arm_address_register_rtx_p (xop0, strict_p)
7795 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7797 #endif
7799 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7800 && code == SYMBOL_REF
7801 && CONSTANT_POOL_ADDRESS_P (x)
7802 && ! (flag_pic
7803 && symbol_mentioned_p (get_pool_constant (x))
7804 && ! pcrel_constant_p (get_pool_constant (x))))
7805 return 1;
7807 return 0;
7810 /* Return true if we can avoid creating a constant pool entry for x. */
7811 static bool
7812 can_avoid_literal_pool_for_label_p (rtx x)
7814 /* Normally we can assign constant values to target registers without
7815 the help of the constant pool. But there are cases where we have to
7816 use the constant pool, such as:
7817 1) assigning a label to a register.
7818 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7820 A constant pool access of the form:
7821 (set (reg r0) (mem (symbol_ref (".LC0"))))
7822 will cause the use of the literal pool (later, in function arm_reorg).
7823 So here we mark such a form as invalid; the compiler will then adjust
7824 it into:
7825 (set (reg r0) (symbol_ref (".LC0")))
7826 (set (reg r0) (mem (reg r0))).
7827 No extra register is required, and (mem (reg r0)) won't cause the use
7828 of literal pools. */
7829 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7830 && CONSTANT_POOL_ADDRESS_P (x))
7831 return 1;
7832 return 0;
7836 /* Return nonzero if X is a valid Thumb-2 address operand. */
7837 static int
7838 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7840 bool use_ldrd;
7841 enum rtx_code code = GET_CODE (x);
7843 if (arm_address_register_rtx_p (x, strict_p))
7844 return 1;
7846 use_ldrd = (TARGET_LDRD
7847 && (mode == DImode || mode == DFmode));
7849 if (code == POST_INC || code == PRE_DEC
7850 || ((code == PRE_INC || code == POST_DEC)
7851 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7852 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7854 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7855 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7856 && GET_CODE (XEXP (x, 1)) == PLUS
7857 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7859 /* Thumb-2 only has autoincrement by constant. */
7860 rtx addend = XEXP (XEXP (x, 1), 1);
7861 HOST_WIDE_INT offset;
7863 if (!CONST_INT_P (addend))
7864 return 0;
7866 offset = INTVAL(addend);
7867 if (GET_MODE_SIZE (mode) <= 4)
7868 return (offset > -256 && offset < 256);
7870 return (use_ldrd && offset > -1024 && offset < 1024
7871 && (offset & 3) == 0);
7874 /* After reload, constants split into minipools will have addresses
7875 from a LABEL_REF. */
7876 else if (reload_completed
7877 && (code == LABEL_REF
7878 || (code == CONST
7879 && GET_CODE (XEXP (x, 0)) == PLUS
7880 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7881 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7882 return 1;
7884 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7885 return 0;
7887 else if (code == PLUS)
7889 rtx xop0 = XEXP (x, 0);
7890 rtx xop1 = XEXP (x, 1);
7892 return ((arm_address_register_rtx_p (xop0, strict_p)
7893 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7894 || (!strict_p && will_be_in_index_register (xop1))))
7895 || (arm_address_register_rtx_p (xop1, strict_p)
7896 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7899 else if (can_avoid_literal_pool_for_label_p (x))
7900 return 0;
7902 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7903 && code == SYMBOL_REF
7904 && CONSTANT_POOL_ADDRESS_P (x)
7905 && ! (flag_pic
7906 && symbol_mentioned_p (get_pool_constant (x))
7907 && ! pcrel_constant_p (get_pool_constant (x))))
7908 return 1;
7910 return 0;
7913 /* Return nonzero if INDEX is valid for an address index operand in
7914 ARM state. */
7915 static int
7916 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7917 int strict_p)
7919 HOST_WIDE_INT range;
7920 enum rtx_code code = GET_CODE (index);
7922 /* Standard coprocessor addressing modes. */
7923 if (TARGET_HARD_FLOAT
7924 && (mode == SFmode || mode == DFmode))
7925 return (code == CONST_INT && INTVAL (index) < 1024
7926 && INTVAL (index) > -1024
7927 && (INTVAL (index) & 3) == 0);
7929 /* For quad modes, we restrict the constant offset to be slightly less
7930 than what the instruction format permits. We do this because for
7931 quad mode moves, we will actually decompose them into two separate
7932 double-mode reads or writes. INDEX must therefore be a valid
7933 (double-mode) offset and so should INDEX+8. */
7934 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7935 return (code == CONST_INT
7936 && INTVAL (index) < 1016
7937 && INTVAL (index) > -1024
7938 && (INTVAL (index) & 3) == 0);
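/* A worked check of the 1016 bound above: the largest word-aligned offset
   accepted is 1012, so the second (high) double-mode access uses
   1012 + 8 = 1020, which still satisfies the double-mode test below;
   allowing 1016 would push the high half to 1024, which does not.  */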
7940 /* We have no such constraint on double mode offsets, so we permit the
7941 full range of the instruction format. */
7942 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7943 return (code == CONST_INT
7944 && INTVAL (index) < 1024
7945 && INTVAL (index) > -1024
7946 && (INTVAL (index) & 3) == 0);
7948 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7949 return (code == CONST_INT
7950 && INTVAL (index) < 1024
7951 && INTVAL (index) > -1024
7952 && (INTVAL (index) & 3) == 0);
7954 if (arm_address_register_rtx_p (index, strict_p)
7955 && (GET_MODE_SIZE (mode) <= 4))
7956 return 1;
7958 if (mode == DImode || mode == DFmode)
7960 if (code == CONST_INT)
7962 HOST_WIDE_INT val = INTVAL (index);
7964 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7965 If vldr is selected it uses arm_coproc_mem_operand. */
7966 if (TARGET_LDRD)
7967 return val > -256 && val < 256;
7968 else
7969 return val > -4096 && val < 4092;
7972 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7975 if (GET_MODE_SIZE (mode) <= 4
7976 && ! (arm_arch4
7977 && (mode == HImode
7978 || mode == HFmode
7979 || (mode == QImode && outer == SIGN_EXTEND))))
7981 if (code == MULT)
7983 rtx xiop0 = XEXP (index, 0);
7984 rtx xiop1 = XEXP (index, 1);
7986 return ((arm_address_register_rtx_p (xiop0, strict_p)
7987 && power_of_two_operand (xiop1, SImode))
7988 || (arm_address_register_rtx_p (xiop1, strict_p)
7989 && power_of_two_operand (xiop0, SImode)));
7991 else if (code == LSHIFTRT || code == ASHIFTRT
7992 || code == ASHIFT || code == ROTATERT)
7994 rtx op = XEXP (index, 1);
7996 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7997 && CONST_INT_P (op)
7998 && INTVAL (op) > 0
7999 && INTVAL (op) <= 31);
8003 /* For ARM v4 we may be doing a sign-extend operation during the
8004 load. */
8005 if (arm_arch4)
8007 if (mode == HImode
8008 || mode == HFmode
8009 || (outer == SIGN_EXTEND && mode == QImode))
8010 range = 256;
8011 else
8012 range = 4096;
8014 else
8015 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8017 return (code == CONST_INT
8018 && INTVAL (index) < range
8019 && INTVAL (index) > -range);
8022 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8023 index operand, i.e. 1, 2, 4 or 8. */
8024 static bool
8025 thumb2_index_mul_operand (rtx op)
8027 HOST_WIDE_INT val;
8029 if (!CONST_INT_P (op))
8030 return false;
8032 val = INTVAL(op);
8033 return (val == 1 || val == 2 || val == 4 || val == 8);
8036 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8037 static int
8038 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8040 enum rtx_code code = GET_CODE (index);
8042 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8043 /* Standard coprocessor addressing modes. */
8044 if (TARGET_HARD_FLOAT
8045 && (mode == SFmode || mode == DFmode))
8046 return (code == CONST_INT && INTVAL (index) < 1024
8047 /* Thumb-2 allows only > -256 index range for its core register
8048 load/stores. Since we allow SF/DF in core registers, we have
8049 to use the intersection between -256~4096 (core) and -1024~1024
8050 (coprocessor). */
8051 && INTVAL (index) > -256
8052 && (INTVAL (index) & 3) == 0);
8054 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8056 /* For DImode assume values will usually live in core regs
8057 and only allow LDRD addressing modes. */
8058 if (!TARGET_LDRD || mode != DImode)
8059 return (code == CONST_INT
8060 && INTVAL (index) < 1024
8061 && INTVAL (index) > -1024
8062 && (INTVAL (index) & 3) == 0);
8065 /* For quad modes, we restrict the constant offset to be slightly less
8066 than what the instruction format permits. We do this because for
8067 quad mode moves, we will actually decompose them into two separate
8068 double-mode reads or writes. INDEX must therefore be a valid
8069 (double-mode) offset and so should INDEX+8. */
8070 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8071 return (code == CONST_INT
8072 && INTVAL (index) < 1016
8073 && INTVAL (index) > -1024
8074 && (INTVAL (index) & 3) == 0);
8076 /* We have no such constraint on double mode offsets, so we permit the
8077 full range of the instruction format. */
8078 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8079 return (code == CONST_INT
8080 && INTVAL (index) < 1024
8081 && INTVAL (index) > -1024
8082 && (INTVAL (index) & 3) == 0);
8084 if (arm_address_register_rtx_p (index, strict_p)
8085 && (GET_MODE_SIZE (mode) <= 4))
8086 return 1;
8088 if (mode == DImode || mode == DFmode)
8090 if (code == CONST_INT)
8092 HOST_WIDE_INT val = INTVAL (index);
8093 /* Thumb-2 ldrd only has reg+const addressing modes.
8094 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8095 If vldr is selected it uses arm_coproc_mem_operand. */
8096 if (TARGET_LDRD)
8097 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8098 else
8099 return IN_RANGE (val, -255, 4095 - 4);
8101 else
8102 return 0;
8105 if (code == MULT)
8107 rtx xiop0 = XEXP (index, 0);
8108 rtx xiop1 = XEXP (index, 1);
8110 return ((arm_address_register_rtx_p (xiop0, strict_p)
8111 && thumb2_index_mul_operand (xiop1))
8112 || (arm_address_register_rtx_p (xiop1, strict_p)
8113 && thumb2_index_mul_operand (xiop0)));
8115 else if (code == ASHIFT)
8117 rtx op = XEXP (index, 1);
8119 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8120 && CONST_INT_P (op)
8121 && INTVAL (op) > 0
8122 && INTVAL (op) <= 3);
8125 return (code == CONST_INT
8126 && INTVAL (index) < 4096
8127 && INTVAL (index) > -256);
8130 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8131 static int
8132 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8134 int regno;
8136 if (!REG_P (x))
8137 return 0;
8139 regno = REGNO (x);
8141 if (strict_p)
8142 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8144 return (regno <= LAST_LO_REGNUM
8145 || regno > LAST_VIRTUAL_REGISTER
8146 || regno == FRAME_POINTER_REGNUM
8147 || (GET_MODE_SIZE (mode) >= 4
8148 && (regno == STACK_POINTER_REGNUM
8149 || regno >= FIRST_PSEUDO_REGISTER
8150 || x == hard_frame_pointer_rtx
8151 || x == arg_pointer_rtx)));
8154 /* Return nonzero if x is a legitimate index register. This is the case
8155 for any base register that can access a QImode object. */
8156 inline static int
8157 thumb1_index_register_rtx_p (rtx x, int strict_p)
8159 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8162 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8164 The AP may be eliminated to either the SP or the FP, so we use the
8165 least common denominator, e.g. SImode, and offsets from 0 to 64.
8167 ??? Verify whether the above is the right approach.
8169 ??? Also, the FP may be eliminated to the SP, so perhaps that
8170 needs special handling also.
8172 ??? Look at how the mips16 port solves this problem. It probably uses
8173 better ways to solve some of these problems.
8175 Although it is not incorrect, we don't accept QImode and HImode
8176 addresses based on the frame pointer or arg pointer until the
8177 reload pass starts. This is so that eliminating such addresses
8178 into stack based ones won't produce impossible code. */
8180 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8182 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8183 return 0;
8185 /* ??? Not clear if this is right. Experiment. */
8186 if (GET_MODE_SIZE (mode) < 4
8187 && !(reload_in_progress || reload_completed)
8188 && (reg_mentioned_p (frame_pointer_rtx, x)
8189 || reg_mentioned_p (arg_pointer_rtx, x)
8190 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8191 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8192 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8193 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8194 return 0;
8196 /* Accept any base register. SP only in SImode or larger. */
8197 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8198 return 1;
8200 /* This is PC relative data before arm_reorg runs. */
8201 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8202 && GET_CODE (x) == SYMBOL_REF
8203 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8204 return 1;
8206 /* This is PC relative data after arm_reorg runs. */
8207 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8208 && reload_completed
8209 && (GET_CODE (x) == LABEL_REF
8210 || (GET_CODE (x) == CONST
8211 && GET_CODE (XEXP (x, 0)) == PLUS
8212 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8213 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8214 return 1;
8216 /* Post-inc indexing only supported for SImode and larger. */
8217 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8218 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8219 return 1;
8221 else if (GET_CODE (x) == PLUS)
8223 /* REG+REG address can be any two index registers. */
8224 /* We disallow FRAME+REG addressing since we know that FRAME
8225 will be replaced with STACK, and SP relative addressing only
8226 permits SP+OFFSET. */
8227 if (GET_MODE_SIZE (mode) <= 4
8228 && XEXP (x, 0) != frame_pointer_rtx
8229 && XEXP (x, 1) != frame_pointer_rtx
8230 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8231 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8232 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8233 return 1;
8235 /* REG+const has 5-7 bit offset for non-SP registers. */
8236 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8237 || XEXP (x, 0) == arg_pointer_rtx)
8238 && CONST_INT_P (XEXP (x, 1))
8239 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8240 return 1;
8242 /* REG+const has 10-bit offset for SP, but only SImode and
8243 larger are supported. */
8244 /* ??? Should probably check for DI/DFmode overflow here
8245 just like GO_IF_LEGITIMATE_OFFSET does. */
8246 else if (REG_P (XEXP (x, 0))
8247 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8248 && GET_MODE_SIZE (mode) >= 4
8249 && CONST_INT_P (XEXP (x, 1))
8250 && INTVAL (XEXP (x, 1)) >= 0
8251 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8252 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8253 return 1;
8255 else if (REG_P (XEXP (x, 0))
8256 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8257 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8258 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8259 && REGNO (XEXP (x, 0))
8260 <= LAST_VIRTUAL_POINTER_REGISTER))
8261 && GET_MODE_SIZE (mode) >= 4
8262 && CONST_INT_P (XEXP (x, 1))
8263 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8264 return 1;
8267 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8268 && GET_MODE_SIZE (mode) == 4
8269 && GET_CODE (x) == SYMBOL_REF
8270 && CONSTANT_POOL_ADDRESS_P (x)
8271 && ! (flag_pic
8272 && symbol_mentioned_p (get_pool_constant (x))
8273 && ! pcrel_constant_p (get_pool_constant (x))))
8274 return 1;
8276 return 0;
8279 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8280 instruction of mode MODE. */
8282 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8284 switch (GET_MODE_SIZE (mode))
8286 case 1:
8287 return val >= 0 && val < 32;
8289 case 2:
8290 return val >= 0 && val < 64 && (val & 1) == 0;
8292 default:
8293 return (val >= 0
8294 && (val + GET_MODE_SIZE (mode)) <= 128
8295 && (val & 3) == 0);
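/* Worked examples of the ranges above (they follow from the Thumb-1
   5-bit immediate-offset load/store forms):
     QImode: VAL in 0..31;
     HImode: VAL in 0, 2, ..., 62;
     SImode and larger: VAL word-aligned with VAL + size <= 128,
       e.g. 0, 4, ..., 124 for SImode.  */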
8299 bool
8300 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8302 if (TARGET_ARM)
8303 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8304 else if (TARGET_THUMB2)
8305 return thumb2_legitimate_address_p (mode, x, strict_p);
8306 else /* if (TARGET_THUMB1) */
8307 return thumb1_legitimate_address_p (mode, x, strict_p);
8310 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8312 Given an rtx X being reloaded into a reg required to be
8313 in class CLASS, return the class of reg to actually use.
8314 In general this is just CLASS, but for the Thumb core registers and
8315 immediate constants we prefer a LO_REGS class or a subset. */
8317 static reg_class_t
8318 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8320 if (TARGET_32BIT)
8321 return rclass;
8322 else
8324 if (rclass == GENERAL_REGS)
8325 return LO_REGS;
8326 else
8327 return rclass;
8331 /* Build the SYMBOL_REF for __tls_get_addr. */
8333 static GTY(()) rtx tls_get_addr_libfunc;
8335 static rtx
8336 get_tls_get_addr (void)
8338 if (!tls_get_addr_libfunc)
8339 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8340 return tls_get_addr_libfunc;
8344 arm_load_tp (rtx target)
8346 if (!target)
8347 target = gen_reg_rtx (SImode);
8349 if (TARGET_HARD_TP)
8351 /* Can return in any reg. */
8352 emit_insn (gen_load_tp_hard (target));
8354 else
8356 /* Always returned in r0. Immediately copy the result into a pseudo,
8357 otherwise other uses of r0 (e.g. setting up function arguments) may
8358 clobber the value. */
8360 rtx tmp;
8362 emit_insn (gen_load_tp_soft ());
8364 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8365 emit_move_insn (target, tmp);
8367 return target;
8370 static rtx
8371 load_tls_operand (rtx x, rtx reg)
8373 rtx tmp;
8375 if (reg == NULL_RTX)
8376 reg = gen_reg_rtx (SImode);
8378 tmp = gen_rtx_CONST (SImode, x);
8380 emit_move_insn (reg, tmp);
8382 return reg;
8385 static rtx_insn *
8386 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8388 rtx label, labelno, sum;
8390 gcc_assert (reloc != TLS_DESCSEQ);
8391 start_sequence ();
8393 labelno = GEN_INT (pic_labelno++);
8394 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8395 label = gen_rtx_CONST (VOIDmode, label);
8397 sum = gen_rtx_UNSPEC (Pmode,
8398 gen_rtvec (4, x, GEN_INT (reloc), label,
8399 GEN_INT (TARGET_ARM ? 8 : 4)),
8400 UNSPEC_TLS);
8401 reg = load_tls_operand (sum, reg);
8403 if (TARGET_ARM)
8404 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8405 else
8406 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8408 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8409 LCT_PURE, /* LCT_CONST? */
8410 Pmode, reg, Pmode);
8412 rtx_insn *insns = get_insns ();
8413 end_sequence ();
8415 return insns;
8418 static rtx
8419 arm_tls_descseq_addr (rtx x, rtx reg)
8421 rtx labelno = GEN_INT (pic_labelno++);
8422 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8423 rtx sum = gen_rtx_UNSPEC (Pmode,
8424 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8425 gen_rtx_CONST (VOIDmode, label),
8426 GEN_INT (!TARGET_ARM)),
8427 UNSPEC_TLS);
8428 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8430 emit_insn (gen_tlscall (x, labelno));
8431 if (!reg)
8432 reg = gen_reg_rtx (SImode);
8433 else
8434 gcc_assert (REGNO (reg) != R0_REGNUM);
8436 emit_move_insn (reg, reg0);
8438 return reg;
8442 legitimize_tls_address (rtx x, rtx reg)
8444 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8445 rtx_insn *insns;
8446 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8448 switch (model)
8450 case TLS_MODEL_GLOBAL_DYNAMIC:
8451 if (TARGET_GNU2_TLS)
8453 reg = arm_tls_descseq_addr (x, reg);
8455 tp = arm_load_tp (NULL_RTX);
8457 dest = gen_rtx_PLUS (Pmode, tp, reg);
8459 else
8461 /* Original scheme */
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8463 dest = gen_reg_rtx (Pmode);
8464 emit_libcall_block (insns, dest, ret, x);
8466 return dest;
8468 case TLS_MODEL_LOCAL_DYNAMIC:
8469 if (TARGET_GNU2_TLS)
8471 reg = arm_tls_descseq_addr (x, reg);
8473 tp = arm_load_tp (NULL_RTX);
8475 dest = gen_rtx_PLUS (Pmode, tp, reg);
8477 else
8479 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8481 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8482 share the LDM result with other LD model accesses. */
8483 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8484 UNSPEC_TLS);
8485 dest = gen_reg_rtx (Pmode);
8486 emit_libcall_block (insns, dest, ret, eqv);
8488 /* Load the addend. */
8489 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8490 GEN_INT (TLS_LDO32)),
8491 UNSPEC_TLS);
8492 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8493 dest = gen_rtx_PLUS (Pmode, dest, addend);
8495 return dest;
8497 case TLS_MODEL_INITIAL_EXEC:
8498 labelno = GEN_INT (pic_labelno++);
8499 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8500 label = gen_rtx_CONST (VOIDmode, label);
8501 sum = gen_rtx_UNSPEC (Pmode,
8502 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8503 GEN_INT (TARGET_ARM ? 8 : 4)),
8504 UNSPEC_TLS);
8505 reg = load_tls_operand (sum, reg);
8507 if (TARGET_ARM)
8508 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8509 else if (TARGET_THUMB2)
8510 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8511 else
8513 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8514 emit_move_insn (reg, gen_const_mem (SImode, reg));
8517 tp = arm_load_tp (NULL_RTX);
8519 return gen_rtx_PLUS (Pmode, tp, reg);
8521 case TLS_MODEL_LOCAL_EXEC:
8522 tp = arm_load_tp (NULL_RTX);
8524 reg = gen_rtx_UNSPEC (Pmode,
8525 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8526 UNSPEC_TLS);
8527 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8529 return gen_rtx_PLUS (Pmode, tp, reg);
8531 default:
8532 abort ();
8536 /* Try machine-dependent ways of modifying an illegitimate address
8537 to be legitimate. If we find one, return the new, valid address. */
8539 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8541 if (arm_tls_referenced_p (x))
8543 rtx addend = NULL;
8545 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8547 addend = XEXP (XEXP (x, 0), 1);
8548 x = XEXP (XEXP (x, 0), 0);
8551 if (GET_CODE (x) != SYMBOL_REF)
8552 return x;
8554 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8556 x = legitimize_tls_address (x, NULL_RTX);
8558 if (addend)
8560 x = gen_rtx_PLUS (SImode, x, addend);
8561 orig_x = x;
8563 else
8564 return x;
8567 if (!TARGET_ARM)
8569 /* TODO: legitimize_address for Thumb2. */
8570 if (TARGET_THUMB2)
8571 return x;
8572 return thumb_legitimize_address (x, orig_x, mode);
8575 if (GET_CODE (x) == PLUS)
8577 rtx xop0 = XEXP (x, 0);
8578 rtx xop1 = XEXP (x, 1);
8580 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8581 xop0 = force_reg (SImode, xop0);
8583 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8584 && !symbol_mentioned_p (xop1))
8585 xop1 = force_reg (SImode, xop1);
8587 if (ARM_BASE_REGISTER_RTX_P (xop0)
8588 && CONST_INT_P (xop1))
8590 HOST_WIDE_INT n, low_n;
8591 rtx base_reg, val;
8592 n = INTVAL (xop1);
8594 /* VFP addressing modes actually allow greater offsets, but for
8595 now we just stick with the lowest common denominator. */
8596 if (mode == DImode || mode == DFmode)
8598 low_n = n & 0x0f;
8599 n &= ~0x0f;
8600 if (low_n > 4)
8602 n += 16;
8603 low_n -= 16;
8606 else
8608 low_n = ((mode) == TImode ? 0
8609 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8610 n -= low_n;
8613 base_reg = gen_reg_rtx (SImode);
8614 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8615 emit_move_insn (base_reg, val);
8616 x = plus_constant (Pmode, base_reg, low_n);
8618 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_PLUS (SImode, xop0, xop1);
8622 /* XXX We don't allow MINUS any more -- see comment in
8623 arm_legitimate_address_outer_p (). */
8624 else if (GET_CODE (x) == MINUS)
8626 rtx xop0 = XEXP (x, 0);
8627 rtx xop1 = XEXP (x, 1);
8629 if (CONSTANT_P (xop0))
8630 xop0 = force_reg (SImode, xop0);
8632 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8633 xop1 = force_reg (SImode, xop1);
8635 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8636 x = gen_rtx_MINUS (SImode, xop0, xop1);
8639 /* Make sure to take full advantage of the pre-indexed addressing mode
8640 with absolute addresses, which often allows the base register to be
8641 factored out for multiple adjacent memory references, and it might
8642 even allow the minipool to be avoided entirely. */
8643 else if (CONST_INT_P (x) && optimize > 0)
8645 unsigned int bits;
8646 HOST_WIDE_INT mask, base, index;
8647 rtx base_reg;
8649 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8650 use an 8-bit index. So let's use a 12-bit index for SImode only and
8651 hope that arm_gen_constant will enable ldrb to use more bits. */
8652 bits = (mode == SImode) ? 12 : 8;
8653 mask = (1 << bits) - 1;
8654 base = INTVAL (x) & ~mask;
8655 index = INTVAL (x) & mask;
8656 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8658 /* It'll most probably be more efficient to generate the base
8659 with more bits set and use a negative index instead. */
8660 base |= mask;
8661 index -= mask;
8663 base_reg = force_reg (SImode, GEN_INT (base));
8664 x = plus_constant (Pmode, base_reg, index);
8667 if (flag_pic)
8669 /* We need to find and carefully transform any SYMBOL and LABEL
8670 references; so go back to the original address expression. */
8671 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8673 if (new_x != orig_x)
8674 x = new_x;
8677 return x;
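/* Two worked examples of the absolute-address split above (illustrative
   values only), for MODE == SImode where BITS == 12 and MASK == 0xfff:

     x = 0x12345678:  base = 0x12345000, index = 0x678; bit_count (base)
       is 7, not above (32 - 12) / 2 = 10, so the result is
       (plus (reg holding 0x12345000) (const_int 0x678)).

     x = 0xfffffabc:  base = 0xfffff000, index = 0xabc; bit_count (base)
       is 20, above 10, so base becomes 0xffffffff (cheap to build with
       mvn) and index becomes 0xabc - 0xfff = -0x543, giving
       (plus (reg holding 0xffffffff) (const_int -0x543)).  */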
8681 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8682 to be legitimate. If we find one, return the new, valid address. */
8684 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8686 if (GET_CODE (x) == PLUS
8687 && CONST_INT_P (XEXP (x, 1))
8688 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8689 || INTVAL (XEXP (x, 1)) < 0))
8691 rtx xop0 = XEXP (x, 0);
8692 rtx xop1 = XEXP (x, 1);
8693 HOST_WIDE_INT offset = INTVAL (xop1);
8695 /* Try and fold the offset into a biasing of the base register and
8696 then offsetting that. Don't do this when optimizing for space
8697 since it can cause too many CSEs. */
8698 if (optimize_size && offset >= 0
8699 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8701 HOST_WIDE_INT delta;
8703 if (offset >= 256)
8704 delta = offset - (256 - GET_MODE_SIZE (mode));
8705 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8706 delta = 31 * GET_MODE_SIZE (mode);
8707 else
8708 delta = offset & (~31 * GET_MODE_SIZE (mode));
8710 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8711 NULL_RTX);
8712 x = plus_constant (Pmode, xop0, delta);
8714 else if (offset < 0 && offset > -256)
8715 /* Small negative offsets are best done with a subtract before the
8716 dereference; forcing these into a register normally takes two
8717 instructions. */
8718 x = force_operand (x, NULL_RTX);
8719 else
8721 /* For the remaining cases, force the constant into a register. */
8722 xop1 = force_reg (SImode, xop1);
8723 x = gen_rtx_PLUS (SImode, xop0, xop1);
8726 else if (GET_CODE (x) == PLUS
8727 && s_register_operand (XEXP (x, 1), SImode)
8728 && !s_register_operand (XEXP (x, 0), SImode))
8730 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8732 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8735 if (flag_pic)
8737 /* We need to find and carefully transform any SYMBOL and LABEL
8738 references; so go back to the original address expression. */
8739 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8741 if (new_x != orig_x)
8742 x = new_x;
8745 return x;
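/* A worked example of the offset folding above (illustrative values only):
   MODE == SImode (size 4), base register R, offset 130.  Since
   130 >= 32 * 4 the address needs help; on the folding path, 130 < 256 and
   130 < 32 * 4 + 8, so DELTA = 31 * 4 = 124 and the result is
   (plus (R + 6) (const_int 124)), where 124 is a valid SImode offset and
   R + 6 takes a single add.  */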
8748 /* Return TRUE if X contains any TLS symbol references. */
8750 bool
8751 arm_tls_referenced_p (rtx x)
8753 if (! TARGET_HAVE_TLS)
8754 return false;
8756 subrtx_iterator::array_type array;
8757 FOR_EACH_SUBRTX (iter, array, x, ALL)
8759 const_rtx x = *iter;
8760 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8762 /* ARM currently does not provide relocations to encode TLS variables
8763 into AArch32 instructions, only data, so there is currently no way
8764 to implement these if a literal pool is disabled. */
8765 if (arm_disable_literal_pool)
8766 sorry ("accessing thread-local storage is not currently supported "
8767 "with -mpure-code or -mslow-flash-data");
8769 return true;
8772 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8773 TLS offsets, not real symbol references. */
8774 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8775 iter.skip_subrtxes ();
8777 return false;
8780 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8782 On the ARM, allow any integer (invalid ones are removed later by insn
8783 patterns), nice doubles and symbol_refs which refer to the function's
8784 constant pool XXX.
8786 When generating pic allow anything. */
8788 static bool
8789 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8791 return flag_pic || !label_mentioned_p (x);
8794 static bool
8795 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8797 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8798 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
8799 for ARMv8-M Baseline or later the result is valid. */
8800 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8801 x = XEXP (x, 0);
8803 return (CONST_INT_P (x)
8804 || CONST_DOUBLE_P (x)
8805 || CONSTANT_ADDRESS_P (x)
8806 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8807 || flag_pic);
8810 static bool
8811 arm_legitimate_constant_p (machine_mode mode, rtx x)
8813 return (!arm_cannot_force_const_mem (mode, x)
8814 && (TARGET_32BIT
8815 ? arm_legitimate_constant_p_1 (mode, x)
8816 : thumb_legitimate_constant_p (mode, x)));
8819 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8821 static bool
8822 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8824 rtx base, offset;
8826 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8828 split_const (x, &base, &offset);
8829 if (GET_CODE (base) == SYMBOL_REF
8830 && !offset_within_block_p (base, INTVAL (offset)))
8831 return true;
8833 return arm_tls_referenced_p (x);
8836 #define REG_OR_SUBREG_REG(X) \
8837 (REG_P (X) \
8838 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8840 #define REG_OR_SUBREG_RTX(X) \
8841 (REG_P (X) ? (X) : SUBREG_REG (X))
8843 static inline int
8844 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8846 machine_mode mode = GET_MODE (x);
8847 int total, words;
8849 switch (code)
8851 case ASHIFT:
8852 case ASHIFTRT:
8853 case LSHIFTRT:
8854 case ROTATERT:
8855 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8857 case PLUS:
8858 case MINUS:
8859 case COMPARE:
8860 case NEG:
8861 case NOT:
8862 return COSTS_N_INSNS (1);
8864 case MULT:
8865 if (arm_arch6m && arm_m_profile_small_mul)
8866 return COSTS_N_INSNS (32);
8868 if (CONST_INT_P (XEXP (x, 1)))
8870 int cycles = 0;
8871 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8873 while (i)
8875 i >>= 2;
8876 cycles++;
8878 return COSTS_N_INSNS (2) + cycles;
8880 return COSTS_N_INSNS (1) + 16;
8882 case SET:
8883 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8884 the mode. */
8885 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8886 return (COSTS_N_INSNS (words)
8887 + 4 * ((MEM_P (SET_SRC (x)))
8888 + MEM_P (SET_DEST (x))));
8890 case CONST_INT:
8891 if (outer == SET)
8893 if (UINTVAL (x) < 256
8894 /* 16-bit constant. */
8895 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8896 return 0;
8897 if (thumb_shiftable_const (INTVAL (x)))
8898 return COSTS_N_INSNS (2);
8899 return COSTS_N_INSNS (3);
8901 else if ((outer == PLUS || outer == COMPARE)
8902 && INTVAL (x) < 256 && INTVAL (x) > -256)
8903 return 0;
8904 else if ((outer == IOR || outer == XOR || outer == AND)
8905 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8906 return COSTS_N_INSNS (1);
8907 else if (outer == AND)
8909 int i;
8910 /* This duplicates the tests in the andsi3 expander. */
8911 for (i = 9; i <= 31; i++)
8912 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8913 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8914 return COSTS_N_INSNS (2);
8916 else if (outer == ASHIFT || outer == ASHIFTRT
8917 || outer == LSHIFTRT)
8918 return 0;
8919 return COSTS_N_INSNS (2);
8921 case CONST:
8922 case CONST_DOUBLE:
8923 case LABEL_REF:
8924 case SYMBOL_REF:
8925 return COSTS_N_INSNS (3);
8927 case UDIV:
8928 case UMOD:
8929 case DIV:
8930 case MOD:
8931 return 100;
8933 case TRUNCATE:
8934 return 99;
8936 case AND:
8937 case XOR:
8938 case IOR:
8939 /* XXX guess. */
8940 return 8;
8942 case MEM:
8943 /* XXX another guess. */
8944 /* Memory costs quite a lot for the first word, but subsequent words
8945 load at the equivalent of a single insn each. */
8946 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8947 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8948 ? 4 : 0));
8950 case IF_THEN_ELSE:
8951 /* XXX a guess. */
8952 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8953 return 14;
8954 return 2;
8956 case SIGN_EXTEND:
8957 case ZERO_EXTEND:
8958 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8959 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8961 if (mode == SImode)
8962 return total;
8964 if (arm_arch6)
8965 return total + COSTS_N_INSNS (1);
8967 /* Assume a two-shift sequence. Increase the cost slightly so
8968 we prefer actual shifts over an extend operation. */
8969 return total + 1 + COSTS_N_INSNS (2);
8971 default:
8972 return 99;
8976 /* Estimate the size cost of Thumb-1 instructions.
8977 For now most of the code is copied from thumb1_rtx_costs. We need more
8978 fine-grained tuning when we have more related test cases. */
8979 static inline int
8980 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8982 machine_mode mode = GET_MODE (x);
8983 int words, cost;
8985 switch (code)
8987 case ASHIFT:
8988 case ASHIFTRT:
8989 case LSHIFTRT:
8990 case ROTATERT:
8991 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8993 case PLUS:
8994 case MINUS:
8995 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8996 defined by RTL expansion, especially for the expansion of
8997 multiplication. */
8998 if ((GET_CODE (XEXP (x, 0)) == MULT
8999 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9000 || (GET_CODE (XEXP (x, 1)) == MULT
9001 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9002 return COSTS_N_INSNS (2);
9003 /* Fall through. */
9004 case COMPARE:
9005 case NEG:
9006 case NOT:
9007 return COSTS_N_INSNS (1);
9009 case MULT:
9010 if (CONST_INT_P (XEXP (x, 1)))
9012 /* The Thumb-1 mul instruction can't operate on a constant. We must
9013 load it into a register first. */
9014 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9015 /* For the targets which have a very small and high-latency multiply
9016 unit, we prefer to synthesize the mult with up to 5 instructions,
9017 giving a good balance between size and performance. */
9018 if (arm_arch6m && arm_m_profile_small_mul)
9019 return COSTS_N_INSNS (5);
9020 else
9021 return COSTS_N_INSNS (1) + const_size;
9023 return COSTS_N_INSNS (1);
9025 case SET:
9026 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9027 the mode. */
9028 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9029 cost = COSTS_N_INSNS (words);
9030 if (satisfies_constraint_J (SET_SRC (x))
9031 || satisfies_constraint_K (SET_SRC (x))
9032 /* Too big an immediate for a 2-byte mov, using MOVT. */
9033 || (CONST_INT_P (SET_SRC (x))
9034 && UINTVAL (SET_SRC (x)) >= 256
9035 && TARGET_HAVE_MOVT
9036 && satisfies_constraint_j (SET_SRC (x)))
9037 /* thumb1_movdi_insn. */
9038 || ((words > 1) && MEM_P (SET_SRC (x))))
9039 cost += COSTS_N_INSNS (1);
9040 return cost;
9042 case CONST_INT:
9043 if (outer == SET)
9045 if (UINTVAL (x) < 256)
9046 return COSTS_N_INSNS (1);
9047 /* movw is 4 bytes long. */
9048 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9049 return COSTS_N_INSNS (2);
9050 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9051 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9052 return COSTS_N_INSNS (2);
9053 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9054 if (thumb_shiftable_const (INTVAL (x)))
9055 return COSTS_N_INSNS (2);
9056 return COSTS_N_INSNS (3);
9058 else if ((outer == PLUS || outer == COMPARE)
9059 && INTVAL (x) < 256 && INTVAL (x) > -256)
9060 return 0;
9061 else if ((outer == IOR || outer == XOR || outer == AND)
9062 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9063 return COSTS_N_INSNS (1);
9064 else if (outer == AND)
9066 int i;
9067 /* This duplicates the tests in the andsi3 expander. */
9068 for (i = 9; i <= 31; i++)
9069 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9070 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9071 return COSTS_N_INSNS (2);
9073 else if (outer == ASHIFT || outer == ASHIFTRT
9074 || outer == LSHIFTRT)
9075 return 0;
9076 return COSTS_N_INSNS (2);
9078 case CONST:
9079 case CONST_DOUBLE:
9080 case LABEL_REF:
9081 case SYMBOL_REF:
9082 return COSTS_N_INSNS (3);
9084 case UDIV:
9085 case UMOD:
9086 case DIV:
9087 case MOD:
9088 return 100;
9090 case TRUNCATE:
9091 return 99;
9093 case AND:
9094 case XOR:
9095 case IOR:
9096 return COSTS_N_INSNS (1);
9098 case MEM:
9099 return (COSTS_N_INSNS (1)
9100 + COSTS_N_INSNS (1)
9101 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9102 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9103 ? COSTS_N_INSNS (1) : 0));
9105 case IF_THEN_ELSE:
9106 /* XXX a guess. */
9107 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9108 return 14;
9109 return 2;
9111 case ZERO_EXTEND:
9112 /* XXX still guessing. */
9113 switch (GET_MODE (XEXP (x, 0)))
9115 case E_QImode:
9116 return (1 + (mode == DImode ? 4 : 0)
9117 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9119 case E_HImode:
9120 return (4 + (mode == DImode ? 4 : 0)
9121 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9123 case E_SImode:
9124 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9126 default:
9127 return 99;
9130 default:
9131 return 99;
9135 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9136 operand, then return the operand that is being shifted. If the shift
9137 is not by a constant, then set SHIFT_REG to point to the shift-amount operand.
9138 Return NULL if OP is not a shifter operand. */
9139 static rtx
9140 shifter_op_p (rtx op, rtx *shift_reg)
9142 enum rtx_code code = GET_CODE (op);
9144 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9145 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9146 return XEXP (op, 0);
9147 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9148 return XEXP (op, 0);
9149 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9150 || code == ASHIFTRT)
9152 if (!CONST_INT_P (XEXP (op, 1)))
9153 *shift_reg = XEXP (op, 1);
9154 return XEXP (op, 0);
9157 return NULL;
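/* For example, given (mult (reg:SI 1) (const_int 4)) -- a left shift by
   two written as a multiply -- this returns (reg:SI 1) and leaves
   *SHIFT_REG untouched, whereas for (ashift (reg:SI 1) (reg:SI 2)) it
   returns (reg:SI 1) and sets *SHIFT_REG to (reg:SI 2).  */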
9160 static bool
9161 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9163 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9164 rtx_code code = GET_CODE (x);
9165 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9167 switch (XINT (x, 1))
9169 case UNSPEC_UNALIGNED_LOAD:
9170 /* We can only do unaligned loads into the integer unit, and we can't
9171 use LDM or LDRD. */
9172 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9173 if (speed_p)
9174 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9175 + extra_cost->ldst.load_unaligned);
9177 #ifdef NOT_YET
9178 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9179 ADDR_SPACE_GENERIC, speed_p);
9180 #endif
9181 return true;
9183 case UNSPEC_UNALIGNED_STORE:
9184 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9185 if (speed_p)
9186 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9187 + extra_cost->ldst.store_unaligned);
9189 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9190 #ifdef NOT_YET
9191 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9192 ADDR_SPACE_GENERIC, speed_p);
9193 #endif
9194 return true;
9196 case UNSPEC_VRINTZ:
9197 case UNSPEC_VRINTP:
9198 case UNSPEC_VRINTM:
9199 case UNSPEC_VRINTR:
9200 case UNSPEC_VRINTX:
9201 case UNSPEC_VRINTA:
9202 if (speed_p)
9203 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9205 return true;
9206 default:
9207 *cost = COSTS_N_INSNS (2);
9208 break;
9210 return true;
9213 /* Cost of a libcall. We assume one insn per argument, an amount for the
9214 call (one insn for -Os) and then one for processing the result. */
9215 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
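/* As a worked example, assuming the usual COSTS_N_INSNS scale of four
   cost units per instruction, a two-argument libcall is costed at
   COSTS_N_INSNS (2 + 18) == 80 when optimizing for speed and only
   COSTS_N_INSNS (2 + 2) == 16 when optimizing for size.  */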
9217 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9218 do \
9220 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9221 if (shift_op != NULL \
9222 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9224 if (shift_reg) \
9226 if (speed_p) \
9227 *cost += extra_cost->alu.arith_shift_reg; \
9228 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9229 ASHIFT, 1, speed_p); \
9231 else if (speed_p) \
9232 *cost += extra_cost->alu.arith_shift; \
9234 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9235 ASHIFT, 0, speed_p) \
9236 + rtx_cost (XEXP (x, 1 - IDX), \
9237 GET_MODE (shift_op), \
9238 OP, 1, speed_p)); \
9239 return true; \
9242 while (0)
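/* The macro is invoked below as, for example,
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0): if operand IDX of X is a valid
   left-shift form (the only shift usable in the narrow modes), both the
   shifted operand and the other operand are costed and the enclosing
   case returns immediately; otherwise execution falls through to the
   generic narrow-mode handling.  */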
9244 /* RTX costs. Make an estimate of the cost of executing the operation
9245 X, which is contained within an operation with code OUTER_CODE.
9246 SPEED_P indicates whether the cost desired is the performance cost,
9247 or the size cost. The estimate is stored in COST and the return
9248 value is TRUE if the cost calculation is final, or FALSE if the
9249 caller should recurse through the operands of X to add additional
9250 costs.
9252 We currently make no attempt to model the size savings of Thumb-2
9253 16-bit instructions. At the normal points in compilation where
9254 this code is called we have no measure of whether the condition
9255 flags are live or not, and thus no realistic way to determine what
9256 the size will eventually be. */
9257 static bool
9258 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9259 const struct cpu_cost_table *extra_cost,
9260 int *cost, bool speed_p)
9262 machine_mode mode = GET_MODE (x);
9264 *cost = COSTS_N_INSNS (1);
9266 if (TARGET_THUMB1)
9268 if (speed_p)
9269 *cost = thumb1_rtx_costs (x, code, outer_code);
9270 else
9271 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9272 return true;
9275 switch (code)
9277 case SET:
9278 *cost = 0;
9279 /* SET RTXs don't have a mode so we get it from the destination. */
9280 mode = GET_MODE (SET_DEST (x));
9282 if (REG_P (SET_SRC (x))
9283 && REG_P (SET_DEST (x)))
9285 /* Assume that most copies can be done with a single insn,
9286 unless we don't have HW FP, in which case everything
9287 larger than word mode will require two insns. */
9288 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9289 && GET_MODE_SIZE (mode) > 4)
9290 || mode == DImode)
9291 ? 2 : 1);
9292 /* Conditional register moves can be encoded
9293 in 16 bits in Thumb mode. */
9294 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9295 *cost >>= 1;
9297 return true;
9300 if (CONST_INT_P (SET_SRC (x)))
9302 /* Handle CONST_INT here, since the value doesn't have a mode
9303 and we would otherwise be unable to work out the true cost. */
9304 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9305 0, speed_p);
9306 outer_code = SET;
9307 /* Slightly lower the cost of setting a core reg to a constant.
9308 This helps break up chains and allows for better scheduling. */
9309 if (REG_P (SET_DEST (x))
9310 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9311 *cost -= 1;
9312 x = SET_SRC (x);
9313 /* Immediate moves with an immediate in the range [0, 255] can be
9314 encoded in 16 bits in Thumb mode. */
9315 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9316 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9317 *cost >>= 1;
9318 goto const_int_cost;
9321 return false;
9323 case MEM:
9324 /* A memory access costs 1 insn if the mode is small or the address is
9325 a single register; otherwise it costs one insn per word. */
9326 if (REG_P (XEXP (x, 0)))
9327 *cost = COSTS_N_INSNS (1);
9328 else if (flag_pic
9329 && GET_CODE (XEXP (x, 0)) == PLUS
9330 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9331 /* This will be split into two instructions.
9332 See arm.md:calculate_pic_address. */
9333 *cost = COSTS_N_INSNS (2);
9334 else
9335 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9337 /* For speed optimizations, add the costs of the address and
9338 accessing memory. */
9339 if (speed_p)
9340 #ifdef NOT_YET
9341 *cost += (extra_cost->ldst.load
9342 + arm_address_cost (XEXP (x, 0), mode,
9343 ADDR_SPACE_GENERIC, speed_p));
9344 #else
9345 *cost += extra_cost->ldst.load;
9346 #endif
9347 return true;
9349 case PARALLEL:
9351 /* Calculations of LDM costs are complex. We assume an initial cost
9352 (ldm_1st) which will load the number of registers mentioned in
9353 ldm_regs_per_insn_1st registers; then each additional
9354 ldm_regs_per_insn_subsequent registers cost one more insn. The
9355 formula for N regs is thus:
9357 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9358 + ldm_regs_per_insn_subsequent - 1)
9359 / ldm_regs_per_insn_subsequent).
9361 Additional costs may also be added for addressing. A similar
9362 formula is used for STM. */
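/* As a worked example, with hypothetical tuning values
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
   8-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (3).  */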
9364 bool is_ldm = load_multiple_operation (x, SImode);
9365 bool is_stm = store_multiple_operation (x, SImode);
9367 if (is_ldm || is_stm)
9369 if (speed_p)
9371 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9372 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9373 ? extra_cost->ldst.ldm_regs_per_insn_1st
9374 : extra_cost->ldst.stm_regs_per_insn_1st;
9375 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9376 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9377 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9379 *cost += regs_per_insn_1st
9380 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9381 + regs_per_insn_sub - 1)
9382 / regs_per_insn_sub);
9383 return true;
9387 return false;
9389 case DIV:
9390 case UDIV:
9391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9392 && (mode == SFmode || !TARGET_VFP_SINGLE))
9393 *cost += COSTS_N_INSNS (speed_p
9394 ? extra_cost->fp[mode != SFmode].div : 0);
9395 else if (mode == SImode && TARGET_IDIV)
9396 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9397 else
9398 *cost = LIBCALL_COST (2);
9400 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9401 are possible, udiv is preferred. */
9402 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9403 return false; /* All arguments must be in registers. */
9405 case MOD:
9406 /* MOD by a power of 2 can be expanded as:
9407 rsbs r1, r0, #0
9408 and r0, r0, #(n - 1)
9409 and r1, r1, #(n - 1)
9410 rsbpl r0, r1, #0. */
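/* For instance, x % 8 uses the mask #7 and needs the four insns above,
   matching the COSTS_N_INSNS (3) added below on top of the base cost of
   one insn.  */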
9411 if (CONST_INT_P (XEXP (x, 1))
9412 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9413 && mode == SImode)
9415 *cost += COSTS_N_INSNS (3);
9417 if (speed_p)
9418 *cost += 2 * extra_cost->alu.logical
9419 + extra_cost->alu.arith;
9420 return true;
9423 /* Fall-through. */
9424 case UMOD:
9425 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9426 are possible, udiv is preferred. */
9427 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9428 return false; /* All arguments must be in registers. */
9430 case ROTATE:
9431 if (mode == SImode && REG_P (XEXP (x, 1)))
9433 *cost += (COSTS_N_INSNS (1)
9434 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9435 if (speed_p)
9436 *cost += extra_cost->alu.shift_reg;
9437 return true;
9439 /* Fall through */
9440 case ROTATERT:
9441 case ASHIFT:
9442 case LSHIFTRT:
9443 case ASHIFTRT:
9444 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9446 *cost += (COSTS_N_INSNS (2)
9447 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9448 if (speed_p)
9449 *cost += 2 * extra_cost->alu.shift;
9450 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9451 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9452 *cost += 1;
9453 return true;
9455 else if (mode == SImode)
9457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9458 /* Slightly disparage register shifts at -Os, but not by much. */
9459 if (!CONST_INT_P (XEXP (x, 1)))
9460 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9461 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9462 return true;
9464 else if (GET_MODE_CLASS (mode) == MODE_INT
9465 && GET_MODE_SIZE (mode) < 4)
9467 if (code == ASHIFT)
9469 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9470 /* Slightly disparage register shifts at -Os, but not by
9471 much. */
9472 if (!CONST_INT_P (XEXP (x, 1)))
9473 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9474 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9476 else if (code == LSHIFTRT || code == ASHIFTRT)
9478 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9480 /* Can use SBFX/UBFX. */
9481 if (speed_p)
9482 *cost += extra_cost->alu.bfx;
9483 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9485 else
9487 *cost += COSTS_N_INSNS (1);
9488 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9489 if (speed_p)
9491 if (CONST_INT_P (XEXP (x, 1)))
9492 *cost += 2 * extra_cost->alu.shift;
9493 else
9494 *cost += (extra_cost->alu.shift
9495 + extra_cost->alu.shift_reg);
9497 else
9498 /* Slightly disparage register shifts. */
9499 *cost += !CONST_INT_P (XEXP (x, 1));
9502 else /* Rotates. */
9504 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9505 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9506 if (speed_p)
9508 if (CONST_INT_P (XEXP (x, 1)))
9509 *cost += (2 * extra_cost->alu.shift
9510 + extra_cost->alu.log_shift);
9511 else
9512 *cost += (extra_cost->alu.shift
9513 + extra_cost->alu.shift_reg
9514 + extra_cost->alu.log_shift_reg);
9517 return true;
9520 *cost = LIBCALL_COST (2);
9521 return false;
9523 case BSWAP:
9524 if (arm_arch6)
9526 if (mode == SImode)
9528 if (speed_p)
9529 *cost += extra_cost->alu.rev;
9531 return false;
9534 else
9536 /* No rev instruction available. Look at arm_legacy_rev
9537 and thumb_legacy_rev for the form of RTL used then. */
9538 if (TARGET_THUMB)
9540 *cost += COSTS_N_INSNS (9);
9542 if (speed_p)
9544 *cost += 6 * extra_cost->alu.shift;
9545 *cost += 3 * extra_cost->alu.logical;
9548 else
9550 *cost += COSTS_N_INSNS (4);
9552 if (speed_p)
9554 *cost += 2 * extra_cost->alu.shift;
9555 *cost += extra_cost->alu.arith_shift;
9556 *cost += 2 * extra_cost->alu.logical;
9559 return true;
9561 return false;
9563 case MINUS:
9564 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9565 && (mode == SFmode || !TARGET_VFP_SINGLE))
9567 if (GET_CODE (XEXP (x, 0)) == MULT
9568 || GET_CODE (XEXP (x, 1)) == MULT)
9570 rtx mul_op0, mul_op1, sub_op;
9572 if (speed_p)
9573 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9575 if (GET_CODE (XEXP (x, 0)) == MULT)
9577 mul_op0 = XEXP (XEXP (x, 0), 0);
9578 mul_op1 = XEXP (XEXP (x, 0), 1);
9579 sub_op = XEXP (x, 1);
9581 else
9583 mul_op0 = XEXP (XEXP (x, 1), 0);
9584 mul_op1 = XEXP (XEXP (x, 1), 1);
9585 sub_op = XEXP (x, 0);
9588 /* The first operand of the multiply may be optionally
9589 negated. */
9590 if (GET_CODE (mul_op0) == NEG)
9591 mul_op0 = XEXP (mul_op0, 0);
9593 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9594 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9595 + rtx_cost (sub_op, mode, code, 0, speed_p));
9597 return true;
9600 if (speed_p)
9601 *cost += extra_cost->fp[mode != SFmode].addsub;
9602 return false;
9605 if (mode == SImode)
9607 rtx shift_by_reg = NULL;
9608 rtx shift_op;
9609 rtx non_shift_op;
9611 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9612 if (shift_op == NULL)
9614 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9615 non_shift_op = XEXP (x, 0);
9617 else
9618 non_shift_op = XEXP (x, 1);
9620 if (shift_op != NULL)
9622 if (shift_by_reg != NULL)
9624 if (speed_p)
9625 *cost += extra_cost->alu.arith_shift_reg;
9626 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9628 else if (speed_p)
9629 *cost += extra_cost->alu.arith_shift;
9631 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9632 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9633 return true;
9636 if (arm_arch_thumb2
9637 && GET_CODE (XEXP (x, 1)) == MULT)
9639 /* MLS. */
9640 if (speed_p)
9641 *cost += extra_cost->mult[0].add;
9642 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9643 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9644 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9645 return true;
9648 if (CONST_INT_P (XEXP (x, 0)))
9650 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9651 INTVAL (XEXP (x, 0)), NULL_RTX,
9652 NULL_RTX, 1, 0);
9653 *cost = COSTS_N_INSNS (insns);
9654 if (speed_p)
9655 *cost += insns * extra_cost->alu.arith;
9656 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9657 return true;
9659 else if (speed_p)
9660 *cost += extra_cost->alu.arith;
9662 return false;
9665 if (GET_MODE_CLASS (mode) == MODE_INT
9666 && GET_MODE_SIZE (mode) < 4)
9668 rtx shift_op, shift_reg;
9669 shift_reg = NULL;
9671 /* We check both sides of the MINUS for shifter operands since,
9672 unlike PLUS, it's not commutative. */
9674 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9675 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9677 /* Slightly disparage, as we might need to widen the result. */
9678 *cost += 1;
9679 if (speed_p)
9680 *cost += extra_cost->alu.arith;
9682 if (CONST_INT_P (XEXP (x, 0)))
9684 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9685 return true;
9688 return false;
9691 if (mode == DImode)
9693 *cost += COSTS_N_INSNS (1);
9695 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9697 rtx op1 = XEXP (x, 1);
9699 if (speed_p)
9700 *cost += 2 * extra_cost->alu.arith;
9702 if (GET_CODE (op1) == ZERO_EXTEND)
9703 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9704 0, speed_p);
9705 else
9706 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9707 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9708 0, speed_p);
9709 return true;
9711 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9713 if (speed_p)
9714 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9715 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9716 0, speed_p)
9717 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9718 return true;
9720 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9721 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9723 if (speed_p)
9724 *cost += (extra_cost->alu.arith
9725 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9726 ? extra_cost->alu.arith
9727 : extra_cost->alu.arith_shift));
9728 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9729 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9730 GET_CODE (XEXP (x, 1)), 0, speed_p));
9731 return true;
9734 if (speed_p)
9735 *cost += 2 * extra_cost->alu.arith;
9736 return false;
9739 /* Vector mode? */
9741 *cost = LIBCALL_COST (2);
9742 return false;
9744 case PLUS:
9745 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9746 && (mode == SFmode || !TARGET_VFP_SINGLE))
9748 if (GET_CODE (XEXP (x, 0)) == MULT)
9750 rtx mul_op0, mul_op1, add_op;
9752 if (speed_p)
9753 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9755 mul_op0 = XEXP (XEXP (x, 0), 0);
9756 mul_op1 = XEXP (XEXP (x, 0), 1);
9757 add_op = XEXP (x, 1);
9759 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9760 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9761 + rtx_cost (add_op, mode, code, 0, speed_p));
9763 return true;
9766 if (speed_p)
9767 *cost += extra_cost->fp[mode != SFmode].addsub;
9768 return false;
9770 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9772 *cost = LIBCALL_COST (2);
9773 return false;
9776 /* Narrow modes can be synthesized in SImode, but the range
9777 of useful sub-operations is limited. Check for shift operations
9778 on one of the operands. Only left shifts can be used in the
9779 narrow modes. */
9780 if (GET_MODE_CLASS (mode) == MODE_INT
9781 && GET_MODE_SIZE (mode) < 4)
9783 rtx shift_op, shift_reg;
9784 shift_reg = NULL;
9786 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9788 if (CONST_INT_P (XEXP (x, 1)))
9790 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9791 INTVAL (XEXP (x, 1)), NULL_RTX,
9792 NULL_RTX, 1, 0);
9793 *cost = COSTS_N_INSNS (insns);
9794 if (speed_p)
9795 *cost += insns * extra_cost->alu.arith;
9796 /* Slightly penalize a narrow operation as the result may
9797 need widening. */
9798 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9799 return true;
9802 /* Slightly penalize a narrow operation as the result may
9803 need widening. */
9804 *cost += 1;
9805 if (speed_p)
9806 *cost += extra_cost->alu.arith;
9808 return false;
9811 if (mode == SImode)
9813 rtx shift_op, shift_reg;
9815 if (TARGET_INT_SIMD
9816 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9817 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9819 /* UXTA[BH] or SXTA[BH]. */
9820 if (speed_p)
9821 *cost += extra_cost->alu.extend_arith;
9822 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9823 0, speed_p)
9824 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9825 return true;
9828 shift_reg = NULL;
9829 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9830 if (shift_op != NULL)
9832 if (shift_reg)
9834 if (speed_p)
9835 *cost += extra_cost->alu.arith_shift_reg;
9836 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9838 else if (speed_p)
9839 *cost += extra_cost->alu.arith_shift;
9841 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9842 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9843 return true;
9845 if (GET_CODE (XEXP (x, 0)) == MULT)
9847 rtx mul_op = XEXP (x, 0);
9849 if (TARGET_DSP_MULTIPLY
9850 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9851 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9852 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9853 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9854 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9855 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9856 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9857 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9858 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9859 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9860 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9861 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9862 == 16))))))
9864 /* SMLA[BT][BT]. */
9865 if (speed_p)
9866 *cost += extra_cost->mult[0].extend_add;
9867 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9868 SIGN_EXTEND, 0, speed_p)
9869 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9870 SIGN_EXTEND, 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9872 return true;
9875 if (speed_p)
9876 *cost += extra_cost->mult[0].add;
9877 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9878 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9879 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9880 return true;
9882 if (CONST_INT_P (XEXP (x, 1)))
9884 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9885 INTVAL (XEXP (x, 1)), NULL_RTX,
9886 NULL_RTX, 1, 0);
9887 *cost = COSTS_N_INSNS (insns);
9888 if (speed_p)
9889 *cost += insns * extra_cost->alu.arith;
9890 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9891 return true;
9893 else if (speed_p)
9894 *cost += extra_cost->alu.arith;
9896 return false;
9899 if (mode == DImode)
9901 if (arm_arch3m
9902 && GET_CODE (XEXP (x, 0)) == MULT
9903 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9904 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9905 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9906 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9908 if (speed_p)
9909 *cost += extra_cost->mult[1].extend_add;
9910 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9911 ZERO_EXTEND, 0, speed_p)
9912 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9913 ZERO_EXTEND, 0, speed_p)
9914 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9915 return true;
9918 *cost += COSTS_N_INSNS (1);
9920 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9923 if (speed_p)
9924 *cost += (extra_cost->alu.arith
9925 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9926 ? extra_cost->alu.arith
9927 : extra_cost->alu.arith_shift));
9929 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9930 0, speed_p)
9931 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9932 return true;
9935 if (speed_p)
9936 *cost += 2 * extra_cost->alu.arith;
9937 return false;
9940 /* Vector mode? */
9941 *cost = LIBCALL_COST (2);
9942 return false;
9943 case IOR:
9944 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9946 if (speed_p)
9947 *cost += extra_cost->alu.rev;
9949 return true;
9951 /* Fall through. */
9952 case AND: case XOR:
9953 if (mode == SImode)
9955 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9956 rtx op0 = XEXP (x, 0);
9957 rtx shift_op, shift_reg;
9959 if (subcode == NOT
9960 && (code == AND
9961 || (code == IOR && TARGET_THUMB2)))
9962 op0 = XEXP (op0, 0);
9964 shift_reg = NULL;
9965 shift_op = shifter_op_p (op0, &shift_reg);
9966 if (shift_op != NULL)
9968 if (shift_reg)
9970 if (speed_p)
9971 *cost += extra_cost->alu.log_shift_reg;
9972 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9974 else if (speed_p)
9975 *cost += extra_cost->alu.log_shift;
9977 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9979 return true;
9982 if (CONST_INT_P (XEXP (x, 1)))
9984 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9985 INTVAL (XEXP (x, 1)), NULL_RTX,
9986 NULL_RTX, 1, 0);
9988 *cost = COSTS_N_INSNS (insns);
9989 if (speed_p)
9990 *cost += insns * extra_cost->alu.logical;
9991 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9992 return true;
9995 if (speed_p)
9996 *cost += extra_cost->alu.logical;
9997 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9998 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9999 return true;
10002 if (mode == DImode)
10004 rtx op0 = XEXP (x, 0);
10005 enum rtx_code subcode = GET_CODE (op0);
10007 *cost += COSTS_N_INSNS (1);
10009 if (subcode == NOT
10010 && (code == AND
10011 || (code == IOR && TARGET_THUMB2)))
10012 op0 = XEXP (op0, 0);
10014 if (GET_CODE (op0) == ZERO_EXTEND)
10016 if (speed_p)
10017 *cost += 2 * extra_cost->alu.logical;
10019 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10020 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10022 return true;
10024 else if (GET_CODE (op0) == SIGN_EXTEND)
10026 if (speed_p)
10027 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10029 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10030 0, speed_p)
10031 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10032 return true;
10035 if (speed_p)
10036 *cost += 2 * extra_cost->alu.logical;
10038 return true;
10040 /* Vector mode? */
10042 *cost = LIBCALL_COST (2);
10043 return false;
10045 case MULT:
10046 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10047 && (mode == SFmode || !TARGET_VFP_SINGLE))
10049 rtx op0 = XEXP (x, 0);
10051 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10052 op0 = XEXP (op0, 0);
10054 if (speed_p)
10055 *cost += extra_cost->fp[mode != SFmode].mult;
10057 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10058 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10059 return true;
10061 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10063 *cost = LIBCALL_COST (2);
10064 return false;
10067 if (mode == SImode)
10069 if (TARGET_DSP_MULTIPLY
10070 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10071 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10072 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10073 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10074 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10075 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10076 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10077 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10078 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10079 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10080 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10081 && (INTVAL (XEXP (XEXP (x, 1), 1))
10082 == 16))))))
10084 /* SMUL[TB][TB]. */
10085 if (speed_p)
10086 *cost += extra_cost->mult[0].extend;
10087 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10088 SIGN_EXTEND, 0, speed_p);
10089 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10090 SIGN_EXTEND, 1, speed_p);
10091 return true;
10093 if (speed_p)
10094 *cost += extra_cost->mult[0].simple;
10095 return false;
10098 if (mode == DImode)
10100 if (arm_arch3m
10101 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10102 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10103 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10104 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10106 if (speed_p)
10107 *cost += extra_cost->mult[1].extend;
10108 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10109 ZERO_EXTEND, 0, speed_p)
10110 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10111 ZERO_EXTEND, 0, speed_p));
10112 return true;
10115 *cost = LIBCALL_COST (2);
10116 return false;
10119 /* Vector mode? */
10120 *cost = LIBCALL_COST (2);
10121 return false;
10123 case NEG:
10124 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10125 && (mode == SFmode || !TARGET_VFP_SINGLE))
10127 if (GET_CODE (XEXP (x, 0)) == MULT)
10129 /* VNMUL. */
10130 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10131 return true;
10134 if (speed_p)
10135 *cost += extra_cost->fp[mode != SFmode].neg;
10137 return false;
10139 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10141 *cost = LIBCALL_COST (1);
10142 return false;
10145 if (mode == SImode)
10147 if (GET_CODE (XEXP (x, 0)) == ABS)
10149 *cost += COSTS_N_INSNS (1);
10150 /* Assume the non-flag-changing variant. */
10151 if (speed_p)
10152 *cost += (extra_cost->alu.log_shift
10153 + extra_cost->alu.arith_shift);
10154 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10155 return true;
10158 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10159 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10161 *cost += COSTS_N_INSNS (1);
10162 /* No extra cost for MOV imm and MVN imm. */
10163 /* If the comparison op is using the flags, there's no further
10164 cost, otherwise we need to add the cost of the comparison. */
10165 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10166 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10167 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10169 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10170 *cost += (COSTS_N_INSNS (1)
10171 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10172 0, speed_p)
10173 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10174 1, speed_p));
10175 if (speed_p)
10176 *cost += extra_cost->alu.arith;
10178 return true;
10181 if (speed_p)
10182 *cost += extra_cost->alu.arith;
10183 return false;
10186 if (GET_MODE_CLASS (mode) == MODE_INT
10187 && GET_MODE_SIZE (mode) < 4)
10189 /* Slightly disparage, as we might need an extend operation. */
10190 *cost += 1;
10191 if (speed_p)
10192 *cost += extra_cost->alu.arith;
10193 return false;
10196 if (mode == DImode)
10198 *cost += COSTS_N_INSNS (1);
10199 if (speed_p)
10200 *cost += 2 * extra_cost->alu.arith;
10201 return false;
10204 /* Vector mode? */
10205 *cost = LIBCALL_COST (1);
10206 return false;
10208 case NOT:
10209 if (mode == SImode)
10211 rtx shift_op;
10212 rtx shift_reg = NULL;
10214 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10216 if (shift_op)
10218 if (shift_reg != NULL)
10220 if (speed_p)
10221 *cost += extra_cost->alu.log_shift_reg;
10222 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10224 else if (speed_p)
10225 *cost += extra_cost->alu.log_shift;
10226 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10227 return true;
10230 if (speed_p)
10231 *cost += extra_cost->alu.logical;
10232 return false;
10234 if (mode == DImode)
10236 *cost += COSTS_N_INSNS (1);
10237 return false;
10240 /* Vector mode? */
10242 *cost += LIBCALL_COST (1);
10243 return false;
10245 case IF_THEN_ELSE:
10247 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10249 *cost += COSTS_N_INSNS (3);
10250 return true;
10252 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10253 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10255 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10256 /* Assume that if one arm of the if_then_else is a register,
10257 that it will be tied with the result and eliminate the
10258 conditional insn. */
10259 if (REG_P (XEXP (x, 1)))
10260 *cost += op2cost;
10261 else if (REG_P (XEXP (x, 2)))
10262 *cost += op1cost;
10263 else
10265 if (speed_p)
10267 if (extra_cost->alu.non_exec_costs_exec)
10268 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10269 else
10270 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10272 else
10273 *cost += op1cost + op2cost;
10276 return true;
10278 case COMPARE:
10279 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10280 *cost = 0;
10281 else
10283 machine_mode op0mode;
10284 /* We'll mostly assume that the cost of a compare is the cost of the
10285 LHS. However, there are some notable exceptions. */
10287 /* Floating point compares are never done as side-effects. */
10288 op0mode = GET_MODE (XEXP (x, 0));
10289 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10290 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10292 if (speed_p)
10293 *cost += extra_cost->fp[op0mode != SFmode].compare;
10295 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10297 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10298 return true;
10301 return false;
10303 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10305 *cost = LIBCALL_COST (2);
10306 return false;
10309 /* DImode compares normally take two insns. */
10310 if (op0mode == DImode)
10312 *cost += COSTS_N_INSNS (1);
10313 if (speed_p)
10314 *cost += 2 * extra_cost->alu.arith;
10315 return false;
10318 if (op0mode == SImode)
10320 rtx shift_op;
10321 rtx shift_reg;
10323 if (XEXP (x, 1) == const0_rtx
10324 && !(REG_P (XEXP (x, 0))
10325 || (GET_CODE (XEXP (x, 0)) == SUBREG
10326 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10328 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10330 /* Multiply operations that set the flags are often
10331 significantly more expensive. */
10332 if (speed_p
10333 && GET_CODE (XEXP (x, 0)) == MULT
10334 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10335 *cost += extra_cost->mult[0].flag_setting;
10337 if (speed_p
10338 && GET_CODE (XEXP (x, 0)) == PLUS
10339 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10340 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10341 0), 1), mode))
10342 *cost += extra_cost->mult[0].flag_setting;
10343 return true;
10346 shift_reg = NULL;
10347 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10348 if (shift_op != NULL)
10350 if (shift_reg != NULL)
10352 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10353 1, speed_p);
10354 if (speed_p)
10355 *cost += extra_cost->alu.arith_shift_reg;
10357 else if (speed_p)
10358 *cost += extra_cost->alu.arith_shift;
10359 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10360 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10361 return true;
10364 if (speed_p)
10365 *cost += extra_cost->alu.arith;
10366 if (CONST_INT_P (XEXP (x, 1))
10367 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10369 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10370 return true;
10372 return false;
10375 /* Vector mode? */
10377 *cost = LIBCALL_COST (2);
10378 return false;
10380 return true;
10382 case EQ:
10383 case NE:
10384 case LT:
10385 case LE:
10386 case GT:
10387 case GE:
10388 case LTU:
10389 case LEU:
10390 case GEU:
10391 case GTU:
10392 case ORDERED:
10393 case UNORDERED:
10394 case UNEQ:
10395 case UNLE:
10396 case UNLT:
10397 case UNGE:
10398 case UNGT:
10399 case LTGT:
10400 if (outer_code == SET)
10402 /* Is it a store-flag operation? */
10403 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10404 && XEXP (x, 1) == const0_rtx)
10406 /* Thumb also needs an IT insn. */
10407 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10408 return true;
10410 if (XEXP (x, 1) == const0_rtx)
10412 switch (code)
10414 case LT:
10415 /* LSR Rd, Rn, #31. */
10416 if (speed_p)
10417 *cost += extra_cost->alu.shift;
10418 break;
10420 case EQ:
10421 /* RSBS T1, Rn, #0
10422 ADC Rd, Rn, T1. */
10424 case NE:
10425 /* SUBS T1, Rn, #1
10426 SBC Rd, Rn, T1. */
10427 *cost += COSTS_N_INSNS (1);
10428 break;
10430 case LE:
10431 /* RSBS T1, Rn, Rn, LSR #31
10432 ADC Rd, Rn, T1. */
10433 *cost += COSTS_N_INSNS (1);
10434 if (speed_p)
10435 *cost += extra_cost->alu.arith_shift;
10436 break;
10438 case GT:
10439 /* RSB Rd, Rn, Rn, ASR #1
10440 LSR Rd, Rd, #31. */
10441 *cost += COSTS_N_INSNS (1);
10442 if (speed_p)
10443 *cost += (extra_cost->alu.arith_shift
10444 + extra_cost->alu.shift);
10445 break;
10447 case GE:
10448 /* ASR Rd, Rn, #31
10449 ADD Rd, Rn, #1. */
10450 *cost += COSTS_N_INSNS (1);
10451 if (speed_p)
10452 *cost += extra_cost->alu.shift;
10453 break;
10455 default:
10456 /* Remaining cases are either meaningless or would take
10457 three insns anyway. */
10458 *cost = COSTS_N_INSNS (3);
10459 break;
10461 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10462 return true;
10464 else
10466 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10467 if (CONST_INT_P (XEXP (x, 1))
10468 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10470 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10471 return true;
10474 return false;
10477 /* Not directly inside a set. If it involves the condition code
10478 register it must be the condition for a branch, cond_exec or
10479 I_T_E operation. Since the comparison is performed elsewhere
10480 this is just the control part which has no additional
10481 cost. */
10482 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10483 && XEXP (x, 1) == const0_rtx)
10485 *cost = 0;
10486 return true;
10488 return false;
10490 case ABS:
10491 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10492 && (mode == SFmode || !TARGET_VFP_SINGLE))
10494 if (speed_p)
10495 *cost += extra_cost->fp[mode != SFmode].neg;
10497 return false;
10499 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10501 *cost = LIBCALL_COST (1);
10502 return false;
10505 if (mode == SImode)
10507 if (speed_p)
10508 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10509 return false;
10511 /* Vector mode? */
10512 *cost = LIBCALL_COST (1);
10513 return false;
10515 case SIGN_EXTEND:
10516 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10517 && MEM_P (XEXP (x, 0)))
10519 if (mode == DImode)
10520 *cost += COSTS_N_INSNS (1);
10522 if (!speed_p)
10523 return true;
10525 if (GET_MODE (XEXP (x, 0)) == SImode)
10526 *cost += extra_cost->ldst.load;
10527 else
10528 *cost += extra_cost->ldst.load_sign_extend;
10530 if (mode == DImode)
10531 *cost += extra_cost->alu.shift;
10533 return true;
10536 /* Widening from less than 32-bits requires an extend operation. */
10537 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10539 /* We have SXTB/SXTH. */
10540 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10541 if (speed_p)
10542 *cost += extra_cost->alu.extend;
10544 else if (GET_MODE (XEXP (x, 0)) != SImode)
10546 /* Needs two shifts. */
10547 *cost += COSTS_N_INSNS (1);
10548 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10549 if (speed_p)
10550 *cost += 2 * extra_cost->alu.shift;
10553 /* Widening beyond 32-bits requires one more insn. */
10554 if (mode == DImode)
10556 *cost += COSTS_N_INSNS (1);
10557 if (speed_p)
10558 *cost += extra_cost->alu.shift;
10561 return true;
10563 case ZERO_EXTEND:
10564 if ((arm_arch4
10565 || GET_MODE (XEXP (x, 0)) == SImode
10566 || GET_MODE (XEXP (x, 0)) == QImode)
10567 && MEM_P (XEXP (x, 0)))
10569 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10571 if (mode == DImode)
10572 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10574 return true;
10577 /* Widening from less than 32-bits requires an extend operation. */
10578 if (GET_MODE (XEXP (x, 0)) == QImode)
10580 /* UXTB can be a shorter instruction in Thumb2, but it might
10581 be slower than the AND Rd, Rn, #255 alternative. When
10582 optimizing for speed it should never be slower to use
10583 AND, and we don't really model 16-bit vs 32-bit insns
10584 here. */
10585 if (speed_p)
10586 *cost += extra_cost->alu.logical;
10588 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10590 /* We have UXTB/UXTH. */
10591 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10592 if (speed_p)
10593 *cost += extra_cost->alu.extend;
10595 else if (GET_MODE (XEXP (x, 0)) != SImode)
10597 /* Needs two shifts. It's marginally preferable to use
10598 shifts rather than two BIC instructions as the second
10599 shift may merge with a subsequent insn as a shifter
10600 op. */
10601 *cost = COSTS_N_INSNS (2);
10602 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10603 if (speed_p)
10604 *cost += 2 * extra_cost->alu.shift;
10607 /* Widening beyond 32-bits requires one more insn. */
10608 if (mode == DImode)
10610 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10613 return true;
10615 case CONST_INT:
10616 *cost = 0;
10617 /* CONST_INT has no mode, so we cannot tell for sure how many
10618 insns are really going to be needed. The best we can do is
10619 look at the value passed. If it fits in SImode, then assume
10620 that's the mode it will be used for. Otherwise assume it
10621 will be used in DImode. */
10622 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10623 mode = SImode;
10624 else
10625 mode = DImode;
10627 /* Avoid blowing up in arm_gen_constant (). */
10628 if (!(outer_code == PLUS
10629 || outer_code == AND
10630 || outer_code == IOR
10631 || outer_code == XOR
10632 || outer_code == MINUS))
10633 outer_code = SET;
10635 const_int_cost:
10636 if (mode == SImode)
10638 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10639 INTVAL (x), NULL, NULL,
10640 0, 0));
10641 /* Extra costs? */
10643 else
10645 *cost += COSTS_N_INSNS (arm_gen_constant
10646 (outer_code, SImode, NULL,
10647 trunc_int_for_mode (INTVAL (x), SImode),
10648 NULL, NULL, 0, 0)
10649 + arm_gen_constant (outer_code, SImode, NULL,
10650 INTVAL (x) >> 32, NULL,
10651 NULL, 0, 0));
10652 /* Extra costs? */
10655 return true;
10657 case CONST:
10658 case LABEL_REF:
10659 case SYMBOL_REF:
10660 if (speed_p)
10662 if (arm_arch_thumb2 && !flag_pic)
10663 *cost += COSTS_N_INSNS (1);
10664 else
10665 *cost += extra_cost->ldst.load;
10667 else
10668 *cost += COSTS_N_INSNS (1);
10670 if (flag_pic)
10672 *cost += COSTS_N_INSNS (1);
10673 if (speed_p)
10674 *cost += extra_cost->alu.arith;
10677 return true;
10679 case CONST_FIXED:
10680 *cost = COSTS_N_INSNS (4);
10681 /* Fixme. */
10682 return true;
10684 case CONST_DOUBLE:
10685 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10686 && (mode == SFmode || !TARGET_VFP_SINGLE))
10688 if (vfp3_const_double_rtx (x))
10690 if (speed_p)
10691 *cost += extra_cost->fp[mode == DFmode].fpconst;
10692 return true;
10695 if (speed_p)
10697 if (mode == DFmode)
10698 *cost += extra_cost->ldst.loadd;
10699 else
10700 *cost += extra_cost->ldst.loadf;
10702 else
10703 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10705 return true;
10707 *cost = COSTS_N_INSNS (4);
10708 return true;
10710 case CONST_VECTOR:
10711 /* Fixme. */
10712 if (TARGET_NEON
10713 && TARGET_HARD_FLOAT
10714 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10715 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10716 *cost = COSTS_N_INSNS (1);
10717 else
10718 *cost = COSTS_N_INSNS (4);
10719 return true;
10721 case HIGH:
10722 case LO_SUM:
10723 /* When optimizing for size, we prefer constant pool entries to
10724 MOVW/MOVT pairs, so bump the cost of these slightly. */
10725 if (!speed_p)
10726 *cost += 1;
10727 return true;
10729 case CLZ:
10730 if (speed_p)
10731 *cost += extra_cost->alu.clz;
10732 return false;
10734 case SMIN:
10735 if (XEXP (x, 1) == const0_rtx)
10737 if (speed_p)
10738 *cost += extra_cost->alu.log_shift;
10739 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10740 return true;
10742 /* Fall through. */
10743 case SMAX:
10744 case UMIN:
10745 case UMAX:
10746 *cost += COSTS_N_INSNS (1);
10747 return false;
10749 case TRUNCATE:
10750 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10751 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10752 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10753 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10754 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10756 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10757 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10758 == ZERO_EXTEND))))
10760 if (speed_p)
10761 *cost += extra_cost->mult[1].extend;
10762 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10763 ZERO_EXTEND, 0, speed_p)
10764 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10765 ZERO_EXTEND, 0, speed_p));
10766 return true;
10768 *cost = LIBCALL_COST (1);
10769 return false;
10771 case UNSPEC_VOLATILE:
10772 case UNSPEC:
10773 return arm_unspec_cost (x, outer_code, speed_p, cost);
10775 case PC:
10776 /* Reading the PC is like reading any other register. Writing it
10777 is more expensive, but we take that into account elsewhere. */
10778 *cost = 0;
10779 return true;
10781 case ZERO_EXTRACT:
10782 /* TODO: Simple zero_extract of bottom bits using AND. */
10783 /* Fall through. */
10784 case SIGN_EXTRACT:
10785 if (arm_arch6
10786 && mode == SImode
10787 && CONST_INT_P (XEXP (x, 1))
10788 && CONST_INT_P (XEXP (x, 2)))
10790 if (speed_p)
10791 *cost += extra_cost->alu.bfx;
10792 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10793 return true;
10795 /* Without UBFX/SBFX, need to resort to shift operations. */
10796 *cost += COSTS_N_INSNS (1);
10797 if (speed_p)
10798 *cost += 2 * extra_cost->alu.shift;
10799 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10800 return true;
10802 case FLOAT_EXTEND:
10803 if (TARGET_HARD_FLOAT)
10805 if (speed_p)
10806 *cost += extra_cost->fp[mode == DFmode].widen;
10807 if (!TARGET_VFP5
10808 && GET_MODE (XEXP (x, 0)) == HFmode)
10810 /* Pre v8, widening HF->DF is a two-step process, first
10811 widening to SFmode. */
10812 *cost += COSTS_N_INSNS (1);
10813 if (speed_p)
10814 *cost += extra_cost->fp[0].widen;
10816 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10817 return true;
10820 *cost = LIBCALL_COST (1);
10821 return false;
10823 case FLOAT_TRUNCATE:
10824 if (TARGET_HARD_FLOAT)
10826 if (speed_p)
10827 *cost += extra_cost->fp[mode == DFmode].narrow;
10828 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10829 return true;
10830 /* Vector modes? */
10832 *cost = LIBCALL_COST (1);
10833 return false;
10835 case FMA:
10836 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10838 rtx op0 = XEXP (x, 0);
10839 rtx op1 = XEXP (x, 1);
10840 rtx op2 = XEXP (x, 2);
10843 /* vfms or vfnma. */
10844 if (GET_CODE (op0) == NEG)
10845 op0 = XEXP (op0, 0);
10847 /* vfnms or vfnma. */
10848 if (GET_CODE (op2) == NEG)
10849 op2 = XEXP (op2, 0);
10851 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10852 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10853 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10855 if (speed_p)
10856 *cost += extra_cost->fp[mode == DFmode].fma;
10858 return true;
10861 *cost = LIBCALL_COST (3);
10862 return false;
10864 case FIX:
10865 case UNSIGNED_FIX:
10866 if (TARGET_HARD_FLOAT)
10868 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10869 a vcvt fixed-point conversion. */
10870 if (code == FIX && mode == SImode
10871 && GET_CODE (XEXP (x, 0)) == FIX
10872 && GET_MODE (XEXP (x, 0)) == SFmode
10873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10874 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10875 > 0)
10877 if (speed_p)
10878 *cost += extra_cost->fp[0].toint;
10880 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10881 code, 0, speed_p);
10882 return true;
10885 if (GET_MODE_CLASS (mode) == MODE_INT)
10887 mode = GET_MODE (XEXP (x, 0));
10888 if (speed_p)
10889 *cost += extra_cost->fp[mode == DFmode].toint;
10890 /* Strip off the 'cost' of rounding towards zero. */
10891 if (GET_CODE (XEXP (x, 0)) == FIX)
10892 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10893 0, speed_p);
10894 else
10895 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10896 /* ??? Increase the cost to deal with transferring from
10897 FP -> CORE registers? */
10898 return true;
10900 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10901 && TARGET_VFP5)
10903 if (speed_p)
10904 *cost += extra_cost->fp[mode == DFmode].roundint;
10905 return false;
10907 /* Vector costs? */
10909 *cost = LIBCALL_COST (1);
10910 return false;
10912 case FLOAT:
10913 case UNSIGNED_FLOAT:
10914 if (TARGET_HARD_FLOAT)
10916 /* ??? Increase the cost to deal with transferring from CORE
10917 -> FP registers? */
10918 if (speed_p)
10919 *cost += extra_cost->fp[mode == DFmode].fromint;
10920 return false;
10922 *cost = LIBCALL_COST (1);
10923 return false;
10925 case CALL:
10926 return true;
10928 case ASM_OPERANDS:
10930 /* Just a guess: the number of instructions in the asm plus one insn
10931 per input, but always a minimum of COSTS_N_INSNS (1)
10932 (see PR60663). */
10933 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10934 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10936 *cost = COSTS_N_INSNS (asm_length + num_operands);
10937 return true;
10939 default:
10940 if (mode != VOIDmode)
10941 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10942 else
10943 *cost = COSTS_N_INSNS (4); /* Who knows? */
10944 return false;
10948 #undef HANDLE_NARROW_SHIFT_ARITH
10950 /* RTX costs entry point. */
10952 static bool
10953 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10954 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10956 bool result;
10957 int code = GET_CODE (x);
10958 gcc_assert (current_tune->insn_extra_cost);
10960 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10961 (enum rtx_code) outer_code,
10962 current_tune->insn_extra_cost,
10963 total, speed);
10965 if (dump_file && (dump_flags & TDF_DETAILS))
10967 print_rtl_single (dump_file, x);
10968 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10969 *total, result ? "final" : "partial");
10971 return result;
10974 /* All address computations that can be done are essentially free, but
10975 rtx_cost returns much the same value for all of them. So we weight the
10976 different types of address here in order of preference (most preferred
10977 first): PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10978 static inline int
10979 arm_arm_address_cost (rtx x)
10981 enum rtx_code c = GET_CODE (x);
10983 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10984 return 0;
10985 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10986 return 10;
10988 if (c == PLUS)
10990 if (CONST_INT_P (XEXP (x, 1)))
10991 return 2;
10993 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10994 return 3;
10996 return 4;
10999 return 6;
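/* So, for example, (post_inc (reg)) scores 0, (plus (reg) (const_int 4))
   scores 2, (plus (reg) (mult (reg) (const_int 4))) scores 3, any other
   PLUS scores 4, a plain (reg) scores 6 and a (symbol_ref) scores 10.  */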
11002 static inline int
11003 arm_thumb_address_cost (rtx x)
11005 enum rtx_code c = GET_CODE (x);
11007 if (c == REG)
11008 return 1;
11009 if (c == PLUS
11010 && REG_P (XEXP (x, 0))
11011 && CONST_INT_P (XEXP (x, 1)))
11012 return 1;
11014 return 2;
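/* For Thumb, a plain (reg) or (plus (reg) (const_int 4)) costs 1; any
   other address form costs 2.  */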
11017 static int
11018 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11019 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11021 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11024 /* Adjust cost hook for XScale. */
11025 static bool
11026 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11027 int * cost)
11029 /* Some true dependencies can have a higher cost depending
11030 on precisely how certain input operands are used. */
11031 if (dep_type == 0
11032 && recog_memoized (insn) >= 0
11033 && recog_memoized (dep) >= 0)
11035 int shift_opnum = get_attr_shift (insn);
11036 enum attr_type attr_type = get_attr_type (dep);
11038 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11039 operand for INSN. If we have a shifted input operand and the
11040 instruction we depend on is another ALU instruction, then we may
11041 have to account for an additional stall. */
11042 if (shift_opnum != 0
11043 && (attr_type == TYPE_ALU_SHIFT_IMM
11044 || attr_type == TYPE_ALUS_SHIFT_IMM
11045 || attr_type == TYPE_LOGIC_SHIFT_IMM
11046 || attr_type == TYPE_LOGICS_SHIFT_IMM
11047 || attr_type == TYPE_ALU_SHIFT_REG
11048 || attr_type == TYPE_ALUS_SHIFT_REG
11049 || attr_type == TYPE_LOGIC_SHIFT_REG
11050 || attr_type == TYPE_LOGICS_SHIFT_REG
11051 || attr_type == TYPE_MOV_SHIFT
11052 || attr_type == TYPE_MVN_SHIFT
11053 || attr_type == TYPE_MOV_SHIFT_REG
11054 || attr_type == TYPE_MVN_SHIFT_REG))
11056 rtx shifted_operand;
11057 int opno;
11059 /* Get the shifted operand. */
11060 extract_insn (insn);
11061 shifted_operand = recog_data.operand[shift_opnum];
11063 /* Iterate over all the operands in DEP. If we write an operand
11064 that overlaps with SHIFTED_OPERAND, then we have to increase the
11065 cost of this dependency. */
11066 extract_insn (dep);
11067 preprocess_constraints (dep);
11068 for (opno = 0; opno < recog_data.n_operands; opno++)
11070 /* We can ignore strict inputs. */
11071 if (recog_data.operand_type[opno] == OP_IN)
11072 continue;
11074 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11075 shifted_operand))
11077 *cost = 2;
11078 return false;
11083 return true;
11086 /* Adjust cost hook for Cortex A9. */
11087 static bool
11088 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11089 int * cost)
11091 switch (dep_type)
11093 case REG_DEP_ANTI:
11094 *cost = 0;
11095 return false;
11097 case REG_DEP_TRUE:
11098 case REG_DEP_OUTPUT:
11099 if (recog_memoized (insn) >= 0
11100 && recog_memoized (dep) >= 0)
11102 if (GET_CODE (PATTERN (insn)) == SET)
11104 if (GET_MODE_CLASS
11105 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11106 || GET_MODE_CLASS
11107 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11109 enum attr_type attr_type_insn = get_attr_type (insn);
11110 enum attr_type attr_type_dep = get_attr_type (dep);
11112 /* By default all dependencies of the form
11113 s0 = s0 <op> s1
11114 s0 = s0 <op> s2
11115 have an extra latency of 1 cycle because
11116 of the input and output dependency in this
11117 case. However this gets modeled as a true
11118 dependency, hence all these checks. */
11119 if (REG_P (SET_DEST (PATTERN (insn)))
11120 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11122 /* FMACS is a special case where the dependent
11123 instruction can be issued 3 cycles before
11124 the normal latency in case of an output
11125 dependency. */
11126 if ((attr_type_insn == TYPE_FMACS
11127 || attr_type_insn == TYPE_FMACD)
11128 && (attr_type_dep == TYPE_FMACS
11129 || attr_type_dep == TYPE_FMACD))
11131 if (dep_type == REG_DEP_OUTPUT)
11132 *cost = insn_default_latency (dep) - 3;
11133 else
11134 *cost = insn_default_latency (dep);
11135 return false;
11137 else
11139 if (dep_type == REG_DEP_OUTPUT)
11140 *cost = insn_default_latency (dep) + 1;
11141 else
11142 *cost = insn_default_latency (dep);
11144 return false;
11149 break;
11151 default:
11152 gcc_unreachable ();
11155 return true;
11158 /* Adjust cost hook for FA726TE. */
11159 static bool
11160 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11161 int * cost)
11163 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by a
11164 predicated insn) has a penalty of 3. */
11165 if (dep_type == REG_DEP_TRUE
11166 && recog_memoized (insn) >= 0
11167 && recog_memoized (dep) >= 0
11168 && get_attr_conds (dep) == CONDS_SET)
11170 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11171 if (get_attr_conds (insn) == CONDS_USE
11172 && get_attr_type (insn) != TYPE_BRANCH)
11174 *cost = 3;
11175 return false;
11178 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11179 || get_attr_conds (insn) == CONDS_USE)
11181 *cost = 0;
11182 return false;
11186 return true;
11189 /* Implement TARGET_REGISTER_MOVE_COST.
11191 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11192 such a move is typically more expensive than a single memory access. We set
11193 the cost to less than two memory accesses so that floating
11194 point to integer conversion does not go through memory. */
11197 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11198 reg_class_t from, reg_class_t to)
11200 if (TARGET_32BIT)
11202 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11203 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11204 return 15;
11205 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11206 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11207 return 4;
11208 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11209 return 20;
11210 else
11211 return 2;
11213 else
11215 if (from == HI_REGS || to == HI_REGS)
11216 return 4;
11217 else
11218 return 2;
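/* With these weights and the memory-move cost of 10 returned below for
   32-bit targets, the VFP<->core cost of 15 sits between one memory
   access (10) and two (20), which is what keeps float-to-integer
   conversions from being bounced through memory.  */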
11222 /* Implement TARGET_MEMORY_MOVE_COST. */
11225 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11226 bool in ATTRIBUTE_UNUSED)
11228 if (TARGET_32BIT)
11229 return 10;
11230 else
11232 if (GET_MODE_SIZE (mode) < 4)
11233 return 8;
11234 else
11235 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
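/* For Thumb-1, e.g., a DImode move (8 bytes) therefore costs 16 from
   LO_REGS and 32 from any other class, while anything narrower than a
   word costs a flat 8.  */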
11239 /* Vectorizer cost model implementation. */
11241 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11242 static int
11243 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11244 tree vectype,
11245 int misalign ATTRIBUTE_UNUSED)
11247 unsigned elements;
11249 switch (type_of_cost)
11251 case scalar_stmt:
11252 return current_tune->vec_costs->scalar_stmt_cost;
11254 case scalar_load:
11255 return current_tune->vec_costs->scalar_load_cost;
11257 case scalar_store:
11258 return current_tune->vec_costs->scalar_store_cost;
11260 case vector_stmt:
11261 return current_tune->vec_costs->vec_stmt_cost;
11263 case vector_load:
11264 return current_tune->vec_costs->vec_align_load_cost;
11266 case vector_store:
11267 return current_tune->vec_costs->vec_store_cost;
11269 case vec_to_scalar:
11270 return current_tune->vec_costs->vec_to_scalar_cost;
11272 case scalar_to_vec:
11273 return current_tune->vec_costs->scalar_to_vec_cost;
11275 case unaligned_load:
11276 case vector_gather_load:
11277 return current_tune->vec_costs->vec_unalign_load_cost;
11279 case unaligned_store:
11280 case vector_scatter_store:
11281 return current_tune->vec_costs->vec_unalign_store_cost;
11283 case cond_branch_taken:
11284 return current_tune->vec_costs->cond_taken_branch_cost;
11286 case cond_branch_not_taken:
11287 return current_tune->vec_costs->cond_not_taken_branch_cost;
11289 case vec_perm:
11290 case vec_promote_demote:
11291 return current_tune->vec_costs->vec_stmt_cost;
11293 case vec_construct:
11294 elements = TYPE_VECTOR_SUBPARTS (vectype);
11295 return elements / 2 + 1;
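/* E.g. constructing a four-element vector such as V4SI is costed at
   4 / 2 + 1 == 3.  */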
11297 default:
11298 gcc_unreachable ();
11302 /* Implement targetm.vectorize.add_stmt_cost. */
11304 static unsigned
11305 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11306 struct _stmt_vec_info *stmt_info, int misalign,
11307 enum vect_cost_model_location where)
11309 unsigned *cost = (unsigned *) data;
11310 unsigned retval = 0;
11312 if (flag_vect_cost_model)
11314 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11315 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11317 /* Statements in an inner loop relative to the loop being
11318 vectorized are weighted more heavily. The value here is
11319 arbitrary and could potentially be improved with analysis. */
11320 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11321 count *= 50; /* FIXME. */
11323 retval = (unsigned) (count * stmt_cost);
11324 cost[where] += retval;
11327 return retval;
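/* Worked example, with a hypothetical tuning in which vec_stmt_cost is 1:
   a single vector statement in the body of the loop being vectorized adds
   1 * 1 = 1 to cost[vect_body], whereas the same statement in an inner
   loop relative to the vectorized loop is weighted by the factor above
   and adds 1 * 50 * 1 = 50.  */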
11330 /* Return true if and only if this insn can dual-issue only as older. */
11331 static bool
11332 cortexa7_older_only (rtx_insn *insn)
11334 if (recog_memoized (insn) < 0)
11335 return false;
11337 switch (get_attr_type (insn))
11339 case TYPE_ALU_DSP_REG:
11340 case TYPE_ALU_SREG:
11341 case TYPE_ALUS_SREG:
11342 case TYPE_LOGIC_REG:
11343 case TYPE_LOGICS_REG:
11344 case TYPE_ADC_REG:
11345 case TYPE_ADCS_REG:
11346 case TYPE_ADR:
11347 case TYPE_BFM:
11348 case TYPE_REV:
11349 case TYPE_MVN_REG:
11350 case TYPE_SHIFT_IMM:
11351 case TYPE_SHIFT_REG:
11352 case TYPE_LOAD_BYTE:
11353 case TYPE_LOAD_4:
11354 case TYPE_STORE_4:
11355 case TYPE_FFARITHS:
11356 case TYPE_FADDS:
11357 case TYPE_FFARITHD:
11358 case TYPE_FADDD:
11359 case TYPE_FMOV:
11360 case TYPE_F_CVT:
11361 case TYPE_FCMPS:
11362 case TYPE_FCMPD:
11363 case TYPE_FCONSTS:
11364 case TYPE_FCONSTD:
11365 case TYPE_FMULS:
11366 case TYPE_FMACS:
11367 case TYPE_FMULD:
11368 case TYPE_FMACD:
11369 case TYPE_FDIVS:
11370 case TYPE_FDIVD:
11371 case TYPE_F_MRC:
11372 case TYPE_F_MRRC:
11373 case TYPE_F_FLAG:
11374 case TYPE_F_LOADS:
11375 case TYPE_F_STORES:
11376 return true;
11377 default:
11378 return false;
11382 /* Return true if and only if this insn can dual-issue as younger. */
11383 static bool
11384 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11386 if (recog_memoized (insn) < 0)
11388 if (verbose > 5)
11389 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11390 return false;
11393 switch (get_attr_type (insn))
11395 case TYPE_ALU_IMM:
11396 case TYPE_ALUS_IMM:
11397 case TYPE_LOGIC_IMM:
11398 case TYPE_LOGICS_IMM:
11399 case TYPE_EXTEND:
11400 case TYPE_MVN_IMM:
11401 case TYPE_MOV_IMM:
11402 case TYPE_MOV_REG:
11403 case TYPE_MOV_SHIFT:
11404 case TYPE_MOV_SHIFT_REG:
11405 case TYPE_BRANCH:
11406 case TYPE_CALL:
11407 return true;
11408 default:
11409 return false;
11414 /* Look for an instruction that can dual issue only as an older
11415 instruction, and move it in front of any instructions that can
11416 dual-issue as younger, while preserving the relative order of all
11417 other instructions in the ready list. This is a heuristic to help
11418 dual-issue in later cycles, by postponing issue of more flexible
11419 instructions. This heuristic may affect dual issue opportunities
11420 in the current cycle. */
11421 static void
11422 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11423 int *n_readyp, int clock)
11425 int i;
11426 int first_older_only = -1, first_younger = -1;
11428 if (verbose > 5)
11429 fprintf (file,
11430 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11431 clock,
11432 *n_readyp);
11434 /* Traverse the ready list from the head (the instruction to issue
11435 first), looking for the first instruction that can issue as
11436 younger and the first instruction that can dual-issue only as
11437 older. */
11438 for (i = *n_readyp - 1; i >= 0; i--)
11440 rtx_insn *insn = ready[i];
11441 if (cortexa7_older_only (insn))
11443 first_older_only = i;
11444 if (verbose > 5)
11445 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11446 break;
11448 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11449 first_younger = i;
11452 /* Nothing to reorder because either no younger insn was found, or an
11453 insn that can dual-issue only as older appears before any insn that
11454 can dual-issue as younger. */
11455 if (first_younger == -1)
11457 if (verbose > 5)
11458 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11459 return;
11462 /* Nothing to reorder because no older-only insn in the ready list. */
11463 if (first_older_only == -1)
11465 if (verbose > 5)
11466 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11467 return;
11470 /* Move first_older_only insn before first_younger. */
11471 if (verbose > 5)
11472 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11473 INSN_UID(ready [first_older_only]),
11474 INSN_UID(ready [first_younger]));
11475 rtx_insn *first_older_only_insn = ready [first_older_only];
11476 for (i = first_older_only; i < first_younger; i++)
11478 ready[i] = ready[i+1];
11481 ready[i] = first_older_only_insn;
11482 return;
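/* Standalone illustration of the rotation above; it is hypothetical
   example code, guarded out of the build, and is not used by the
   compiler.  The older-only entry moves into the slot of the first
   younger entry (a higher index, i.e. closer to the head of the ready
   list), and everything in between shifts down one place, preserving
   relative order.  */
#if 0
#include <stdio.h>

static void
example_rotate (int *ready, int first_older_only, int first_younger)
{
  int moved = ready[first_older_only];
  int i;
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[first_younger] = moved;
}

int
main (void)
{
  /* Index 4 is the head of this toy ready list (issued first).  */
  int ready[5] = { 10, 11, 12, 13, 14 };
  /* Pretend entry 1 is older-only and entry 3 is the first younger.  */
  example_rotate (ready, 1, 3);
  for (int i = 0; i < 5; i++)
    printf ("%d ", ready[i]);	/* Prints: 10 12 13 11 14  */
  printf ("\n");
  return 0;
}
#endif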
11485 /* Implement TARGET_SCHED_REORDER. */
11486 static int
11487 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11488 int clock)
11490 switch (arm_tune)
11492 case TARGET_CPU_cortexa7:
11493 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11494 break;
11495 default:
11496 /* Do nothing for other cores. */
11497 break;
11500 return arm_issue_rate ();
11503 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11504 It corrects the value of COST based on the relationship between
11505 INSN and DEP through the dependence LINK. It returns the new
11506 value. There is a per-core adjust_cost hook to adjust scheduler costs
11507 and the per-core hook can choose to completely override the generic
11508 adjust_cost function. Only put bits of code into arm_adjust_cost that
11509 are common across all cores. */
11510 static int
11511 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11512 unsigned int)
11514 rtx i_pat, d_pat;
11516 /* When generating Thumb-1 code, we want to place flag-setting operations
11517 close to a conditional branch which depends on them, so that we can
11518 omit the comparison. */
11519 if (TARGET_THUMB1
11520 && dep_type == 0
11521 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11522 && recog_memoized (dep) >= 0
11523 && get_attr_conds (dep) == CONDS_SET)
11524 return 0;
11526 if (current_tune->sched_adjust_cost != NULL)
11528 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11529 return cost;
11532 /* XXX Is this strictly true? */
11533 if (dep_type == REG_DEP_ANTI
11534 || dep_type == REG_DEP_OUTPUT)
11535 return 0;
11537 /* Call insns don't incur a stall, even if they follow a load. */
11538 if (dep_type == 0
11539 && CALL_P (insn))
11540 return 1;
11542 if ((i_pat = single_set (insn)) != NULL
11543 && MEM_P (SET_SRC (i_pat))
11544 && (d_pat = single_set (dep)) != NULL
11545 && MEM_P (SET_DEST (d_pat)))
11547 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11548 /* This is a load after a store; there is no conflict if the load reads
11549 from a cached area. Assume that loads from the stack, and from the
11550 constant pool are cached, and that others will miss. This is a
11551 hack. */
11553 if ((GET_CODE (src_mem) == SYMBOL_REF
11554 && CONSTANT_POOL_ADDRESS_P (src_mem))
11555 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11556 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11557 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11558 return 1;
11561 return cost;
11565 arm_max_conditional_execute (void)
11567 return max_insns_skipped;
11570 static int
11571 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11573 if (TARGET_32BIT)
11574 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11575 else
11576 return (optimize > 0) ? 2 : 0;
11579 static int
11580 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11582 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11585 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11586 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11587 sequences of non-executed instructions in IT blocks probably take the same
11588 amount of time as executed instructions (and the IT instruction itself takes
11589 space in icache). This function was experimentally determined to give good
11590 results on a popular embedded benchmark. */
11592 static int
11593 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11595 return (TARGET_32BIT && speed_p) ? 1
11596 : arm_default_branch_cost (speed_p, predictable_p);
11599 static int
11600 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11602 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11605 static bool fp_consts_inited = false;
11607 static REAL_VALUE_TYPE value_fp0;
11609 static void
11610 init_fp_table (void)
11612 REAL_VALUE_TYPE r;
11614 r = REAL_VALUE_ATOF ("0", DFmode);
11615 value_fp0 = r;
11616 fp_consts_inited = true;
11619 /* Return TRUE if rtx X is a valid immediate FP constant. */
11621 arm_const_double_rtx (rtx x)
11623 const REAL_VALUE_TYPE *r;
11625 if (!fp_consts_inited)
11626 init_fp_table ();
11628 r = CONST_DOUBLE_REAL_VALUE (x);
11629 if (REAL_VALUE_MINUS_ZERO (*r))
11630 return 0;
11632 if (real_equal (r, &value_fp0))
11633 return 1;
11635 return 0;
11638 /* VFPv3 has a fairly wide range of representable immediates, formed from
11639 "quarter-precision" floating-point values. These can be evaluated using this
11640 formula (with ^ for exponentiation):
11642 -1^s * n * 2^-r
11644 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11645 16 <= n <= 31 and 0 <= r <= 7.
11647 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11649 - A (most-significant) is the sign bit.
11650 - BCD are the exponent (encoded as r XOR 3).
11651 - EFGH are the mantissa (encoded as n - 16).
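/* Worked example of the encoding above (illustrative only): 1.0 can be
   written as 16 * 2^-4, so s = 0, n = 16 and r = 4, giving the byte
   (0 << 7) | ((4 XOR 3) << 4) | (16 - 16) = 0x70.  The guarded-out
   sketch below, which is not part of the compiler, packs an (s, n, r)
   triple the same way vfp3_const_double_index below does.  */
#if 0
#include <assert.h>

/* Pack (-1)^s * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7, into the
   8-bit fconst[sd] immediate.  */
static int
example_vfp3_pack (int s, int n, int r)
{
  assert ((s == 0 || s == 1) && n >= 16 && n <= 31 && r >= 0 && r <= 7);
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}

int
main (void)
{
  assert (example_vfp3_pack (0, 16, 4) == 0x70);	/* 1.0  */
  assert (example_vfp3_pack (1, 16, 4) == 0xf0);	/* -1.0 */
  assert (example_vfp3_pack (0, 31, 0) == 0x3f);	/* 31.0 */
  return 0;
}
#endif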
11654 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11655 fconst[sd] instruction, or -1 if X isn't suitable. */
11656 static int
11657 vfp3_const_double_index (rtx x)
11659 REAL_VALUE_TYPE r, m;
11660 int sign, exponent;
11661 unsigned HOST_WIDE_INT mantissa, mant_hi;
11662 unsigned HOST_WIDE_INT mask;
11663 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11664 bool fail;
11666 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11667 return -1;
11669 r = *CONST_DOUBLE_REAL_VALUE (x);
11671 /* We can't represent these things, so detect them first. */
11672 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11673 return -1;
11675 /* Extract sign, exponent and mantissa. */
11676 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11677 r = real_value_abs (&r);
11678 exponent = REAL_EXP (&r);
11679 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11680 highest (sign) bit, with a fixed binary point at bit point_pos.
11681 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11682 bits for the mantissa, this may fail (low bits would be lost). */
11683 real_ldexp (&m, &r, point_pos - exponent);
11684 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11685 mantissa = w.elt (0);
11686 mant_hi = w.elt (1);
11688 /* If there are bits set in the low part of the mantissa, we can't
11689 represent this value. */
11690 if (mantissa != 0)
11691 return -1;
11693 /* Now make it so that mantissa contains the most-significant bits, and move
11694 the point_pos to indicate that the least-significant bits have been
11695 discarded. */
11696 point_pos -= HOST_BITS_PER_WIDE_INT;
11697 mantissa = mant_hi;
11699 /* We can permit four significant bits of mantissa only, plus a high bit
11700 which is always 1. */
11701 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11702 if ((mantissa & mask) != 0)
11703 return -1;
11705 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11706 mantissa >>= point_pos - 5;
11708 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11709 floating-point immediate zero with Neon using an integer-zero load, but
11710 that case is handled elsewhere.) */
11711 if (mantissa == 0)
11712 return -1;
11714 gcc_assert (mantissa >= 16 && mantissa <= 31);
11716 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11717 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11718 left 4 places at this point relative to normalized IEEE754 values). GCC
11719 internally uses [0.5, 1) (see real.c), so the exponent returned from
11720 REAL_EXP must be altered. */
11721 exponent = 5 - exponent;
11723 if (exponent < 0 || exponent > 7)
11724 return -1;
11726 /* Sign, mantissa and exponent are now in the correct form to plug into the
11727 formula described in the comment above. */
11728 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11731 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11733 vfp3_const_double_rtx (rtx x)
11735 if (!TARGET_VFP3)
11736 return 0;
11738 return vfp3_const_double_index (x) != -1;
11741 /* Recognize immediates which can be used in various Neon instructions. Legal
11742 immediates are described by the following table (for VMVN variants, the
11743 bitwise inverse of the constant shown is recognized. In either case, VMOV
11744 is output and the correct instruction to use for a given constant is chosen
11745 by the assembler). The constant shown is replicated across all elements of
11746 the destination vector.
11748 insn elems variant constant (binary)
11749 ---- ----- ------- -----------------
11750 vmov i32 0 00000000 00000000 00000000 abcdefgh
11751 vmov i32 1 00000000 00000000 abcdefgh 00000000
11752 vmov i32 2 00000000 abcdefgh 00000000 00000000
11753 vmov i32 3 abcdefgh 00000000 00000000 00000000
11754 vmov i16 4 00000000 abcdefgh
11755 vmov i16 5 abcdefgh 00000000
11756 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11757 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11758 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11759 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11760 vmvn i16 10 00000000 abcdefgh
11761 vmvn i16 11 abcdefgh 00000000
11762 vmov i32 12 00000000 00000000 abcdefgh 11111111
11763 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11764 vmov i32 14 00000000 abcdefgh 11111111 11111111
11765 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11766 vmov i8 16 abcdefgh
11767 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11768 eeeeeeee ffffffff gggggggg hhhhhhhh
11769 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11770 vmov f32 19 00000000 00000000 00000000 00000000
11772 For case 18, B = !b. Representable values are exactly those accepted by
11773 vfp3_const_double_index, but are output as floating-point numbers rather
11774 than indices.
11776 For case 19, we will change it to vmov.i32 when assembling.
11778 Variants 0-5 (inclusive) may also be used as immediates for the second
11779 operand of VORR/VBIC instructions.
11781 The INVERSE argument causes the bitwise inverse of the given operand to be
11782 recognized instead (used for recognizing legal immediates for the VAND/VORN
11783 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11784 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11785 output, rather than the real insns vbic/vorr).
11787 INVERSE makes no difference to the recognition of float vectors.
11789 The return value is the variant of immediate as shown in the above table, or
11790 -1 if the given value doesn't match any of the listed patterns.
11792 static int
11793 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11794 rtx *modconst, int *elementwidth)
11796 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11797 matches = 1; \
11798 for (i = 0; i < idx; i += (STRIDE)) \
11799 if (!(TEST)) \
11800 matches = 0; \
11801 if (matches) \
11803 immtype = (CLASS); \
11804 elsize = (ELSIZE); \
11805 break; \
11808 unsigned int i, elsize = 0, idx = 0, n_elts;
11809 unsigned int innersize;
11810 unsigned char bytes[16];
11811 int immtype = -1, matches;
11812 unsigned int invmask = inverse ? 0xff : 0;
11813 bool vector = GET_CODE (op) == CONST_VECTOR;
11815 if (vector)
11816 n_elts = CONST_VECTOR_NUNITS (op);
11817 else
11819 n_elts = 1;
11820 if (mode == VOIDmode)
11821 mode = DImode;
11824 innersize = GET_MODE_UNIT_SIZE (mode);
11826 /* Vectors of float constants. */
11827 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11829 rtx el0 = CONST_VECTOR_ELT (op, 0);
11831 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11832 return -1;
11834 /* FP16 vectors cannot be represented. */
11835 if (GET_MODE_INNER (mode) == HFmode)
11836 return -1;
11838 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11839 are distinct in this context. */
11840 if (!const_vec_duplicate_p (op))
11841 return -1;
11843 if (modconst)
11844 *modconst = CONST_VECTOR_ELT (op, 0);
11846 if (elementwidth)
11847 *elementwidth = 0;
11849 if (el0 == CONST0_RTX (GET_MODE (el0)))
11850 return 19;
11851 else
11852 return 18;
11855 /* The tricks done in the code below apply for little-endian vector layout.
11856 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11857 FIXME: Implement logic for big-endian vectors. */
11858 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11859 return -1;
11861 /* Splat vector constant out into a byte vector. */
11862 for (i = 0; i < n_elts; i++)
11864 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11865 unsigned HOST_WIDE_INT elpart;
11867 gcc_assert (CONST_INT_P (el));
11868 elpart = INTVAL (el);
11870 for (unsigned int byte = 0; byte < innersize; byte++)
11872 bytes[idx++] = (elpart & 0xff) ^ invmask;
11873 elpart >>= BITS_PER_UNIT;
11877 /* Sanity check. */
11878 gcc_assert (idx == GET_MODE_SIZE (mode));
11882 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11883 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11885 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11886 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11888 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11889 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11891 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11892 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11894 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11896 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11898 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11899 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11901 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11902 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11904 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11905 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11907 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11908 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11910 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11912 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11914 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11915 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11917 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11918 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11920 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11921 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11923 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11924 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11926 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11928 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11929 && bytes[i] == bytes[(i + 8) % idx]);
11931 while (0);
11933 if (immtype == -1)
11934 return -1;
11936 if (elementwidth)
11937 *elementwidth = elsize;
11939 if (modconst)
11941 unsigned HOST_WIDE_INT imm = 0;
11943 /* Un-invert bytes of recognized vector, if necessary. */
11944 if (invmask != 0)
11945 for (i = 0; i < idx; i++)
11946 bytes[i] ^= invmask;
11948 if (immtype == 17)
11950 /* FIXME: Broken on 32-bit H_W_I hosts. */
11951 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11953 for (i = 0; i < 8; i++)
11954 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11955 << (i * BITS_PER_UNIT);
11957 *modconst = GEN_INT (imm);
11959 else
11961 unsigned HOST_WIDE_INT imm = 0;
11963 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11964 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11966 *modconst = GEN_INT (imm);
11970 return immtype;
11971 #undef CHECK
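/* Simplified standalone sketch, guarded out of the build and not used by
   the compiler: it classifies a 32-bit value that is known to be
   replicated across all lanes into the "single non-zero byte" VMOV
   variants 0-3 from the table above neon_valid_immediate, returning -1
   otherwise.  The real routine above additionally handles the 16-bit,
   8-bit and 64-bit forms, the VMVN forms and the float forms.  */
#if 0
static int
example_vmov_i32_variant (unsigned int val)
{
  int byte;
  for (byte = 0; byte < 4; byte++)
    {
      unsigned int mask = 0xffu << (byte * 8);
      /* Variant N keeps its abcdefgh byte in byte position N (counting
	 from the least significant byte) and zeros elsewhere.  */
      if ((val & ~mask) == 0)
	return byte;
    }
  return -1;
}

/* example_vmov_i32_variant (0x00000042) == 0   (vmov.i32 #0x42)
   example_vmov_i32_variant (0x00ff0000) == 2
   example_vmov_i32_variant (0x00010100) == -1  (two non-zero bytes)  */
#endif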
11974 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11975 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11976 float elements), and a modified constant (whatever should be output for a
11977 VMOV) in *MODCONST. */
11980 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11981 rtx *modconst, int *elementwidth)
11983 rtx tmpconst;
11984 int tmpwidth;
11985 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11987 if (retval == -1)
11988 return 0;
11990 if (modconst)
11991 *modconst = tmpconst;
11993 if (elementwidth)
11994 *elementwidth = tmpwidth;
11996 return 1;
11999 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12000 the immediate is valid, write a constant suitable for using as an operand
12001 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12002 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12005 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12006 rtx *modconst, int *elementwidth)
12008 rtx tmpconst;
12009 int tmpwidth;
12010 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12012 if (retval < 0 || retval > 5)
12013 return 0;
12015 if (modconst)
12016 *modconst = tmpconst;
12018 if (elementwidth)
12019 *elementwidth = tmpwidth;
12021 return 1;
12024 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12025 the immediate is valid, write a constant suitable for using as an operand
12026 to VSHR/VSHL to *MODCONST and the corresponding element width to
12027 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or right shift,
12028 since left and right shifts accept different immediate ranges.
12031 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12032 rtx *modconst, int *elementwidth,
12033 bool isleftshift)
12035 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12036 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12037 unsigned HOST_WIDE_INT last_elt = 0;
12038 unsigned HOST_WIDE_INT maxshift;
12040 /* Split vector constant out into a byte vector. */
12041 for (i = 0; i < n_elts; i++)
12043 rtx el = CONST_VECTOR_ELT (op, i);
12044 unsigned HOST_WIDE_INT elpart;
12046 if (CONST_INT_P (el))
12047 elpart = INTVAL (el);
12048 else if (CONST_DOUBLE_P (el))
12049 return 0;
12050 else
12051 gcc_unreachable ();
12053 if (i != 0 && elpart != last_elt)
12054 return 0;
12056 last_elt = elpart;
12059 /* Shift less than element size. */
12060 maxshift = innersize * 8;
12062 if (isleftshift)
12064 /* Left shift immediate value can be from 0 to <size>-1. */
12065 if (last_elt >= maxshift)
12066 return 0;
12068 else
12070 /* Right shift immediate value can be from 1 to <size>. */
12071 if (last_elt == 0 || last_elt > maxshift)
12072 return 0;
12075 if (elementwidth)
12076 *elementwidth = innersize * 8;
12078 if (modconst)
12079 *modconst = CONST_VECTOR_ELT (op, 0);
12081 return 1;
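/* For example (illustrative only): with 16-bit elements the element width
   is 16, so a VSHL immediate must lie in the range 0-15 while a VSHR
   immediate must lie in the range 1-16; a splatted shift count of 16 is
   therefore rejected for VSHL but accepted for VSHR.  */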
12084 /* Return a string suitable for output of Neon immediate logic operation
12085 MNEM. */
12087 char *
12088 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12089 int inverse, int quad)
12091 int width, is_valid;
12092 static char templ[40];
12094 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12096 gcc_assert (is_valid != 0);
12098 if (quad)
12099 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12100 else
12101 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12103 return templ;
12106 /* Return a string suitable for output of Neon immediate shift operation
12107 (VSHR or VSHL) MNEM. */
12109 char *
12110 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12111 machine_mode mode, int quad,
12112 bool isleftshift)
12114 int width, is_valid;
12115 static char templ[40];
12117 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12118 gcc_assert (is_valid != 0);
12120 if (quad)
12121 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12122 else
12123 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12125 return templ;
12128 /* Output a sequence of pairwise operations to implement a reduction.
12129 NOTE: We do "too much work" here, because pairwise operations work on two
12130 registers-worth of operands in one go. Unfortunately, I don't think we can
12131 exploit those extra calculations to do the full operation in fewer steps.
12132 Although all vector elements of the result but the first are ignored, we
12133 actually calculate the same result in each of the elements. An alternative
12134 such as initially loading a vector with zero to use as each of the second
12135 operands would use up an additional register and take an extra instruction,
12136 for no particular gain. */
12138 void
12139 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12140 rtx (*reduc) (rtx, rtx, rtx))
12142 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12143 rtx tmpsum = op1;
12145 for (i = parts / 2; i >= 1; i /= 2)
12147 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12148 emit_insn (reduc (dest, tmpsum, tmpsum));
12149 tmpsum = dest;
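/* Standalone illustration of the halving schedule above, using plain
   scalars; it is guarded out of the build and is not how the NEON
   pairwise instructions themselves behave (they keep the full register
   width, as the comment above notes), but it shows why an 8-element
   reduction needs three steps (8 -> 4 -> 2 -> 1) and why element 0 of
   the last step holds the full sum.  */
#if 0
#include <assert.h>

static void
example_pairwise_step (int *dst, const int *src, int n)
{
  /* Add adjacent elements, keeping only the low half.  */
  int i;
  for (i = 0; i < n / 2; i++)
    dst[i] = src[2 * i] + src[2 * i + 1];
}

int
main (void)
{
  int v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  example_pairwise_step (v, v, 8);	/* { 3, 7, 11, 15, ... }  */
  example_pairwise_step (v, v, 4);	/* { 10, 26, ... }        */
  example_pairwise_step (v, v, 2);	/* { 36, ... }            */
  assert (v[0] == 36);
  return 0;
}
#endif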
12153 /* If VALS is a vector constant that can be loaded into a register
12154 using VDUP, generate instructions to do so and return an RTX to
12155 assign to the register. Otherwise return NULL_RTX. */
12157 static rtx
12158 neon_vdup_constant (rtx vals)
12160 machine_mode mode = GET_MODE (vals);
12161 machine_mode inner_mode = GET_MODE_INNER (mode);
12162 rtx x;
12164 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12165 return NULL_RTX;
12167 if (!const_vec_duplicate_p (vals, &x))
12168 /* The elements are not all the same. We could handle repeating
12169 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12170 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12171 vdup.i16). */
12172 return NULL_RTX;
12174 /* We can load this constant by using VDUP and a constant in a
12175 single ARM register. This will be cheaper than a vector
12176 load. */
12178 x = copy_to_mode_reg (inner_mode, x);
12179 return gen_vec_duplicate (mode, x);
12182 /* Generate code to load VALS, which is a PARALLEL containing only
12183 constants (for vec_init) or CONST_VECTOR, efficiently into a
12184 register. Returns an RTX to copy into the register, or NULL_RTX
12185 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12188 neon_make_constant (rtx vals)
12190 machine_mode mode = GET_MODE (vals);
12191 rtx target;
12192 rtx const_vec = NULL_RTX;
12193 int n_elts = GET_MODE_NUNITS (mode);
12194 int n_const = 0;
12195 int i;
12197 if (GET_CODE (vals) == CONST_VECTOR)
12198 const_vec = vals;
12199 else if (GET_CODE (vals) == PARALLEL)
12201 /* A CONST_VECTOR must contain only CONST_INTs and
12202 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12203 Only store valid constants in a CONST_VECTOR. */
12204 for (i = 0; i < n_elts; ++i)
12206 rtx x = XVECEXP (vals, 0, i);
12207 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12208 n_const++;
12210 if (n_const == n_elts)
12211 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12213 else
12214 gcc_unreachable ();
12216 if (const_vec != NULL
12217 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12218 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12219 return const_vec;
12220 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12221 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12222 pipeline cycle; creating the constant takes one or two ARM
12223 pipeline cycles. */
12224 return target;
12225 else if (const_vec != NULL_RTX)
12226 /* Load from constant pool. On Cortex-A8 this takes two cycles
12227 (for either double or quad vectors). We cannot take advantage
12228 of single-cycle VLD1 because we need a PC-relative addressing
12229 mode. */
12230 return const_vec;
12231 else
12232 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12233 We cannot construct an initializer. */
12234 return NULL_RTX;
12237 /* Initialize vector TARGET to VALS. */
12239 void
12240 neon_expand_vector_init (rtx target, rtx vals)
12242 machine_mode mode = GET_MODE (target);
12243 machine_mode inner_mode = GET_MODE_INNER (mode);
12244 int n_elts = GET_MODE_NUNITS (mode);
12245 int n_var = 0, one_var = -1;
12246 bool all_same = true;
12247 rtx x, mem;
12248 int i;
12250 for (i = 0; i < n_elts; ++i)
12252 x = XVECEXP (vals, 0, i);
12253 if (!CONSTANT_P (x))
12254 ++n_var, one_var = i;
12256 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12257 all_same = false;
12260 if (n_var == 0)
12262 rtx constant = neon_make_constant (vals);
12263 if (constant != NULL_RTX)
12265 emit_move_insn (target, constant);
12266 return;
12270 /* Splat a single non-constant element if we can. */
12271 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12273 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12274 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12275 return;
12278 /* One field is non-constant. Load constant then overwrite varying
12279 field. This is more efficient than using the stack. */
12280 if (n_var == 1)
12282 rtx copy = copy_rtx (vals);
12283 rtx index = GEN_INT (one_var);
12285 /* Load constant part of vector, substitute neighboring value for
12286 varying element. */
12287 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12288 neon_expand_vector_init (target, copy);
12290 /* Insert variable. */
12291 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12292 switch (mode)
12294 case E_V8QImode:
12295 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12296 break;
12297 case E_V16QImode:
12298 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12299 break;
12300 case E_V4HImode:
12301 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12302 break;
12303 case E_V8HImode:
12304 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12305 break;
12306 case E_V2SImode:
12307 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12308 break;
12309 case E_V4SImode:
12310 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12311 break;
12312 case E_V2SFmode:
12313 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12314 break;
12315 case E_V4SFmode:
12316 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12317 break;
12318 case E_V2DImode:
12319 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12320 break;
12321 default:
12322 gcc_unreachable ();
12324 return;
12327 /* Construct the vector in memory one field at a time
12328 and load the whole vector. */
12329 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12330 for (i = 0; i < n_elts; i++)
12331 emit_move_insn (adjust_address_nv (mem, inner_mode,
12332 i * GET_MODE_SIZE (inner_mode)),
12333 XVECEXP (vals, 0, i));
12334 emit_move_insn (target, mem);
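/* Illustrative summary of the strategies above (element values are
   hypothetical): { 1, 1, 1, 1 } goes through neon_make_constant (VMOV
   immediate, VDUP of a constant, or a constant-pool load); { x, x, x, x }
   with a non-constant x is splatted with VDUP where the element size
   allows; { x, 1, 2, 3 } is handled by initializing the vector to
   { 1, 1, 2, 3 } and then overwriting lane 0 with a lane-set insn; and
   anything with two or more variable elements is assembled in a stack
   temporary and loaded as a whole.  */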
12337 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12338 ERR if it doesn't. EXP indicates the source location, which includes the
12339 inlining history for intrinsics. */
12341 static void
12342 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12343 const_tree exp, const char *desc)
12345 HOST_WIDE_INT lane;
12347 gcc_assert (CONST_INT_P (operand));
12349 lane = INTVAL (operand);
12351 if (lane < low || lane >= high)
12353 if (exp)
12354 error ("%K%s %wd out of range %wd - %wd",
12355 exp, desc, lane, low, high - 1);
12356 else
12357 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12361 /* Bounds-check lanes. */
12363 void
12364 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12365 const_tree exp)
12367 bounds_check (operand, low, high, exp, "lane");
12370 /* Bounds-check constants. */
12372 void
12373 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12375 bounds_check (operand, low, high, NULL_TREE, "constant");
12378 HOST_WIDE_INT
12379 neon_element_bits (machine_mode mode)
12381 return GET_MODE_UNIT_BITSIZE (mode);
12385 /* Predicates for `match_operand' and `match_operator'. */
12387 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12388 WB is true if full writeback address modes are allowed and is false
12389 if limited writeback address modes (POST_INC and PRE_DEC) are
12390 allowed. */
12393 arm_coproc_mem_operand (rtx op, bool wb)
12395 rtx ind;
12397 /* Reject eliminable registers. */
12398 if (! (reload_in_progress || reload_completed || lra_in_progress)
12399 && ( reg_mentioned_p (frame_pointer_rtx, op)
12400 || reg_mentioned_p (arg_pointer_rtx, op)
12401 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12402 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12403 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12404 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12405 return FALSE;
12407 /* Constants are converted into offsets from labels. */
12408 if (!MEM_P (op))
12409 return FALSE;
12411 ind = XEXP (op, 0);
12413 if (reload_completed
12414 && (GET_CODE (ind) == LABEL_REF
12415 || (GET_CODE (ind) == CONST
12416 && GET_CODE (XEXP (ind, 0)) == PLUS
12417 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12418 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12419 return TRUE;
12421 /* Match: (mem (reg)). */
12422 if (REG_P (ind))
12423 return arm_address_register_rtx_p (ind, 0);
12425 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12426 acceptable in any case (subject to verification by
12427 arm_address_register_rtx_p). We need WB to be true to accept
12428 PRE_INC and POST_DEC. */
12429 if (GET_CODE (ind) == POST_INC
12430 || GET_CODE (ind) == PRE_DEC
12431 || (wb
12432 && (GET_CODE (ind) == PRE_INC
12433 || GET_CODE (ind) == POST_DEC)))
12434 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12436 if (wb
12437 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12438 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12439 && GET_CODE (XEXP (ind, 1)) == PLUS
12440 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12441 ind = XEXP (ind, 1);
12443 /* Match:
12444 (plus (reg)
12445 (const)). */
12446 if (GET_CODE (ind) == PLUS
12447 && REG_P (XEXP (ind, 0))
12448 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12449 && CONST_INT_P (XEXP (ind, 1))
12450 && INTVAL (XEXP (ind, 1)) > -1024
12451 && INTVAL (XEXP (ind, 1)) < 1024
12452 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12453 return TRUE;
12455 return FALSE;
12458 /* Return TRUE if OP is a memory operand from or to which we can load or
12459 store a vector. TYPE is one of the following values:
12460 0 - Vector load/store (vldr)
12461 1 - Core registers (ldm)
12462 2 - Element/structure loads (vld1)
12465 neon_vector_mem_operand (rtx op, int type, bool strict)
12467 rtx ind;
12469 /* Reject eliminable registers. */
12470 if (strict && ! (reload_in_progress || reload_completed)
12471 && (reg_mentioned_p (frame_pointer_rtx, op)
12472 || reg_mentioned_p (arg_pointer_rtx, op)
12473 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12474 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12475 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12476 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12477 return FALSE;
12479 /* Constants are converted into offsets from labels. */
12480 if (!MEM_P (op))
12481 return FALSE;
12483 ind = XEXP (op, 0);
12485 if (reload_completed
12486 && (GET_CODE (ind) == LABEL_REF
12487 || (GET_CODE (ind) == CONST
12488 && GET_CODE (XEXP (ind, 0)) == PLUS
12489 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12490 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12491 return TRUE;
12493 /* Match: (mem (reg)). */
12494 if (REG_P (ind))
12495 return arm_address_register_rtx_p (ind, 0);
12497 /* Allow post-increment with Neon registers. */
12498 if ((type != 1 && GET_CODE (ind) == POST_INC)
12499 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12500 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12502 /* Allow post-increment by register for VLDn. */
12503 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12504 && GET_CODE (XEXP (ind, 1)) == PLUS
12505 && REG_P (XEXP (XEXP (ind, 1), 1)))
12506 return true;
12508 /* Match:
12509 (plus (reg)
12510 (const)). */
12511 if (type == 0
12512 && GET_CODE (ind) == PLUS
12513 && REG_P (XEXP (ind, 0))
12514 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12515 && CONST_INT_P (XEXP (ind, 1))
12516 && INTVAL (XEXP (ind, 1)) > -1024
12517 /* For quad modes, we restrict the constant offset to be slightly less
12518 than what the instruction format permits. We have no such constraint
12519 on double mode offsets. (This must match arm_legitimate_index_p.) */
12520 && (INTVAL (XEXP (ind, 1))
12521 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12522 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12523 return TRUE;
12525 return FALSE;
12528 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12529 type. */
12531 neon_struct_mem_operand (rtx op)
12533 rtx ind;
12535 /* Reject eliminable registers. */
12536 if (! (reload_in_progress || reload_completed)
12537 && ( reg_mentioned_p (frame_pointer_rtx, op)
12538 || reg_mentioned_p (arg_pointer_rtx, op)
12539 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12540 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12541 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12542 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12543 return FALSE;
12545 /* Constants are converted into offsets from labels. */
12546 if (!MEM_P (op))
12547 return FALSE;
12549 ind = XEXP (op, 0);
12551 if (reload_completed
12552 && (GET_CODE (ind) == LABEL_REF
12553 || (GET_CODE (ind) == CONST
12554 && GET_CODE (XEXP (ind, 0)) == PLUS
12555 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12556 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12557 return TRUE;
12559 /* Match: (mem (reg)). */
12560 if (REG_P (ind))
12561 return arm_address_register_rtx_p (ind, 0);
12563 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12564 if (GET_CODE (ind) == POST_INC
12565 || GET_CODE (ind) == PRE_DEC)
12566 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12568 return FALSE;
12571 /* Return true if X is a register that will be eliminated later on. */
12573 arm_eliminable_register (rtx x)
12575 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12576 || REGNO (x) == ARG_POINTER_REGNUM
12577 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12578 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12581 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12582 coprocessor registers. Otherwise return NO_REGS. */
12584 enum reg_class
12585 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12587 if (mode == HFmode)
12589 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12590 return GENERAL_REGS;
12591 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12592 return NO_REGS;
12593 return GENERAL_REGS;
12596 /* The neon move patterns handle all legitimate vector and struct
12597 addresses. */
12598 if (TARGET_NEON
12599 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12600 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12601 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12602 || VALID_NEON_STRUCT_MODE (mode)))
12603 return NO_REGS;
12605 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12606 return NO_REGS;
12608 return GENERAL_REGS;
12611 /* Values which must be returned in the most-significant end of the return
12612 register. */
12614 static bool
12615 arm_return_in_msb (const_tree valtype)
12617 return (TARGET_AAPCS_BASED
12618 && BYTES_BIG_ENDIAN
12619 && (AGGREGATE_TYPE_P (valtype)
12620 || TREE_CODE (valtype) == COMPLEX_TYPE
12621 || FIXED_POINT_TYPE_P (valtype)));
12624 /* Return TRUE if X references a SYMBOL_REF. */
12626 symbol_mentioned_p (rtx x)
12628 const char * fmt;
12629 int i;
12631 if (GET_CODE (x) == SYMBOL_REF)
12632 return 1;
12634 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12635 are constant offsets, not symbols. */
12636 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12637 return 0;
12639 fmt = GET_RTX_FORMAT (GET_CODE (x));
12641 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12643 if (fmt[i] == 'E')
12645 int j;
12647 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12648 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12649 return 1;
12651 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12652 return 1;
12655 return 0;
12658 /* Return TRUE if X references a LABEL_REF. */
12660 label_mentioned_p (rtx x)
12662 const char * fmt;
12663 int i;
12665 if (GET_CODE (x) == LABEL_REF)
12666 return 1;
12668 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12669 instruction, but they are constant offsets, not symbols. */
12670 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12671 return 0;
12673 fmt = GET_RTX_FORMAT (GET_CODE (x));
12674 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12676 if (fmt[i] == 'E')
12678 int j;
12680 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12681 if (label_mentioned_p (XVECEXP (x, i, j)))
12682 return 1;
12684 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12685 return 1;
12688 return 0;
12692 tls_mentioned_p (rtx x)
12694 switch (GET_CODE (x))
12696 case CONST:
12697 return tls_mentioned_p (XEXP (x, 0));
12699 case UNSPEC:
12700 if (XINT (x, 1) == UNSPEC_TLS)
12701 return 1;
12703 /* Fall through. */
12704 default:
12705 return 0;
12709 /* Must not copy any rtx that uses a pc-relative address.
12710 Also, disallow copying of load-exclusive instructions that
12711 may appear after splitting of compare-and-swap-style operations
12712 so as to prevent those loops from being transformed away from their
12713 canonical forms (see PR 69904). */
12715 static bool
12716 arm_cannot_copy_insn_p (rtx_insn *insn)
12718 /* The tls call insn cannot be copied, as it is paired with a data
12719 word. */
12720 if (recog_memoized (insn) == CODE_FOR_tlscall)
12721 return true;
12723 subrtx_iterator::array_type array;
12724 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12726 const_rtx x = *iter;
12727 if (GET_CODE (x) == UNSPEC
12728 && (XINT (x, 1) == UNSPEC_PIC_BASE
12729 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12730 return true;
12733 rtx set = single_set (insn);
12734 if (set)
12736 rtx src = SET_SRC (set);
12737 if (GET_CODE (src) == ZERO_EXTEND)
12738 src = XEXP (src, 0);
12740 /* Catch the load-exclusive and load-acquire operations. */
12741 if (GET_CODE (src) == UNSPEC_VOLATILE
12742 && (XINT (src, 1) == VUNSPEC_LL
12743 || XINT (src, 1) == VUNSPEC_LAX))
12744 return true;
12746 return false;
12749 enum rtx_code
12750 minmax_code (rtx x)
12752 enum rtx_code code = GET_CODE (x);
12754 switch (code)
12756 case SMAX:
12757 return GE;
12758 case SMIN:
12759 return LE;
12760 case UMIN:
12761 return LEU;
12762 case UMAX:
12763 return GEU;
12764 default:
12765 gcc_unreachable ();
12769 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12771 bool
12772 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12773 int *mask, bool *signed_sat)
12775 /* The high bound must be a power of two minus one. */
12776 int log = exact_log2 (INTVAL (hi_bound) + 1);
12777 if (log == -1)
12778 return false;
12780 /* The low bound is either zero (for usat) or one less than the
12781 negation of the high bound (for ssat). */
12782 if (INTVAL (lo_bound) == 0)
12784 if (mask)
12785 *mask = log;
12786 if (signed_sat)
12787 *signed_sat = false;
12789 return true;
12792 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12794 if (mask)
12795 *mask = log + 1;
12796 if (signed_sat)
12797 *signed_sat = true;
12799 return true;
12802 return false;
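/* Worked example (illustrative only): clamping to [0, 255] matches the
   unsigned case, since 255 = 2^8 - 1 and the low bound is zero, giving
   MASK = 8 (a USAT #8); clamping to [-128, 127] matches the signed case,
   since -128 = -127 - 1, giving MASK = 7 + 1 = 8 (an SSAT #8).  The
   guarded-out sketch below repeats the classification on plain integers
   and is not part of the compiler.  */
#if 0
#include <assert.h>
#include <stdbool.h>

static bool
example_sat_bounds (long lo, long hi, int *mask, bool *is_signed)
{
  /* The high bound must be a power of two minus one.  */
  long p = hi + 1;
  int log = 0;
  if (p <= 0 || (p & (p - 1)) != 0)
    return false;
  while ((1L << log) < p)
    log++;

  if (lo == 0)
    {
      *mask = log;
      *is_signed = false;
      return true;
    }
  if (lo == -hi - 1)
    {
      *mask = log + 1;
      *is_signed = true;
      return true;
    }
  return false;
}

int
main (void)
{
  int mask;
  bool is_signed;
  assert (example_sat_bounds (0, 255, &mask, &is_signed)
	  && mask == 8 && !is_signed);
  assert (example_sat_bounds (-128, 127, &mask, &is_signed)
	  && mask == 8 && is_signed);
  assert (!example_sat_bounds (-100, 99, &mask, &is_signed));
  return 0;
}
#endif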
12805 /* Return 1 if memory locations are adjacent. */
12807 adjacent_mem_locations (rtx a, rtx b)
12809 /* We don't guarantee to preserve the order of these memory refs. */
12810 if (volatile_refs_p (a) || volatile_refs_p (b))
12811 return 0;
12813 if ((REG_P (XEXP (a, 0))
12814 || (GET_CODE (XEXP (a, 0)) == PLUS
12815 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12816 && (REG_P (XEXP (b, 0))
12817 || (GET_CODE (XEXP (b, 0)) == PLUS
12818 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12820 HOST_WIDE_INT val0 = 0, val1 = 0;
12821 rtx reg0, reg1;
12822 int val_diff;
12824 if (GET_CODE (XEXP (a, 0)) == PLUS)
12826 reg0 = XEXP (XEXP (a, 0), 0);
12827 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12829 else
12830 reg0 = XEXP (a, 0);
12832 if (GET_CODE (XEXP (b, 0)) == PLUS)
12834 reg1 = XEXP (XEXP (b, 0), 0);
12835 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12837 else
12838 reg1 = XEXP (b, 0);
12840 /* Don't accept any offset that will require multiple
12841 instructions to handle, since this would cause the
12842 arith_adjacentmem pattern to output an overlong sequence. */
12843 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12844 return 0;
12846 /* Don't allow an eliminable register: register elimination can make
12847 the offset too large. */
12848 if (arm_eliminable_register (reg0))
12849 return 0;
12851 val_diff = val1 - val0;
12853 if (arm_ld_sched)
12855 /* If the target has load delay slots, then there's no benefit
12856 to using an ldm instruction unless the offset is zero and
12857 we are optimizing for size. */
12858 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12859 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12860 && (val_diff == 4 || val_diff == -4));
12863 return ((REGNO (reg0) == REGNO (reg1))
12864 && (val_diff == 4 || val_diff == -4));
12867 return 0;
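/* For example (illustrative only): [r4, #4] and [r4, #8] are adjacent
   (same base register, offsets differing by 4), whereas [r4, #4] with
   [r5, #8] or [r4, #4] with [r4, #16] are not.  On cores with load delay
   slots (the arm_ld_sched case above) the pair is additionally accepted
   only when optimizing for size and one of the offsets is 0 or 4.  */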
12870 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12871 for load operations, false for store operations. CONSECUTIVE is true
12872 if the register numbers in the operation must be consecutive in the register
12873 bank. RETURN_PC is true if the value is to be loaded into the PC.
12874 The pattern we are trying to match for load is:
12875 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12876 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12879 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12881 where
12882 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12883 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12884 3. If consecutive is TRUE, then for kth register being loaded,
12885 REGNO (R_dk) = REGNO (R_d0) + k.
12886 The pattern for store is similar. */
12887 bool
12888 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12889 bool consecutive, bool return_pc)
12891 HOST_WIDE_INT count = XVECLEN (op, 0);
12892 rtx reg, mem, addr;
12893 unsigned regno;
12894 unsigned first_regno;
12895 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12896 rtx elt;
12897 bool addr_reg_in_reglist = false;
12898 bool update = false;
12899 int reg_increment;
12900 int offset_adj;
12901 int regs_per_val;
12903 /* If not in SImode, then registers must be consecutive
12904 (e.g., VLDM instructions for DFmode). */
12905 gcc_assert ((mode == SImode) || consecutive);
12906 /* Setting return_pc for stores is illegal. */
12907 gcc_assert (!return_pc || load);
12909 /* Set up the increments and the regs per val based on the mode. */
12910 reg_increment = GET_MODE_SIZE (mode);
12911 regs_per_val = reg_increment / 4;
12912 offset_adj = return_pc ? 1 : 0;
12914 if (count <= 1
12915 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12916 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12917 return false;
12919 /* Check if this is a write-back. */
12920 elt = XVECEXP (op, 0, offset_adj);
12921 if (GET_CODE (SET_SRC (elt)) == PLUS)
12923 i++;
12924 base = 1;
12925 update = true;
12927 /* The offset adjustment must be the number of registers being
12928 popped times the size of a single register. */
12929 if (!REG_P (SET_DEST (elt))
12930 || !REG_P (XEXP (SET_SRC (elt), 0))
12931 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12932 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12933 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12934 ((count - 1 - offset_adj) * reg_increment))
12935 return false;
12938 i = i + offset_adj;
12939 base = base + offset_adj;
12940 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12941 success depends on the type: VLDM can do just one reg,
12942 LDM must do at least two. */
12943 if ((count <= i) && (mode == SImode))
12944 return false;
12946 elt = XVECEXP (op, 0, i - 1);
12947 if (GET_CODE (elt) != SET)
12948 return false;
12950 if (load)
12952 reg = SET_DEST (elt);
12953 mem = SET_SRC (elt);
12955 else
12957 reg = SET_SRC (elt);
12958 mem = SET_DEST (elt);
12961 if (!REG_P (reg) || !MEM_P (mem))
12962 return false;
12964 regno = REGNO (reg);
12965 first_regno = regno;
12966 addr = XEXP (mem, 0);
12967 if (GET_CODE (addr) == PLUS)
12969 if (!CONST_INT_P (XEXP (addr, 1)))
12970 return false;
12972 offset = INTVAL (XEXP (addr, 1));
12973 addr = XEXP (addr, 0);
12976 if (!REG_P (addr))
12977 return false;
12979 /* Don't allow SP to be loaded unless it is also the base register. It
12980 guarantees that SP is reset correctly when an LDM instruction
12981 is interrupted. Otherwise, we might end up with a corrupt stack. */
12982 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12983 return false;
12985 for (; i < count; i++)
12987 elt = XVECEXP (op, 0, i);
12988 if (GET_CODE (elt) != SET)
12989 return false;
12991 if (load)
12993 reg = SET_DEST (elt);
12994 mem = SET_SRC (elt);
12996 else
12998 reg = SET_SRC (elt);
12999 mem = SET_DEST (elt);
13002 if (!REG_P (reg)
13003 || GET_MODE (reg) != mode
13004 || REGNO (reg) <= regno
13005 || (consecutive
13006 && (REGNO (reg) !=
13007 (unsigned int) (first_regno + regs_per_val * (i - base))))
13008 /* Don't allow SP to be loaded unless it is also the base register. It
13009 guarantees that SP is reset correctly when an LDM instruction
13010 is interrupted. Otherwise, we might end up with a corrupt stack. */
13011 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13012 || !MEM_P (mem)
13013 || GET_MODE (mem) != mode
13014 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13015 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13016 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13017 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13018 offset + (i - base) * reg_increment))
13019 && (!REG_P (XEXP (mem, 0))
13020 || offset + (i - base) * reg_increment != 0)))
13021 return false;
13023 regno = REGNO (reg);
13024 if (regno == REGNO (addr))
13025 addr_reg_in_reglist = true;
13028 if (load)
13030 if (update && addr_reg_in_reglist)
13031 return false;
13033 /* For Thumb-1, address register is always modified - either by write-back
13034 or by explicit load. If the pattern does not describe an update,
13035 then the address register must be in the list of loaded registers. */
13036 if (TARGET_THUMB1)
13037 return update || addr_reg_in_reglist;
13040 return true;
13043 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13044 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13045 instruction. ADD_OFFSET is nonzero if the base address register needs
13046 to be modified with an add instruction before we can use it. */
13048 static bool
13049 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13050 int nops, HOST_WIDE_INT add_offset)
13052 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13053 if the offset isn't small enough. The reason 2 ldrs are faster
13054 is because these ARMs are able to do more than one cache access
13055 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13056 whilst the ARM8 has a double bandwidth cache. This means that
13057 these cores can do both an instruction fetch and a data fetch in
13058 a single cycle, so the trick of calculating the address into a
13059 scratch register (one of the result regs) and then doing a load
13060 multiple actually becomes slower (and no smaller in code size).
13061 That is the transformation
13063 ldr rd1, [rbase + offset]
13064 ldr rd2, [rbase + offset + 4]
13068 add rd1, rbase, offset
13069 ldmia rd1, {rd1, rd2}
13071 produces worse code -- '3 cycles + any stalls on rd2' instead of
13072 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13073 access per cycle, the first sequence could never complete in less
13074 than 6 cycles, whereas the ldm sequence would only take 5 and
13075 would make better use of sequential accesses if not hitting the
13076 cache.
13078 We cheat here and test 'arm_ld_sched' which we currently know to
13079 only be true for the ARM8, ARM9 and StrongARM. If this ever
13080 changes, then the test below needs to be reworked. */
13081 if (nops == 2 && arm_ld_sched && add_offset != 0)
13082 return false;
13084 /* XScale has load-store double instructions, but they have stricter
13085 alignment requirements than load-store multiple, so we cannot
13086 use them.
13088 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13089 the pipeline until completion.
13091 NREGS CYCLES
13092 1 3
13093 2 4
13094 3 5
13095 4 6
13097 An ldr instruction takes 1-3 cycles, but does not block the
13098 pipeline.
13100 NREGS CYCLES
13101 1 1-3
13102 2 2-6
13103 3 3-9
13104 4 4-12
13106 Best case ldr will always win. However, the more ldr instructions
13107 we issue, the less likely we are to be able to schedule them well.
13108 Using ldr instructions also increases code size.
13110 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13111 for counts of 3 or 4 regs. */
13112 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13113 return false;
13114 return true;
13117 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13118 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13119 an array ORDER which describes the sequence to use when accessing the
13120 offsets that produces an ascending order. In this sequence, each
13121 offset must be larger by exactly 4 than the previous one. ORDER[0]
13122 must have been filled in with the lowest offset by the caller.
13123 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13124 we use to verify that ORDER produces an ascending order of registers.
13125 Return true if it was possible to construct such an order, false if
13126 not. */
13128 static bool
13129 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13130 int *unsorted_regs)
13132 int i;
13133 for (i = 1; i < nops; i++)
13135 int j;
13137 order[i] = order[i - 1];
13138 for (j = 0; j < nops; j++)
13139 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13141 /* We must find exactly one offset that is higher than the
13142 previous one by 4. */
13143 if (order[i] != order[i - 1])
13144 return false;
13145 order[i] = j;
13147 if (order[i] == order[i - 1])
13148 return false;
13149 /* The register numbers must be ascending. */
13150 if (unsorted_regs != NULL
13151 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13152 return false;
13154 return true;
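/* Standalone illustration with hypothetical values, guarded out of the
   build: for unsorted offsets { 8, 0, 4, 12 } the caller fills in
   order[0] = 1 (the index of the lowest offset, 0), and the loop above
   then produces order = { 1, 2, 0, 3 }, i.e. offsets 0, 4, 8, 12, each
   exactly 4 larger than its predecessor.  Offsets such as
   { 0, 4, 12, 16 } fail because no element equals 8.  */
#if 0
#include <assert.h>
#include <stdbool.h>

/* The same ordering algorithm as above, on plain ints and without the
   register-number check.  */
static bool
example_offset_order (int nops, const long *offsets, int *order)
{
  int i, j;
  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    /* There must be exactly one offset that is larger by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
    }
  return true;
}

int
main (void)
{
  long offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1, 0, 0, 0 };	/* order[0] = index of offset 0.  */
  assert (example_offset_order (4, offsets, order));
  assert (order[1] == 2 && order[2] == 0 && order[3] == 3);
  return 0;
}
#endif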
13157 /* Used to determine in a peephole whether a sequence of load
13158 instructions can be changed into a load-multiple instruction.
13159 NOPS is the number of separate load instructions we are examining. The
13160 first NOPS entries in OPERANDS are the destination registers, the
13161 next NOPS entries are memory operands. If this function is
13162 successful, *BASE is set to the common base register of the memory
13163 accesses; *LOAD_OFFSET is set to the first memory location's offset
13164 from that base register.
13165 REGS is an array filled in with the destination register numbers.
13166 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13167 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13168 the sequence of registers in REGS matches the loads from ascending memory
13169 locations, and the function verifies that the register numbers are
13170 themselves ascending. If CHECK_REGS is false, the register numbers
13171 are stored in the order they are found in the operands. */
13172 static int
13173 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13174 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13176 int unsorted_regs[MAX_LDM_STM_OPS];
13177 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13178 int order[MAX_LDM_STM_OPS];
13179 rtx base_reg_rtx = NULL;
13180 int base_reg = -1;
13181 int i, ldm_case;
13183 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13184 easily extended if required. */
13185 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13187 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13189 /* Loop over the operands and check that the memory references are
13190 suitable (i.e. immediate offsets from the same base register). At
13191 the same time, extract the target register, and the memory
13192 offsets. */
13193 for (i = 0; i < nops; i++)
13195 rtx reg;
13196 rtx offset;
13198 /* Convert a subreg of a mem into the mem itself. */
13199 if (GET_CODE (operands[nops + i]) == SUBREG)
13200 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13202 gcc_assert (MEM_P (operands[nops + i]));
13204 /* Don't reorder volatile memory references; it doesn't seem worth
13205 looking for the case where the order is ok anyway. */
13206 if (MEM_VOLATILE_P (operands[nops + i]))
13207 return 0;
13209 offset = const0_rtx;
13211 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13212 || (GET_CODE (reg) == SUBREG
13213 && REG_P (reg = SUBREG_REG (reg))))
13214 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13215 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13216 || (GET_CODE (reg) == SUBREG
13217 && REG_P (reg = SUBREG_REG (reg))))
13218 && (CONST_INT_P (offset
13219 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13221 if (i == 0)
13223 base_reg = REGNO (reg);
13224 base_reg_rtx = reg;
13225 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13226 return 0;
13228 else if (base_reg != (int) REGNO (reg))
13229 /* Not addressed from the same base register. */
13230 return 0;
13232 unsorted_regs[i] = (REG_P (operands[i])
13233 ? REGNO (operands[i])
13234 : REGNO (SUBREG_REG (operands[i])));
13236 /* If it isn't an integer register, or if it overwrites the
13237 base register but isn't the last insn in the list, then
13238 we can't do this. */
13239 if (unsorted_regs[i] < 0
13240 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13241 || unsorted_regs[i] > 14
13242 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13243 return 0;
13245 /* Don't allow SP to be loaded unless it is also the base
13246 register. It guarantees that SP is reset correctly when
13247 an LDM instruction is interrupted. Otherwise, we might
13248 end up with a corrupt stack. */
13249 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13250 return 0;
13252 unsorted_offsets[i] = INTVAL (offset);
13253 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13254 order[0] = i;
13256 else
13257 /* Not a suitable memory address. */
13258 return 0;
13261 /* All the useful information has now been extracted from the
13262 operands into unsorted_regs and unsorted_offsets; additionally,
13263 order[0] has been set to the lowest offset in the list. Sort
13264 the offsets into order, verifying that they are adjacent, and
13265 check that the register numbers are ascending. */
13266 if (!compute_offset_order (nops, unsorted_offsets, order,
13267 check_regs ? unsorted_regs : NULL))
13268 return 0;
13270 if (saved_order)
13271 memcpy (saved_order, order, sizeof order);
13273 if (base)
13275 *base = base_reg;
13277 for (i = 0; i < nops; i++)
13278 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13280 *load_offset = unsorted_offsets[order[0]];
13283 if (TARGET_THUMB1
13284 && !peep2_reg_dead_p (nops, base_reg_rtx))
13285 return 0;
13287 if (unsorted_offsets[order[0]] == 0)
13288 ldm_case = 1; /* ldmia */
13289 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13290 ldm_case = 2; /* ldmib */
13291 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13292 ldm_case = 3; /* ldmda */
13293 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13294 ldm_case = 4; /* ldmdb */
13295 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13296 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13297 ldm_case = 5;
13298 else
13299 return 0;
13301 if (!multiple_operation_profitable_p (false, nops,
13302 ldm_case == 5
13303 ? unsorted_offsets[order[0]] : 0))
13304 return 0;
13306 return ldm_case;
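/* To illustrate the LDM_CASE classification above: ascending offsets
   { 0, 4, 8 } from the base give case 1 (ldmia); { 4, 8, 12 } give
   case 2 (ldmib, ARM only); { -8, -4, 0 } give case 3 (ldmda, ARM
   only); { -12, -8, -4 } give case 4 (ldmdb); any other run of
   word-consecutive offsets whose lowest offset (or its negation) is
   a valid ARM immediate falls into case 5, for which the caller
   first adjusts the base with an add and then uses a plain ldmia. */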
13309 /* Used to determine in a peephole whether a sequence of store instructions can
13310 be changed into a store-multiple instruction.
13311 NOPS is the number of separate store instructions we are examining.
13312 NOPS_TOTAL is the total number of instructions recognized by the peephole
13313 pattern.
13314 The first NOPS entries in OPERANDS are the source registers, the next
13315 NOPS entries are memory operands. If this function is successful, *BASE is
13316 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13317 to the first memory location's offset from that base register. REGS is an
13318 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13319 likewise filled with the corresponding rtx's.
13320 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13321 numbers to an ascending order of stores.
13322 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13323 from ascending memory locations, and the function verifies that the register
13324 numbers are themselves ascending. If CHECK_REGS is false, the register
13325 numbers are stored in the order they are found in the operands. */
13326 static int
13327 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13328 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13329 HOST_WIDE_INT *load_offset, bool check_regs)
13331 int unsorted_regs[MAX_LDM_STM_OPS];
13332 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13333 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13334 int order[MAX_LDM_STM_OPS];
13335 int base_reg = -1;
13336 rtx base_reg_rtx = NULL;
13337 int i, stm_case;
13339 /* Write back of base register is currently only supported for Thumb 1. */
13340 int base_writeback = TARGET_THUMB1;
13342 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13343 easily extended if required. */
13344 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13346 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13348 /* Loop over the operands and check that the memory references are
13349 suitable (i.e. immediate offsets from the same base register). At
13350 the same time, extract the target register, and the memory
13351 offsets. */
13352 for (i = 0; i < nops; i++)
13354 rtx reg;
13355 rtx offset;
13357 /* Convert a subreg of a mem into the mem itself. */
13358 if (GET_CODE (operands[nops + i]) == SUBREG)
13359 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13361 gcc_assert (MEM_P (operands[nops + i]));
13363 /* Don't reorder volatile memory references; it doesn't seem worth
13364 looking for the case where the order is ok anyway. */
13365 if (MEM_VOLATILE_P (operands[nops + i]))
13366 return 0;
13368 offset = const0_rtx;
13370 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13371 || (GET_CODE (reg) == SUBREG
13372 && REG_P (reg = SUBREG_REG (reg))))
13373 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13374 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13375 || (GET_CODE (reg) == SUBREG
13376 && REG_P (reg = SUBREG_REG (reg))))
13377 && (CONST_INT_P (offset
13378 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13380 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13381 ? operands[i] : SUBREG_REG (operands[i]));
13382 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13384 if (i == 0)
13386 base_reg = REGNO (reg);
13387 base_reg_rtx = reg;
13388 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13389 return 0;
13391 else if (base_reg != (int) REGNO (reg))
13392 /* Not addressed from the same base register. */
13393 return 0;
13395 /* If it isn't an integer register, then we can't do this. */
13396 if (unsorted_regs[i] < 0
13397 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13398 /* The effects are unpredictable if the base register is
13399 both updated and stored. */
13400 || (base_writeback && unsorted_regs[i] == base_reg)
13401 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13402 || unsorted_regs[i] > 14)
13403 return 0;
13405 unsorted_offsets[i] = INTVAL (offset);
13406 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13407 order[0] = i;
13409 else
13410 /* Not a suitable memory address. */
13411 return 0;
13414 /* All the useful information has now been extracted from the
13415 operands into unsorted_regs and unsorted_offsets; additionally,
13416 order[0] has been set to the lowest offset in the list. Sort
13417 the offsets into order, verifying that they are adjacent, and
13418 check that the register numbers are ascending. */
13419 if (!compute_offset_order (nops, unsorted_offsets, order,
13420 check_regs ? unsorted_regs : NULL))
13421 return 0;
13423 if (saved_order)
13424 memcpy (saved_order, order, sizeof order);
13426 if (base)
13428 *base = base_reg;
13430 for (i = 0; i < nops; i++)
13432 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13433 if (reg_rtxs)
13434 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13437 *load_offset = unsorted_offsets[order[0]];
13440 if (TARGET_THUMB1
13441 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13442 return 0;
13444 if (unsorted_offsets[order[0]] == 0)
13445 stm_case = 1; /* stmia */
13446 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13447 stm_case = 2; /* stmib */
13448 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13449 stm_case = 3; /* stmda */
13450 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13451 stm_case = 4; /* stmdb */
13452 else
13453 return 0;
13455 if (!multiple_operation_profitable_p (false, nops, 0))
13456 return 0;
13458 return stm_case;
13461 /* Routines for use in generating RTL. */
13463 /* Generate a load-multiple instruction. COUNT is the number of loads in
13464 the instruction; REGS and MEMS are arrays containing the operands.
13465 BASEREG is the base register to be used in addressing the memory operands.
13466 WBACK_OFFSET is nonzero if the instruction should update the base
13467 register. */
13469 static rtx
13470 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13471 HOST_WIDE_INT wback_offset)
13473 int i = 0, j;
13474 rtx result;
13476 if (!multiple_operation_profitable_p (false, count, 0))
13478 rtx seq;
13480 start_sequence ();
13482 for (i = 0; i < count; i++)
13483 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13485 if (wback_offset != 0)
13486 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13488 seq = get_insns ();
13489 end_sequence ();
13491 return seq;
13494 result = gen_rtx_PARALLEL (VOIDmode,
13495 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13496 if (wback_offset != 0)
13498 XVECEXP (result, 0, 0)
13499 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13500 i = 1;
13501 count++;
13504 for (j = 0; i < count; i++, j++)
13505 XVECEXP (result, 0, i)
13506 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13508 return result;
13511 /* Generate a store-multiple instruction. COUNT is the number of stores in
13512 the instruction; REGS and MEMS are arrays containing the operands.
13513 BASEREG is the base register to be used in addressing the memory operands.
13514 WBACK_OFFSET is nonzero if the instruction should update the base
13515 register. */
13517 static rtx
13518 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13519 HOST_WIDE_INT wback_offset)
13521 int i = 0, j;
13522 rtx result;
13524 if (GET_CODE (basereg) == PLUS)
13525 basereg = XEXP (basereg, 0);
13527 if (!multiple_operation_profitable_p (false, count, 0))
13529 rtx seq;
13531 start_sequence ();
13533 for (i = 0; i < count; i++)
13534 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13536 if (wback_offset != 0)
13537 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13539 seq = get_insns ();
13540 end_sequence ();
13542 return seq;
13545 result = gen_rtx_PARALLEL (VOIDmode,
13546 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13547 if (wback_offset != 0)
13549 XVECEXP (result, 0, 0)
13550 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13551 i = 1;
13552 count++;
13555 for (j = 0; i < count; i++, j++)
13556 XVECEXP (result, 0, i)
13557 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13559 return result;
13562 /* Generate either a load-multiple or a store-multiple instruction. This
13563 function can be used in situations where we can start with a single MEM
13564 rtx and adjust its address upwards.
13565 COUNT is the number of operations in the instruction, not counting a
13566 possible update of the base register. REGS is an array containing the
13567 register operands.
13568 BASEREG is the base register to be used in addressing the memory operands,
13569 which are constructed from BASEMEM.
13570 WRITE_BACK specifies whether the generated instruction should include an
13571 update of the base register.
13572 OFFSETP is used to pass an offset to and from this function; this offset
13573 is not used when constructing the address (instead BASEMEM should have an
13574 appropriate offset in its address), it is used only for setting
13575 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13577 static rtx
13578 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13579 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13581 rtx mems[MAX_LDM_STM_OPS];
13582 HOST_WIDE_INT offset = *offsetp;
13583 int i;
13585 gcc_assert (count <= MAX_LDM_STM_OPS);
13587 if (GET_CODE (basereg) == PLUS)
13588 basereg = XEXP (basereg, 0);
13590 for (i = 0; i < count; i++)
13592 rtx addr = plus_constant (Pmode, basereg, i * 4);
13593 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13594 offset += 4;
13597 if (write_back)
13598 *offsetp = offset;
13600 if (is_load)
13601 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13602 write_back ? 4 * count : 0);
13603 else
13604 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13605 write_back ? 4 * count : 0);
13608 rtx
13609 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13610 rtx basemem, HOST_WIDE_INT *offsetp)
13612 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13613 offsetp);
13616 rtx
13617 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13618 rtx basemem, HOST_WIDE_INT *offsetp)
13620 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13621 offsetp);
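/* As an illustration of the RTL built above, arm_gen_load_multiple_1
   with COUNT == 3, REGS == { 4, 5, 6 } and WBACK_OFFSET == 12
   produces (roughly):

     (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 12)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))
                (set (reg:SI 6) (mem:SI ...))])

   which is the shape matched by the load-multiple patterns in
   ldmstm.md; when the operation is not profitable it instead emits
   the equivalent sequence of individual moves. */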
13624 /* Called from a peephole2 expander to turn a sequence of loads into an
13625 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13626 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13627 is true if we can reorder the registers because they are used commutatively
13628 subsequently.
13629 Returns true iff we could generate a new instruction. */
13631 bool
13632 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13634 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13635 rtx mems[MAX_LDM_STM_OPS];
13636 int i, j, base_reg;
13637 rtx base_reg_rtx;
13638 HOST_WIDE_INT offset;
13639 int write_back = FALSE;
13640 int ldm_case;
13641 rtx addr;
13643 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13644 &base_reg, &offset, !sort_regs);
13646 if (ldm_case == 0)
13647 return false;
13649 if (sort_regs)
13650 for (i = 0; i < nops - 1; i++)
13651 for (j = i + 1; j < nops; j++)
13652 if (regs[i] > regs[j])
13654 int t = regs[i];
13655 regs[i] = regs[j];
13656 regs[j] = t;
13658 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13660 if (TARGET_THUMB1)
13662 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13663 gcc_assert (ldm_case == 1 || ldm_case == 5);
13664 write_back = TRUE;
13667 if (ldm_case == 5)
13669 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13670 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13671 offset = 0;
13672 if (!TARGET_THUMB1)
13673 base_reg_rtx = newbase;
13676 for (i = 0; i < nops; i++)
13678 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13679 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13680 SImode, addr, 0);
13682 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13683 write_back ? offset + i * 4 : 0));
13684 return true;
13687 /* Called from a peephole2 expander to turn a sequence of stores into an
13688 STM instruction. OPERANDS are the operands found by the peephole matcher;
13689 NOPS indicates how many separate stores we are trying to combine.
13690 Returns true iff we could generate a new instruction. */
13692 bool
13693 gen_stm_seq (rtx *operands, int nops)
13695 int i;
13696 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13697 rtx mems[MAX_LDM_STM_OPS];
13698 int base_reg;
13699 rtx base_reg_rtx;
13700 HOST_WIDE_INT offset;
13701 int write_back = FALSE;
13702 int stm_case;
13703 rtx addr;
13704 bool base_reg_dies;
13706 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13707 mem_order, &base_reg, &offset, true);
13709 if (stm_case == 0)
13710 return false;
13712 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13714 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13715 if (TARGET_THUMB1)
13717 gcc_assert (base_reg_dies);
13718 write_back = TRUE;
13721 if (stm_case == 5)
13723 gcc_assert (base_reg_dies);
13724 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13725 offset = 0;
13728 addr = plus_constant (Pmode, base_reg_rtx, offset);
13730 for (i = 0; i < nops; i++)
13732 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13733 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13734 SImode, addr, 0);
13736 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13737 write_back ? offset + i * 4 : 0));
13738 return true;
13741 /* Called from a peephole2 expander to turn a sequence of stores that are
13742 preceded by constant loads into an STM instruction. OPERANDS are the
13743 operands found by the peephole matcher; NOPS indicates how many
13744 separate stores we are trying to combine; there are 2 * NOPS
13745 instructions in the peephole.
13746 Returns true iff we could generate a new instruction. */
13748 bool
13749 gen_const_stm_seq (rtx *operands, int nops)
13751 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13752 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13753 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13754 rtx mems[MAX_LDM_STM_OPS];
13755 int base_reg;
13756 rtx base_reg_rtx;
13757 HOST_WIDE_INT offset;
13758 int write_back = FALSE;
13759 int stm_case;
13760 rtx addr;
13761 bool base_reg_dies;
13762 int i, j;
13763 HARD_REG_SET allocated;
13765 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13766 mem_order, &base_reg, &offset, false);
13768 if (stm_case == 0)
13769 return false;
13771 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13773 /* If the same register is used more than once, try to find a free
13774 register. */
13775 CLEAR_HARD_REG_SET (allocated);
13776 for (i = 0; i < nops; i++)
13778 for (j = i + 1; j < nops; j++)
13779 if (regs[i] == regs[j])
13781 rtx t = peep2_find_free_register (0, nops * 2,
13782 TARGET_THUMB1 ? "l" : "r",
13783 SImode, &allocated);
13784 if (t == NULL_RTX)
13785 return false;
13786 reg_rtxs[i] = t;
13787 regs[i] = REGNO (t);
13791 /* Compute an ordering that maps the register numbers to an ascending
13792 sequence. */
13793 reg_order[0] = 0;
13794 for (i = 0; i < nops; i++)
13795 if (regs[i] < regs[reg_order[0]])
13796 reg_order[0] = i;
13798 for (i = 1; i < nops; i++)
13800 int this_order = reg_order[i - 1];
13801 for (j = 0; j < nops; j++)
13802 if (regs[j] > regs[reg_order[i - 1]]
13803 && (this_order == reg_order[i - 1]
13804 || regs[j] < regs[this_order]))
13805 this_order = j;
13806 reg_order[i] = this_order;
13809 /* Ensure that registers that must be live after the instruction end
13810 up with the correct value. */
13811 for (i = 0; i < nops; i++)
13813 int this_order = reg_order[i];
13814 if ((this_order != mem_order[i]
13815 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13816 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13817 return false;
13820 /* Load the constants. */
13821 for (i = 0; i < nops; i++)
13823 rtx op = operands[2 * nops + mem_order[i]];
13824 sorted_regs[i] = regs[reg_order[i]];
13825 emit_move_insn (reg_rtxs[reg_order[i]], op);
13828 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13830 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13831 if (TARGET_THUMB1)
13833 gcc_assert (base_reg_dies);
13834 write_back = TRUE;
13837 if (stm_case == 5)
13839 gcc_assert (base_reg_dies);
13840 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13841 offset = 0;
13844 addr = plus_constant (Pmode, base_reg_rtx, offset);
13846 for (i = 0; i < nops; i++)
13848 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13849 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13850 SImode, addr, 0);
13852 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13853 write_back ? offset + i * 4 : 0));
13854 return true;
13857 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13858 unaligned copies on processors which support unaligned semantics for those
13859 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13860 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13861 An interleave factor of 1 (the minimum) will perform no interleaving.
13862 Load/store multiple are used for aligned addresses where possible. */
13864 static void
13865 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13866 HOST_WIDE_INT length,
13867 unsigned int interleave_factor)
13869 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13870 int *regnos = XALLOCAVEC (int, interleave_factor);
13871 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13872 HOST_WIDE_INT i, j;
13873 HOST_WIDE_INT remaining = length, words;
13874 rtx halfword_tmp = NULL, byte_tmp = NULL;
13875 rtx dst, src;
13876 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13877 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13878 HOST_WIDE_INT srcoffset, dstoffset;
13879 HOST_WIDE_INT src_autoinc, dst_autoinc;
13880 rtx mem, addr;
13882 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13884 /* Use hard registers if we have aligned source or destination so we can use
13885 load/store multiple with contiguous registers. */
13886 if (dst_aligned || src_aligned)
13887 for (i = 0; i < interleave_factor; i++)
13888 regs[i] = gen_rtx_REG (SImode, i);
13889 else
13890 for (i = 0; i < interleave_factor; i++)
13891 regs[i] = gen_reg_rtx (SImode);
13893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13894 src = copy_addr_to_reg (XEXP (srcbase, 0));
13896 srcoffset = dstoffset = 0;
13898 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13899 For copying the last bytes we want to subtract this offset again. */
13900 src_autoinc = dst_autoinc = 0;
13902 for (i = 0; i < interleave_factor; i++)
13903 regnos[i] = i;
13905 /* Copy BLOCK_SIZE_BYTES chunks. */
13907 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13909 /* Load words. */
13910 if (src_aligned && interleave_factor > 1)
13912 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13913 TRUE, srcbase, &srcoffset));
13914 src_autoinc += UNITS_PER_WORD * interleave_factor;
13916 else
13918 for (j = 0; j < interleave_factor; j++)
13920 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13921 - src_autoinc));
13922 mem = adjust_automodify_address (srcbase, SImode, addr,
13923 srcoffset + j * UNITS_PER_WORD);
13924 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13926 srcoffset += block_size_bytes;
13929 /* Store words. */
13930 if (dst_aligned && interleave_factor > 1)
13932 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13933 TRUE, dstbase, &dstoffset));
13934 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13936 else
13938 for (j = 0; j < interleave_factor; j++)
13940 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13941 - dst_autoinc));
13942 mem = adjust_automodify_address (dstbase, SImode, addr,
13943 dstoffset + j * UNITS_PER_WORD);
13944 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13946 dstoffset += block_size_bytes;
13949 remaining -= block_size_bytes;
13952 /* Copy any whole words left (note these aren't interleaved with any
13953 subsequent halfword/byte load/stores in the interests of simplicity). */
13955 words = remaining / UNITS_PER_WORD;
13957 gcc_assert (words < interleave_factor);
13959 if (src_aligned && words > 1)
13961 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13962 &srcoffset));
13963 src_autoinc += UNITS_PER_WORD * words;
13965 else
13967 for (j = 0; j < words; j++)
13969 addr = plus_constant (Pmode, src,
13970 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13971 mem = adjust_automodify_address (srcbase, SImode, addr,
13972 srcoffset + j * UNITS_PER_WORD);
13973 if (src_aligned)
13974 emit_move_insn (regs[j], mem);
13975 else
13976 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13978 srcoffset += words * UNITS_PER_WORD;
13981 if (dst_aligned && words > 1)
13983 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13984 &dstoffset));
13985 dst_autoinc += words * UNITS_PER_WORD;
13987 else
13989 for (j = 0; j < words; j++)
13991 addr = plus_constant (Pmode, dst,
13992 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13993 mem = adjust_automodify_address (dstbase, SImode, addr,
13994 dstoffset + j * UNITS_PER_WORD);
13995 if (dst_aligned)
13996 emit_move_insn (mem, regs[j]);
13997 else
13998 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14000 dstoffset += words * UNITS_PER_WORD;
14003 remaining -= words * UNITS_PER_WORD;
14005 gcc_assert (remaining < 4);
14007 /* Copy a halfword if necessary. */
14009 if (remaining >= 2)
14011 halfword_tmp = gen_reg_rtx (SImode);
14013 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14014 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14015 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14017 /* Either write out immediately, or delay until we've loaded the last
14018 byte, depending on interleave factor. */
14019 if (interleave_factor == 1)
14021 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14022 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14023 emit_insn (gen_unaligned_storehi (mem,
14024 gen_lowpart (HImode, halfword_tmp)));
14025 halfword_tmp = NULL;
14026 dstoffset += 2;
14029 remaining -= 2;
14030 srcoffset += 2;
14033 gcc_assert (remaining < 2);
14035 /* Copy last byte. */
14037 if ((remaining & 1) != 0)
14039 byte_tmp = gen_reg_rtx (SImode);
14041 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14042 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14043 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14045 if (interleave_factor == 1)
14047 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14048 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14049 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14050 byte_tmp = NULL;
14051 dstoffset++;
14054 remaining--;
14055 srcoffset++;
14058 /* Store last halfword if we haven't done so already. */
14060 if (halfword_tmp)
14062 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14063 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14064 emit_insn (gen_unaligned_storehi (mem,
14065 gen_lowpart (HImode, halfword_tmp)));
14066 dstoffset += 2;
14069 /* Likewise for last byte. */
14071 if (byte_tmp)
14073 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14074 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14075 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14076 dstoffset++;
14079 gcc_assert (remaining == 0 && srcoffset == dstoffset);
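/* As a concrete example of the code above: a copy of LENGTH == 11
   with INTERLEAVE_FACTOR == 2 and both buffers unaligned emits two
   unaligned word loads followed by two unaligned word stores (the
   first 8 bytes), then an unaligned halfword load, a byte load, and
   finally the deferred halfword and byte stores for the remaining 3
   bytes; with INTERLEAVE_FACTOR == 1 the halfword and byte would
   each be stored immediately after being loaded. */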
14082 /* From mips_adjust_block_mem:
14084 Helper function for doing a loop-based block operation on memory
14085 reference MEM. Each iteration of the loop will operate on LENGTH
14086 bytes of MEM.
14088 Create a new base register for use within the loop and point it to
14089 the start of MEM. Create a new memory reference that uses this
14090 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14092 static void
14093 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14094 rtx *loop_mem)
14096 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14098 /* Although the new mem does not refer to a known location,
14099 it does keep up to LENGTH bytes of alignment. */
14100 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14101 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14104 /* From mips_block_move_loop:
14106 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14107 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14108 the memory regions do not overlap. */
14110 static void
14111 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14112 unsigned int interleave_factor,
14113 HOST_WIDE_INT bytes_per_iter)
14115 rtx src_reg, dest_reg, final_src, test;
14116 HOST_WIDE_INT leftover;
14118 leftover = length % bytes_per_iter;
14119 length -= leftover;
14121 /* Create registers and memory references for use within the loop. */
14122 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14123 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14125 /* Calculate the value that SRC_REG should have after the last iteration of
14126 the loop. */
14127 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14128 0, 0, OPTAB_WIDEN);
14130 /* Emit the start of the loop. */
14131 rtx_code_label *label = gen_label_rtx ();
14132 emit_label (label);
14134 /* Emit the loop body. */
14135 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14136 interleave_factor);
14138 /* Move on to the next block. */
14139 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14140 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14142 /* Emit the loop condition. */
14143 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14144 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14146 /* Mop up any left-over bytes. */
14147 if (leftover)
14148 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
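/* Schematically, for LENGTH == 100 and BYTES_PER_ITER == 16 the code
   above expands to:

       final_src = src + 96
     loop:
       copy 16 bytes straight-line (as in the function above)
       src += 16; dst += 16
       if (src != final_src) goto loop
       copy the remaining 4 bytes straight-line

   i.e. six iterations of the loop plus a 4-byte tail. */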
14151 /* Emit a block move when either the source or destination is unaligned (not
14152 aligned to a four-byte boundary). This may need further tuning depending on
14153 core type, optimize_size setting, etc. */
14155 static int
14156 arm_movmemqi_unaligned (rtx *operands)
14158 HOST_WIDE_INT length = INTVAL (operands[2]);
14160 if (optimize_size)
14162 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14163 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14164 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14165 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14166 or dst_aligned though: allow more interleaving in those cases since the
14167 resulting code can be smaller. */
14168 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14169 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14171 if (length > 12)
14172 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14173 interleave_factor, bytes_per_iter);
14174 else
14175 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14176 interleave_factor);
14178 else
14180 /* Note that the loop created by arm_block_move_unaligned_loop may be
14181 subject to loop unrolling, which makes tuning this condition a little
14182 redundant. */
14183 if (length > 32)
14184 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14185 else
14186 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14189 return 1;
14192 int
14193 arm_gen_movmemqi (rtx *operands)
14195 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14196 HOST_WIDE_INT srcoffset, dstoffset;
14197 rtx src, dst, srcbase, dstbase;
14198 rtx part_bytes_reg = NULL;
14199 rtx mem;
14201 if (!CONST_INT_P (operands[2])
14202 || !CONST_INT_P (operands[3])
14203 || INTVAL (operands[2]) > 64)
14204 return 0;
14206 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14207 return arm_movmemqi_unaligned (operands);
14209 if (INTVAL (operands[3]) & 3)
14210 return 0;
14212 dstbase = operands[0];
14213 srcbase = operands[1];
14215 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14216 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14218 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14219 out_words_to_go = INTVAL (operands[2]) / 4;
14220 last_bytes = INTVAL (operands[2]) & 3;
14221 dstoffset = srcoffset = 0;
14223 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14224 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14226 while (in_words_to_go >= 2)
14228 if (in_words_to_go > 4)
14229 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14230 TRUE, srcbase, &srcoffset));
14231 else
14232 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14233 src, FALSE, srcbase,
14234 &srcoffset));
14236 if (out_words_to_go)
14238 if (out_words_to_go > 4)
14239 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14240 TRUE, dstbase, &dstoffset));
14241 else if (out_words_to_go != 1)
14242 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14243 out_words_to_go, dst,
14244 (last_bytes == 0
14245 ? FALSE : TRUE),
14246 dstbase, &dstoffset));
14247 else
14249 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14250 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14251 if (last_bytes != 0)
14253 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14254 dstoffset += 4;
14259 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14260 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14263 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14264 if (out_words_to_go)
14266 rtx sreg;
14268 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14269 sreg = copy_to_reg (mem);
14271 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14272 emit_move_insn (mem, sreg);
14273 in_words_to_go--;
14275 gcc_assert (!in_words_to_go); /* Sanity check */
14278 if (in_words_to_go)
14280 gcc_assert (in_words_to_go > 0);
14282 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14283 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14286 gcc_assert (!last_bytes || part_bytes_reg);
14288 if (BYTES_BIG_ENDIAN && last_bytes)
14290 rtx tmp = gen_reg_rtx (SImode);
14292 /* The bytes we want are in the top end of the word. */
14293 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14294 GEN_INT (8 * (4 - last_bytes))));
14295 part_bytes_reg = tmp;
14297 while (last_bytes)
14299 mem = adjust_automodify_address (dstbase, QImode,
14300 plus_constant (Pmode, dst,
14301 last_bytes - 1),
14302 dstoffset + last_bytes - 1);
14303 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14305 if (--last_bytes)
14307 tmp = gen_reg_rtx (SImode);
14308 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14309 part_bytes_reg = tmp;
14314 else
14316 if (last_bytes > 1)
14318 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14319 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14320 last_bytes -= 2;
14321 if (last_bytes)
14323 rtx tmp = gen_reg_rtx (SImode);
14324 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14325 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14326 part_bytes_reg = tmp;
14327 dstoffset += 2;
14331 if (last_bytes)
14333 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14334 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14338 return 1;
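/* For instance, a 10-byte copy with word-aligned operands
   (operands[2] == 10, operands[3] a multiple of 4) loads three words
   with a single ldm, stores the first two with an stm that writes
   the base back, and then writes the trailing 2 bytes with a strh
   taken from the low half of the third register; on a big-endian
   target the leftover bytes are first shifted down from the top of
   the word and stored individually. */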
14341 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14342 by mode size. */
14343 inline static rtx
14344 next_consecutive_mem (rtx mem)
14346 machine_mode mode = GET_MODE (mem);
14347 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14348 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14350 return adjust_automodify_address (mem, mode, addr, offset);
14353 /* Copy using LDRD/STRD instructions whenever possible.
14354 Returns true upon success. */
14355 bool
14356 gen_movmem_ldrd_strd (rtx *operands)
14358 unsigned HOST_WIDE_INT len;
14359 HOST_WIDE_INT align;
14360 rtx src, dst, base;
14361 rtx reg0;
14362 bool src_aligned, dst_aligned;
14363 bool src_volatile, dst_volatile;
14365 gcc_assert (CONST_INT_P (operands[2]));
14366 gcc_assert (CONST_INT_P (operands[3]));
14368 len = UINTVAL (operands[2]);
14369 if (len > 64)
14370 return false;
14372 /* Maximum alignment we can assume for both src and dst buffers. */
14373 align = INTVAL (operands[3]);
14375 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14376 return false;
14378 /* Place src and dst addresses in registers
14379 and update the corresponding mem rtx. */
14380 dst = operands[0];
14381 dst_volatile = MEM_VOLATILE_P (dst);
14382 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14383 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14384 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14386 src = operands[1];
14387 src_volatile = MEM_VOLATILE_P (src);
14388 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14389 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14390 src = adjust_automodify_address (src, VOIDmode, base, 0);
14392 if (!unaligned_access && !(src_aligned && dst_aligned))
14393 return false;
14395 if (src_volatile || dst_volatile)
14396 return false;
14398 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14399 if (!(dst_aligned || src_aligned))
14400 return arm_gen_movmemqi (operands);
14402 /* If either src or dst is unaligned we'll access it as pairs
14403 of unaligned SImode accesses. Otherwise we can generate DImode
14404 ldrd/strd instructions. */
14405 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14406 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14408 while (len >= 8)
14410 len -= 8;
14411 reg0 = gen_reg_rtx (DImode);
14412 rtx low_reg = NULL_RTX;
14413 rtx hi_reg = NULL_RTX;
14415 if (!src_aligned || !dst_aligned)
14417 low_reg = gen_lowpart (SImode, reg0);
14418 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14420 if (src_aligned)
14421 emit_move_insn (reg0, src);
14422 else
14424 emit_insn (gen_unaligned_loadsi (low_reg, src));
14425 src = next_consecutive_mem (src);
14426 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14429 if (dst_aligned)
14430 emit_move_insn (dst, reg0);
14431 else
14433 emit_insn (gen_unaligned_storesi (dst, low_reg));
14434 dst = next_consecutive_mem (dst);
14435 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14438 src = next_consecutive_mem (src);
14439 dst = next_consecutive_mem (dst);
14442 gcc_assert (len < 8);
14443 if (len >= 4)
14445 /* More than a word but less than a double-word to copy. Copy a word. */
14446 reg0 = gen_reg_rtx (SImode);
14447 src = adjust_address (src, SImode, 0);
14448 dst = adjust_address (dst, SImode, 0);
14449 if (src_aligned)
14450 emit_move_insn (reg0, src);
14451 else
14452 emit_insn (gen_unaligned_loadsi (reg0, src));
14454 if (dst_aligned)
14455 emit_move_insn (dst, reg0);
14456 else
14457 emit_insn (gen_unaligned_storesi (dst, reg0));
14459 src = next_consecutive_mem (src);
14460 dst = next_consecutive_mem (dst);
14461 len -= 4;
14464 if (len == 0)
14465 return true;
14467 /* Copy the remaining bytes. */
14468 if (len >= 2)
14470 dst = adjust_address (dst, HImode, 0);
14471 src = adjust_address (src, HImode, 0);
14472 reg0 = gen_reg_rtx (SImode);
14473 if (src_aligned)
14474 emit_insn (gen_zero_extendhisi2 (reg0, src));
14475 else
14476 emit_insn (gen_unaligned_loadhiu (reg0, src));
14478 if (dst_aligned)
14479 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14480 else
14481 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14483 src = next_consecutive_mem (src);
14484 dst = next_consecutive_mem (dst);
14485 if (len == 2)
14486 return true;
14489 dst = adjust_address (dst, QImode, 0);
14490 src = adjust_address (src, QImode, 0);
14491 reg0 = gen_reg_rtx (QImode);
14492 emit_move_insn (reg0, src);
14493 emit_move_insn (dst, reg0);
14494 return true;
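/* For example, a 14-byte copy with both buffers word-aligned expands
   to one DImode ldrd/strd pair (8 bytes), one SImode load/store
   (4 bytes) and one halfword load/store (2 bytes); if either buffer
   is unaligned, the DImode step is instead performed as a pair of
   unaligned SImode accesses on that side. */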
14497 /* Select a dominance comparison mode if possible for a test of the general
14498 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14499 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14500 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14501 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14502 In all cases OP will be either EQ or NE, but we don't need to know which
14503 here. If we are unable to support a dominance comparison we return
14504 CC mode. This will then fail to match for the RTL expressions that
14505 generate this call. */
14506 machine_mode
14507 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14509 enum rtx_code cond1, cond2;
14510 int swapped = 0;
14512 /* Currently we will probably get the wrong result if the individual
14513 comparisons are not simple. This also ensures that it is safe to
14514 reverse a comparison if necessary. */
14515 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14516 != CCmode)
14517 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14518 != CCmode))
14519 return CCmode;
14521 /* The if_then_else variant of this tests the second condition if the
14522 first passes, but is true if the first fails. Reverse the first
14523 condition to get a true "inclusive-or" expression. */
14524 if (cond_or == DOM_CC_NX_OR_Y)
14525 cond1 = reverse_condition (cond1);
14527 /* If the comparisons are not equal, and one doesn't dominate the other,
14528 then we can't do this. */
14529 if (cond1 != cond2
14530 && !comparison_dominates_p (cond1, cond2)
14531 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14532 return CCmode;
14534 if (swapped)
14535 std::swap (cond1, cond2);
14537 switch (cond1)
14539 case EQ:
14540 if (cond_or == DOM_CC_X_AND_Y)
14541 return CC_DEQmode;
14543 switch (cond2)
14545 case EQ: return CC_DEQmode;
14546 case LE: return CC_DLEmode;
14547 case LEU: return CC_DLEUmode;
14548 case GE: return CC_DGEmode;
14549 case GEU: return CC_DGEUmode;
14550 default: gcc_unreachable ();
14553 case LT:
14554 if (cond_or == DOM_CC_X_AND_Y)
14555 return CC_DLTmode;
14557 switch (cond2)
14559 case LT:
14560 return CC_DLTmode;
14561 case LE:
14562 return CC_DLEmode;
14563 case NE:
14564 return CC_DNEmode;
14565 default:
14566 gcc_unreachable ();
14569 case GT:
14570 if (cond_or == DOM_CC_X_AND_Y)
14571 return CC_DGTmode;
14573 switch (cond2)
14575 case GT:
14576 return CC_DGTmode;
14577 case GE:
14578 return CC_DGEmode;
14579 case NE:
14580 return CC_DNEmode;
14581 default:
14582 gcc_unreachable ();
14585 case LTU:
14586 if (cond_or == DOM_CC_X_AND_Y)
14587 return CC_DLTUmode;
14589 switch (cond2)
14591 case LTU:
14592 return CC_DLTUmode;
14593 case LEU:
14594 return CC_DLEUmode;
14595 case NE:
14596 return CC_DNEmode;
14597 default:
14598 gcc_unreachable ();
14601 case GTU:
14602 if (cond_or == DOM_CC_X_AND_Y)
14603 return CC_DGTUmode;
14605 switch (cond2)
14607 case GTU:
14608 return CC_DGTUmode;
14609 case GEU:
14610 return CC_DGEUmode;
14611 case NE:
14612 return CC_DNEmode;
14613 default:
14614 gcc_unreachable ();
14617 /* The remaining cases only occur when both comparisons are the
14618 same. */
14619 case NE:
14620 gcc_assert (cond1 == cond2);
14621 return CC_DNEmode;
14623 case LE:
14624 gcc_assert (cond1 == cond2);
14625 return CC_DLEmode;
14627 case GE:
14628 gcc_assert (cond1 == cond2);
14629 return CC_DGEmode;
14631 case LEU:
14632 gcc_assert (cond1 == cond2);
14633 return CC_DLEUmode;
14635 case GEU:
14636 gcc_assert (cond1 == cond2);
14637 return CC_DGEUmode;
14639 default:
14640 gcc_unreachable ();
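/* For example, testing (eq x 0) || (eq y 0) with COND_OR ==
   DOM_CC_X_OR_Y gives two EQ comparisons and the switch above yields
   CC_DEQmode; combining LT with GT, on the other hand, dominates in
   neither direction, so the function returns CCmode and the
   conditional-compare patterns simply fail to match. */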
14644 machine_mode
14645 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14647 /* All floating point compares return CCFP if it is an equality
14648 comparison, and CCFPE otherwise. */
14649 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14651 switch (op)
14653 case EQ:
14654 case NE:
14655 case UNORDERED:
14656 case ORDERED:
14657 case UNLT:
14658 case UNLE:
14659 case UNGT:
14660 case UNGE:
14661 case UNEQ:
14662 case LTGT:
14663 return CCFPmode;
14665 case LT:
14666 case LE:
14667 case GT:
14668 case GE:
14669 return CCFPEmode;
14671 default:
14672 gcc_unreachable ();
14676 /* A compare with a shifted operand. Because of canonicalization, the
14677 comparison will have to be swapped when we emit the assembler. */
14678 if (GET_MODE (y) == SImode
14679 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14680 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14681 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14682 || GET_CODE (x) == ROTATERT))
14683 return CC_SWPmode;
14685 /* This operation is performed swapped, but since we only rely on the Z
14686 flag we don't need an additional mode. */
14687 if (GET_MODE (y) == SImode
14688 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14689 && GET_CODE (x) == NEG
14690 && (op == EQ || op == NE))
14691 return CC_Zmode;
14693 /* This is a special case that is used by combine to allow a
14694 comparison of a shifted byte load to be split into a zero-extend
14695 followed by a comparison of the shifted integer (only valid for
14696 equalities and unsigned inequalities). */
14697 if (GET_MODE (x) == SImode
14698 && GET_CODE (x) == ASHIFT
14699 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14700 && GET_CODE (XEXP (x, 0)) == SUBREG
14701 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14702 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14703 && (op == EQ || op == NE
14704 || op == GEU || op == GTU || op == LTU || op == LEU)
14705 && CONST_INT_P (y))
14706 return CC_Zmode;
14708 /* A construct for a conditional compare, if the false arm contains
14709 0, then both conditions must be true, otherwise either condition
14710 must be true. Not all conditions are possible, so CCmode is
14711 returned if it can't be done. */
14712 if (GET_CODE (x) == IF_THEN_ELSE
14713 && (XEXP (x, 2) == const0_rtx
14714 || XEXP (x, 2) == const1_rtx)
14715 && COMPARISON_P (XEXP (x, 0))
14716 && COMPARISON_P (XEXP (x, 1)))
14717 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14718 INTVAL (XEXP (x, 2)));
14720 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14721 if (GET_CODE (x) == AND
14722 && (op == EQ || op == NE)
14723 && COMPARISON_P (XEXP (x, 0))
14724 && COMPARISON_P (XEXP (x, 1)))
14725 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14726 DOM_CC_X_AND_Y);
14728 if (GET_CODE (x) == IOR
14729 && (op == EQ || op == NE)
14730 && COMPARISON_P (XEXP (x, 0))
14731 && COMPARISON_P (XEXP (x, 1)))
14732 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14733 DOM_CC_X_OR_Y);
14735 /* An operation (on Thumb) where we want to test for a single bit.
14736 This is done by shifting that bit up into the top bit of a
14737 scratch register; we can then branch on the sign bit. */
14738 if (TARGET_THUMB1
14739 && GET_MODE (x) == SImode
14740 && (op == EQ || op == NE)
14741 && GET_CODE (x) == ZERO_EXTRACT
14742 && XEXP (x, 1) == const1_rtx)
14743 return CC_Nmode;
14745 /* An operation that sets the condition codes as a side-effect, the
14746 V flag is not set correctly, so we can only use comparisons where
14747 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14748 instead.) */
14749 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14750 if (GET_MODE (x) == SImode
14751 && y == const0_rtx
14752 && (op == EQ || op == NE || op == LT || op == GE)
14753 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14754 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14755 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14756 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14757 || GET_CODE (x) == LSHIFTRT
14758 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14759 || GET_CODE (x) == ROTATERT
14760 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14761 return CC_NOOVmode;
14763 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14764 return CC_Zmode;
14766 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14767 && GET_CODE (x) == PLUS
14768 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14769 return CC_Cmode;
14771 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14773 switch (op)
14775 case EQ:
14776 case NE:
14777 /* A DImode comparison against zero can be implemented by
14778 or'ing the two halves together. */
14779 if (y == const0_rtx)
14780 return CC_Zmode;
14782 /* We can do an equality test in three Thumb instructions. */
14783 if (!TARGET_32BIT)
14784 return CC_Zmode;
14786 /* FALLTHROUGH */
14788 case LTU:
14789 case LEU:
14790 case GTU:
14791 case GEU:
14792 /* DImode unsigned comparisons can be implemented by cmp +
14793 cmpeq without a scratch register. Not worth doing in
14794 Thumb-2. */
14795 if (TARGET_32BIT)
14796 return CC_CZmode;
14798 /* FALLTHROUGH */
14800 case LT:
14801 case LE:
14802 case GT:
14803 case GE:
14804 /* DImode signed and unsigned comparisons can be implemented
14805 by cmp + sbcs with a scratch register, but that does not
14806 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14807 gcc_assert (op != EQ && op != NE);
14808 return CC_NCVmode;
14810 default:
14811 gcc_unreachable ();
14815 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14816 return GET_MODE (x);
14818 return CCmode;
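/* Two concrete cases of the rules above: an unsigned test
   (ltu (plus:SI r0 r1) r1) selects CC_Cmode, since the carry flag
   alone says whether the addition wrapped; and a comparison whose
   first operand is a shift, e.g. (ashift:SI r0 (const_int 3)),
   against a plain register selects CC_SWPmode, because the operands
   have to be swapped when the cmp is finally output. */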
14821 /* X and Y are two things to compare using CODE. Emit the compare insn and
14822 return the rtx for register 0 in the proper mode. FP means this is a
14823 floating point compare: I don't think that it is needed on the arm. */
14824 rtx
14825 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14827 machine_mode mode;
14828 rtx cc_reg;
14829 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14831 /* We might have X as a constant, Y as a register because of the predicates
14832 used for cmpdi. If so, force X to a register here. */
14833 if (dimode_comparison && !REG_P (x))
14834 x = force_reg (DImode, x);
14836 mode = SELECT_CC_MODE (code, x, y);
14837 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14839 if (dimode_comparison
14840 && mode != CC_CZmode)
14842 rtx clobber, set;
14844 /* To compare two non-zero values for equality, XOR them and
14845 then compare against zero. Not used for ARM mode; there
14846 CC_CZmode is cheaper. */
14847 if (mode == CC_Zmode && y != const0_rtx)
14849 gcc_assert (!reload_completed);
14850 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14851 y = const0_rtx;
14854 /* A scratch register is required. */
14855 if (reload_completed)
14856 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14857 else
14858 scratch = gen_rtx_SCRATCH (SImode);
14860 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14861 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14862 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14864 else
14865 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14867 return cc_reg;
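/* For instance, on Thumb-1 a DImode equality test against a non-zero
   operand is rewritten here (before reload) as a CC_Zmode compare of
   (xor x y) against zero with an SImode scratch clobber, whereas on
   32-bit targets the same comparison selects CC_CZmode and keeps
   both operands for the cmp/cmpeq sequence produced later. */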
14870 /* Generate a sequence of insns that will generate the correct return
14871 address mask depending on the physical architecture that the program
14872 is running on. */
14873 rtx
14874 arm_gen_return_addr_mask (void)
14876 rtx reg = gen_reg_rtx (Pmode);
14878 emit_insn (gen_return_addr_mask (reg));
14879 return reg;
14882 void
14883 arm_reload_in_hi (rtx *operands)
14885 rtx ref = operands[1];
14886 rtx base, scratch;
14887 HOST_WIDE_INT offset = 0;
14889 if (GET_CODE (ref) == SUBREG)
14891 offset = SUBREG_BYTE (ref);
14892 ref = SUBREG_REG (ref);
14895 if (REG_P (ref))
14897 /* We have a pseudo which has been spilt onto the stack; there
14898 are two cases here: the first where there is a simple
14899 stack-slot replacement and a second where the stack-slot is
14900 out of range, or is used as a subreg. */
14901 if (reg_equiv_mem (REGNO (ref)))
14903 ref = reg_equiv_mem (REGNO (ref));
14904 base = find_replacement (&XEXP (ref, 0));
14906 else
14907 /* The slot is out of range, or was dressed up in a SUBREG. */
14908 base = reg_equiv_address (REGNO (ref));
14910 /* PR 62554: If there is no equivalent memory location then just move
14911 the value as an SImode register move. This happens when the target
14912 architecture variant does not have an HImode register move. */
14913 if (base == NULL)
14915 gcc_assert (REG_P (operands[0]));
14916 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14917 gen_rtx_SUBREG (SImode, ref, 0)));
14918 return;
14921 else
14922 base = find_replacement (&XEXP (ref, 0));
14924 /* Handle the case where the address is too complex to be offset by 1. */
14925 if (GET_CODE (base) == MINUS
14926 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14928 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14930 emit_set_insn (base_plus, base);
14931 base = base_plus;
14933 else if (GET_CODE (base) == PLUS)
14935 /* The addend must be CONST_INT, or we would have dealt with it above. */
14936 HOST_WIDE_INT hi, lo;
14938 offset += INTVAL (XEXP (base, 1));
14939 base = XEXP (base, 0);
14941 /* Rework the address into a legal sequence of insns. */
14942 /* Valid range for lo is -4095 -> 4095 */
14943 lo = (offset >= 0
14944 ? (offset & 0xfff)
14945 : -((-offset) & 0xfff));
14947 /* Corner case, if lo is the max offset then we would be out of range
14948 once we have added the additional 1 below, so bump the msb into the
14949 pre-loading insn(s). */
14950 if (lo == 4095)
14951 lo &= 0x7ff;
14953 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14954 ^ (HOST_WIDE_INT) 0x80000000)
14955 - (HOST_WIDE_INT) 0x80000000);
14957 gcc_assert (hi + lo == offset);
14959 if (hi != 0)
14961 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14963 /* Get the base address; addsi3 knows how to handle constants
14964 that require more than one insn. */
14965 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14966 base = base_plus;
14967 offset = lo;
14971 /* Operands[2] may overlap operands[0] (though it won't overlap
14972 operands[1]), that's why we asked for a DImode reg -- so we can
14973 use the half that does not overlap. */
14974 if (REGNO (operands[2]) == REGNO (operands[0]))
14975 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14976 else
14977 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14979 emit_insn (gen_zero_extendqisi2 (scratch,
14980 gen_rtx_MEM (QImode,
14981 plus_constant (Pmode, base,
14982 offset))));
14983 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14984 gen_rtx_MEM (QImode,
14985 plus_constant (Pmode, base,
14986 offset + 1))));
14987 if (!BYTES_BIG_ENDIAN)
14988 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14989 gen_rtx_IOR (SImode,
14990 gen_rtx_ASHIFT
14991 (SImode,
14992 gen_rtx_SUBREG (SImode, operands[0], 0),
14993 GEN_INT (8)),
14994 scratch));
14995 else
14996 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14997 gen_rtx_IOR (SImode,
14998 gen_rtx_ASHIFT (SImode, scratch,
14999 GEN_INT (8)),
15000 gen_rtx_SUBREG (SImode, operands[0], 0)));
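/* A worked example of the hi/lo offset split above: for
   OFFSET == 4100, lo is 4 and hi is 4096, so the base is first
   advanced by 4096 with addsi3 and the two byte loads then use
   offsets 4 and 5; for the corner case OFFSET == 4095, lo is reduced
   to 0x7ff so that lo + 1 still fits the 12-bit load offset, and hi
   absorbs the remaining 2048. */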
15003 /* Handle storing a half-word to memory during reload by synthesizing as two
15004 byte stores. Take care not to clobber the input values until after we
15005 have moved them somewhere safe. This code assumes that if the DImode
15006 scratch in operands[2] overlaps either the input value or output address
15007 in some way, then that value must die in this insn (we absolutely need
15008 two scratch registers for some corner cases). */
15009 void
15010 arm_reload_out_hi (rtx *operands)
15012 rtx ref = operands[0];
15013 rtx outval = operands[1];
15014 rtx base, scratch;
15015 HOST_WIDE_INT offset = 0;
15017 if (GET_CODE (ref) == SUBREG)
15019 offset = SUBREG_BYTE (ref);
15020 ref = SUBREG_REG (ref);
15023 if (REG_P (ref))
15025 /* We have a pseudo which has been spilt onto the stack; there
15026 are two cases here: the first where there is a simple
15027 stack-slot replacement and a second where the stack-slot is
15028 out of range, or is used as a subreg. */
15029 if (reg_equiv_mem (REGNO (ref)))
15031 ref = reg_equiv_mem (REGNO (ref));
15032 base = find_replacement (&XEXP (ref, 0));
15034 else
15035 /* The slot is out of range, or was dressed up in a SUBREG. */
15036 base = reg_equiv_address (REGNO (ref));
15038 /* PR 62254: If there is no equivalent memory location then just move
15039 the value as an SImode register move. This happens when the target
15040 architecture variant does not have an HImode register move. */
15041 if (base == NULL)
15043 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15045 if (REG_P (outval))
15047 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15048 gen_rtx_SUBREG (SImode, outval, 0)));
15050 else /* SUBREG_P (outval) */
15052 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15053 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15054 SUBREG_REG (outval)));
15055 else
15056 /* FIXME: Handle other cases ? */
15057 gcc_unreachable ();
15059 return;
15062 else
15063 base = find_replacement (&XEXP (ref, 0));
15065 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15067 /* Handle the case where the address is too complex to be offset by 1. */
15068 if (GET_CODE (base) == MINUS
15069 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15071 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15073 /* Be careful not to destroy OUTVAL. */
15074 if (reg_overlap_mentioned_p (base_plus, outval))
15076 /* Updating base_plus might destroy outval, see if we can
15077 swap the scratch and base_plus. */
15078 if (!reg_overlap_mentioned_p (scratch, outval))
15079 std::swap (scratch, base_plus);
15080 else
15082 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15084 /* Be conservative and copy OUTVAL into the scratch now,
15085 this should only be necessary if outval is a subreg
15086 of something larger than a word. */
15087 /* XXX Might this clobber base? I can't see how it can,
15088 since scratch is known to overlap with OUTVAL, and
15089 must be wider than a word. */
15090 emit_insn (gen_movhi (scratch_hi, outval));
15091 outval = scratch_hi;
15095 emit_set_insn (base_plus, base);
15096 base = base_plus;
15098 else if (GET_CODE (base) == PLUS)
15100 /* The addend must be CONST_INT, or we would have dealt with it above. */
15101 HOST_WIDE_INT hi, lo;
15103 offset += INTVAL (XEXP (base, 1));
15104 base = XEXP (base, 0);
15106 /* Rework the address into a legal sequence of insns. */
15107 /* Valid range for lo is -4095 -> 4095 */
15108 lo = (offset >= 0
15109 ? (offset & 0xfff)
15110 : -((-offset) & 0xfff));
15112 /* Corner case, if lo is the max offset then we would be out of range
15113 once we have added the additional 1 below, so bump the msb into the
15114 pre-loading insn(s). */
15115 if (lo == 4095)
15116 lo &= 0x7ff;
15118 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15119 ^ (HOST_WIDE_INT) 0x80000000)
15120 - (HOST_WIDE_INT) 0x80000000);
15122 gcc_assert (hi + lo == offset);
15124 if (hi != 0)
15126 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15128 /* Be careful not to destroy OUTVAL. */
15129 if (reg_overlap_mentioned_p (base_plus, outval))
15131 /* Updating base_plus might destroy outval, see if we
15132 can swap the scratch and base_plus. */
15133 if (!reg_overlap_mentioned_p (scratch, outval))
15134 std::swap (scratch, base_plus);
15135 else
15137 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15139 /* Be conservative and copy outval into scratch now,
15140 this should only be necessary if outval is a
15141 subreg of something larger than a word. */
15142 /* XXX Might this clobber base? I can't see how it
15143 can, since scratch is known to overlap with
15144 outval. */
15145 emit_insn (gen_movhi (scratch_hi, outval));
15146 outval = scratch_hi;
15150 /* Get the base address; addsi3 knows how to handle constants
15151 that require more than one insn. */
15152 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15153 base = base_plus;
15154 offset = lo;
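/* Emit the two byte stores.  The least significant byte of OUTVAL goes to
   the lower address for little-endian and to the higher address for
   big-endian.  */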
15158 if (BYTES_BIG_ENDIAN)
15160 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15161 plus_constant (Pmode, base,
15162 offset + 1)),
15163 gen_lowpart (QImode, outval)));
15164 emit_insn (gen_lshrsi3 (scratch,
15165 gen_rtx_SUBREG (SImode, outval, 0),
15166 GEN_INT (8)));
15167 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15168 offset)),
15169 gen_lowpart (QImode, scratch)));
15171 else
15173 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15174 offset)),
15175 gen_lowpart (QImode, outval)));
15176 emit_insn (gen_lshrsi3 (scratch,
15177 gen_rtx_SUBREG (SImode, outval, 0),
15178 GEN_INT (8)));
15179 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15180 plus_constant (Pmode, base,
15181 offset + 1)),
15182 gen_lowpart (QImode, scratch)));
15186 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15187 (padded to the size of a word) should be passed in a register. */
15189 static bool
15190 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15192 if (TARGET_AAPCS_BASED)
15193 return must_pass_in_stack_var_size (mode, type);
15194 else
15195 return must_pass_in_stack_var_size_or_pad (mode, type);
15199 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15200 byte of a stack argument has useful data. For legacy APCS ABIs we use
15201 the default. For AAPCS based ABIs small aggregate types are placed
15202 in the lowest memory address. */
15204 static pad_direction
15205 arm_function_arg_padding (machine_mode mode, const_tree type)
15207 if (!TARGET_AAPCS_BASED)
15208 return default_function_arg_padding (mode, type);
15210 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15211 return PAD_DOWNWARD;
15213 return PAD_UPWARD;
15217 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15218 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15219 register has useful data, and return the opposite if the most
15220 significant byte does. */
15222 bool
15223 arm_pad_reg_upward (machine_mode mode,
15224 tree type, int first ATTRIBUTE_UNUSED)
15226 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15228 /* For AAPCS, small aggregates, small fixed-point types,
15229 and small complex types are always padded upwards. */
15230 if (type)
15232 if ((AGGREGATE_TYPE_P (type)
15233 || TREE_CODE (type) == COMPLEX_TYPE
15234 || FIXED_POINT_TYPE_P (type))
15235 && int_size_in_bytes (type) <= 4)
15236 return true;
15238 else
15240 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15241 && GET_MODE_SIZE (mode) <= 4)
15242 return true;
15246 /* Otherwise, use default padding. */
15247 return !BYTES_BIG_ENDIAN;
15250 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15251 assuming that the address in the base register is word aligned. */
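/* For example, in Thumb-2 state an offset of 1020 is accepted, while 1022
   (not a multiple of four) and 1024 (out of range) are rejected; in ARM
   state the offset must lie within [-255, 255].  */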
15252 bool
15253 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15255 HOST_WIDE_INT max_offset;
15257 /* Offset must be a multiple of 4 in Thumb mode. */
15258 if (TARGET_THUMB2 && ((offset & 3) != 0))
15259 return false;
15261 if (TARGET_THUMB2)
15262 max_offset = 1020;
15263 else if (TARGET_ARM)
15264 max_offset = 255;
15265 else
15266 return false;
15268 return ((offset <= max_offset) && (offset >= -max_offset));
15271 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15272 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15273 Assumes that the address in the base register RN is word aligned. Pattern
15274 guarantees that both memory accesses use the same base register,
 15275 the offsets are constants within the range, and the gap between the offsets is 4.
 15276 If reload is complete then check that the registers are legal. WBACK indicates whether
15277 address is updated. LOAD indicates whether memory access is load or store. */
15278 bool
15279 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15280 bool wback, bool load)
15282 unsigned int t, t2, n;
15284 if (!reload_completed)
15285 return true;
15287 if (!offset_ok_for_ldrd_strd (offset))
15288 return false;
15290 t = REGNO (rt);
15291 t2 = REGNO (rt2);
15292 n = REGNO (rn);
15294 if ((TARGET_THUMB2)
15295 && ((wback && (n == t || n == t2))
15296 || (t == SP_REGNUM)
15297 || (t == PC_REGNUM)
15298 || (t2 == SP_REGNUM)
15299 || (t2 == PC_REGNUM)
15300 || (!load && (n == PC_REGNUM))
15301 || (load && (t == t2))
15302 /* Triggers Cortex-M3 LDRD errata. */
15303 || (!wback && load && fix_cm3_ldrd && (n == t))))
15304 return false;
15306 if ((TARGET_ARM)
15307 && ((wback && (n == t || n == t2))
15308 || (t2 == PC_REGNUM)
15309 || (t % 2 != 0) /* First destination register is not even. */
15310 || (t2 != t + 1)
15311 /* PC can be used as base register (for offset addressing only),
 15312 but it is deprecated. */
15313 || (n == PC_REGNUM)))
15314 return false;
15316 return true;
15319 /* Return true if a 64-bit access with alignment ALIGN and with a
15320 constant offset OFFSET from the base pointer is permitted on this
15321 architecture. */
15322 static bool
15323 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15325 return (unaligned_access
15326 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15327 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15330 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15331 operand MEM's address contains an immediate offset from the base
15332 register and has no side effects, in which case it sets BASE,
15333 OFFSET and ALIGN accordingly. */
15334 static bool
15335 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15337 rtx addr;
15339 gcc_assert (base != NULL && offset != NULL);
15341 /* TODO: Handle more general memory operand patterns, such as
15342 PRE_DEC and PRE_INC. */
15344 if (side_effects_p (mem))
15345 return false;
15347 /* Can't deal with subregs. */
15348 if (GET_CODE (mem) == SUBREG)
15349 return false;
15351 gcc_assert (MEM_P (mem));
15353 *offset = const0_rtx;
15354 *align = MEM_ALIGN (mem);
15356 addr = XEXP (mem, 0);
15358 /* If addr isn't valid for DImode, then we can't handle it. */
15359 if (!arm_legitimate_address_p (DImode, addr,
15360 reload_in_progress || reload_completed))
15361 return false;
15363 if (REG_P (addr))
15365 *base = addr;
15366 return true;
15368 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15370 *base = XEXP (addr, 0);
15371 *offset = XEXP (addr, 1);
15372 return (REG_P (*base) && CONST_INT_P (*offset));
15375 return false;
15378 /* Called from a peephole2 to replace two word-size accesses with a
15379 single LDRD/STRD instruction. Returns true iff we can generate a
15380 new instruction sequence. That is, both accesses use the same base
15381 register and the gap between constant offsets is 4. This function
15382 may reorder its operands to match ldrd/strd RTL templates.
15383 OPERANDS are the operands found by the peephole matcher;
15384 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
 15385 corresponding memory operands. LOAD indicates whether the access
15386 is load or store. CONST_STORE indicates a store of constant
15387 integer values held in OPERANDS[4,5] and assumes that the pattern
 15388 is 4 insns long, for the purpose of checking dead registers.
15389 COMMUTE indicates that register operands may be reordered. */
15390 bool
15391 gen_operands_ldrd_strd (rtx *operands, bool load,
15392 bool const_store, bool commute)
15394 int nops = 2;
15395 HOST_WIDE_INT offsets[2], offset, align[2];
15396 rtx base = NULL_RTX;
15397 rtx cur_base, cur_offset, tmp;
15398 int i, gap;
15399 HARD_REG_SET regset;
15401 gcc_assert (!const_store || !load);
15402 /* Check that the memory references are immediate offsets from the
15403 same base register. Extract the base register, the destination
15404 registers, and the corresponding memory offsets. */
15405 for (i = 0; i < nops; i++)
15407 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15408 &align[i]))
15409 return false;
15411 if (i == 0)
15412 base = cur_base;
15413 else if (REGNO (base) != REGNO (cur_base))
15414 return false;
15416 offsets[i] = INTVAL (cur_offset);
15417 if (GET_CODE (operands[i]) == SUBREG)
15419 tmp = SUBREG_REG (operands[i]);
15420 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15421 operands[i] = tmp;
15425 /* Make sure there is no dependency between the individual loads. */
15426 if (load && REGNO (operands[0]) == REGNO (base))
15427 return false; /* RAW */
15429 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15430 return false; /* WAW */
15432 /* If the same input register is used in both stores
15433 when storing different constants, try to find a free register.
15434 For example, the code
15435 mov r0, 0
15436 str r0, [r2]
15437 mov r0, 1
15438 str r0, [r2, #4]
15439 can be transformed into
15440 mov r1, 0
15441 mov r0, 1
15442 strd r1, r0, [r2]
15443 in Thumb mode assuming that r1 is free.
15444 For ARM mode do the same but only if the starting register
15445 can be made to be even. */
15446 if (const_store
15447 && REGNO (operands[0]) == REGNO (operands[1])
15448 && INTVAL (operands[4]) != INTVAL (operands[5]))
15450 if (TARGET_THUMB2)
15452 CLEAR_HARD_REG_SET (regset);
15453 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15454 if (tmp == NULL_RTX)
15455 return false;
15457 /* Use the new register in the first load to ensure that
15458 if the original input register is not dead after peephole,
15459 then it will have the correct constant value. */
15460 operands[0] = tmp;
15462 else if (TARGET_ARM)
15464 int regno = REGNO (operands[0]);
15465 if (!peep2_reg_dead_p (4, operands[0]))
15467 /* When the input register is even and is not dead after the
15468 pattern, it has to hold the second constant but we cannot
15469 form a legal STRD in ARM mode with this register as the second
15470 register. */
15471 if (regno % 2 == 0)
15472 return false;
15474 /* Is regno-1 free? */
15475 SET_HARD_REG_SET (regset);
 15476 CLEAR_HARD_REG_BIT (regset, regno - 1);
15477 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15478 if (tmp == NULL_RTX)
15479 return false;
15481 operands[0] = tmp;
15483 else
15485 /* Find a DImode register. */
15486 CLEAR_HARD_REG_SET (regset);
15487 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15488 if (tmp != NULL_RTX)
15490 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15491 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15493 else
15495 /* Can we use the input register to form a DI register? */
15496 SET_HARD_REG_SET (regset);
 15497 CLEAR_HARD_REG_BIT (regset,
15498 regno % 2 == 0 ? regno + 1 : regno - 1);
15499 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15500 if (tmp == NULL_RTX)
15501 return false;
15502 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15506 gcc_assert (operands[0] != NULL_RTX);
15507 gcc_assert (operands[1] != NULL_RTX);
15508 gcc_assert (REGNO (operands[0]) % 2 == 0);
15509 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15513 /* Make sure the instructions are ordered with lower memory access first. */
15514 if (offsets[0] > offsets[1])
15516 gap = offsets[0] - offsets[1];
15517 offset = offsets[1];
15519 /* Swap the instructions such that lower memory is accessed first. */
15520 std::swap (operands[0], operands[1]);
15521 std::swap (operands[2], operands[3]);
15522 std::swap (align[0], align[1]);
15523 if (const_store)
15524 std::swap (operands[4], operands[5]);
15526 else
15528 gap = offsets[1] - offsets[0];
15529 offset = offsets[0];
15532 /* Make sure accesses are to consecutive memory locations. */
15533 if (gap != 4)
15534 return false;
15536 if (!align_ok_ldrd_strd (align[0], offset))
15537 return false;
15539 /* Make sure we generate legal instructions. */
15540 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15541 false, load))
15542 return true;
15544 /* In Thumb state, where registers are almost unconstrained, there
 15545 is little hope of fixing it. */
15546 if (TARGET_THUMB2)
15547 return false;
15549 if (load && commute)
15551 /* Try reordering registers. */
15552 std::swap (operands[0], operands[1]);
15553 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15554 false, load))
15555 return true;
15558 if (const_store)
15560 /* If input registers are dead after this pattern, they can be
15561 reordered or replaced by other registers that are free in the
15562 current pattern. */
15563 if (!peep2_reg_dead_p (4, operands[0])
15564 || !peep2_reg_dead_p (4, operands[1]))
15565 return false;
15567 /* Try to reorder the input registers. */
15568 /* For example, the code
15569 mov r0, 0
15570 mov r1, 1
15571 str r1, [r2]
15572 str r0, [r2, #4]
15573 can be transformed into
15574 mov r1, 0
15575 mov r0, 1
15576 strd r0, [r2]
15578 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15579 false, false))
15581 std::swap (operands[0], operands[1]);
15582 return true;
15585 /* Try to find a free DI register. */
15586 CLEAR_HARD_REG_SET (regset);
15587 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15588 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15589 while (true)
15591 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15592 if (tmp == NULL_RTX)
15593 return false;
15595 /* DREG must be an even-numbered register in DImode.
15596 Split it into SI registers. */
15597 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15598 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15599 gcc_assert (operands[0] != NULL_RTX);
15600 gcc_assert (operands[1] != NULL_RTX);
15601 gcc_assert (REGNO (operands[0]) % 2 == 0);
15602 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15604 return (operands_ok_ldrd_strd (operands[0], operands[1],
15605 base, offset,
15606 false, load));
15610 return false;
15616 /* Print a symbolic form of X to the debug file, F. */
15617 static void
15618 arm_print_value (FILE *f, rtx x)
15620 switch (GET_CODE (x))
15622 case CONST_INT:
15623 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15624 return;
15626 case CONST_DOUBLE:
15627 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15628 return;
15630 case CONST_VECTOR:
15632 int i;
15634 fprintf (f, "<");
15635 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15637 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15638 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15639 fputc (',', f);
15641 fprintf (f, ">");
15643 return;
15645 case CONST_STRING:
15646 fprintf (f, "\"%s\"", XSTR (x, 0));
15647 return;
15649 case SYMBOL_REF:
15650 fprintf (f, "`%s'", XSTR (x, 0));
15651 return;
15653 case LABEL_REF:
15654 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15655 return;
15657 case CONST:
15658 arm_print_value (f, XEXP (x, 0));
15659 return;
15661 case PLUS:
15662 arm_print_value (f, XEXP (x, 0));
15663 fprintf (f, "+");
15664 arm_print_value (f, XEXP (x, 1));
15665 return;
15667 case PC:
15668 fprintf (f, "pc");
15669 return;
15671 default:
15672 fprintf (f, "????");
15673 return;
15677 /* Routines for manipulation of the constant pool. */
15679 /* Arm instructions cannot load a large constant directly into a
15680 register; they have to come from a pc relative load. The constant
15681 must therefore be placed in the addressable range of the pc
15682 relative load. Depending on the precise pc relative load
15683 instruction the range is somewhere between 256 bytes and 4k. This
15684 means that we often have to dump a constant inside a function, and
15685 generate code to branch around it.
15687 It is important to minimize this, since the branches will slow
15688 things down and make the code larger.
15690 Normally we can hide the table after an existing unconditional
15691 branch so that there is no interruption of the flow, but in the
15692 worst case the code looks like this:
15694 ldr rn, L1
15696 b L2
15697 align
15698 L1: .long value
15702 ldr rn, L3
15704 b L4
15705 align
15706 L3: .long value
15710 We fix this by performing a scan after scheduling, which notices
15711 which instructions need to have their operands fetched from the
15712 constant table and builds the table.
15714 The algorithm starts by building a table of all the constants that
15715 need fixing up and all the natural barriers in the function (places
15716 where a constant table can be dropped without breaking the flow).
15717 For each fixup we note how far the pc-relative replacement will be
15718 able to reach and the offset of the instruction into the function.
15720 Having built the table we then group the fixes together to form
15721 tables that are as large as possible (subject to addressing
15722 constraints) and emit each table of constants after the last
15723 barrier that is within range of all the instructions in the group.
15724 If a group does not contain a barrier, then we forcibly create one
15725 by inserting a jump instruction into the flow. Once the table has
15726 been inserted, the insns are then modified to reference the
15727 relevant entry in the pool.
15729 Possible enhancements to the algorithm (not implemented) are:
15731 1) For some processors and object formats, there may be benefit in
15732 aligning the pools to the start of cache lines; this alignment
15733 would need to be taken into account when calculating addressability
15734 of a pool. */
15736 /* These typedefs are located at the start of this file, so that
15737 they can be used in the prototypes there. This comment is to
15738 remind readers of that fact so that the following structures
15739 can be understood more easily.
15741 typedef struct minipool_node Mnode;
15742 typedef struct minipool_fixup Mfix; */
15744 struct minipool_node
15746 /* Doubly linked chain of entries. */
15747 Mnode * next;
15748 Mnode * prev;
 15749 /* The maximum offset into the code at which this entry can be placed. While
15750 pushing fixes for forward references, all entries are sorted in order
15751 of increasing max_address. */
15752 HOST_WIDE_INT max_address;
15753 /* Similarly for an entry inserted for a backwards ref. */
15754 HOST_WIDE_INT min_address;
15755 /* The number of fixes referencing this entry. This can become zero
15756 if we "unpush" an entry. In this case we ignore the entry when we
15757 come to emit the code. */
15758 int refcount;
15759 /* The offset from the start of the minipool. */
15760 HOST_WIDE_INT offset;
15761 /* The value in table. */
15762 rtx value;
15763 /* The mode of value. */
15764 machine_mode mode;
15765 /* The size of the value. With iWMMXt enabled
 15766 sizes > 4 also imply an alignment of 8 bytes. */
15767 int fix_size;
15770 struct minipool_fixup
15772 Mfix * next;
15773 rtx_insn * insn;
15774 HOST_WIDE_INT address;
15775 rtx * loc;
15776 machine_mode mode;
15777 int fix_size;
15778 rtx value;
15779 Mnode * minipool;
15780 HOST_WIDE_INT forwards;
15781 HOST_WIDE_INT backwards;
15784 /* Fixes less than a word need padding out to a word boundary. */
15785 #define MINIPOOL_FIX_SIZE(mode) \
15786 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
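/* For example, a HImode fix (2 bytes) is padded out to 4 bytes, while a
   DImode fix keeps its full 8 bytes.  */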
15788 static Mnode * minipool_vector_head;
15789 static Mnode * minipool_vector_tail;
15790 static rtx_code_label *minipool_vector_label;
15791 static int minipool_pad;
15793 /* The linked list of all minipool fixes required for this function. */
15794 Mfix * minipool_fix_head;
15795 Mfix * minipool_fix_tail;
15796 /* The fix entry for the current minipool, once it has been placed. */
15797 Mfix * minipool_barrier;
15799 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15800 #define JUMP_TABLES_IN_TEXT_SECTION 0
15801 #endif
15803 static HOST_WIDE_INT
15804 get_jump_table_size (rtx_jump_table_data *insn)
 15806 /* ADDR_VECs only take room if read-only data goes into the text
15807 section. */
15808 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15810 rtx body = PATTERN (insn);
15811 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15812 HOST_WIDE_INT size;
15813 HOST_WIDE_INT modesize;
15815 modesize = GET_MODE_SIZE (GET_MODE (body));
15816 size = modesize * XVECLEN (body, elt);
15817 switch (modesize)
15819 case 1:
15820 /* Round up size of TBB table to a halfword boundary. */
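/* For instance, five byte-sized entries (5 bytes) round up to 6.  */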
15821 size = (size + 1) & ~HOST_WIDE_INT_1;
15822 break;
15823 case 2:
15824 /* No padding necessary for TBH. */
15825 break;
15826 case 4:
15827 /* Add two bytes for alignment on Thumb. */
15828 if (TARGET_THUMB)
15829 size += 2;
15830 break;
15831 default:
15832 gcc_unreachable ();
15834 return size;
15837 return 0;
15840 /* Return the maximum amount of padding that will be inserted before
15841 label LABEL. */
15843 static HOST_WIDE_INT
15844 get_label_padding (rtx label)
15846 HOST_WIDE_INT align, min_insn_size;
15848 align = 1 << label_to_alignment (label);
15849 min_insn_size = TARGET_THUMB ? 2 : 4;
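/* E.g. an 8-byte-aligned label in Thumb code can be preceded by up to
   8 - 2 = 6 bytes of padding.  */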
15850 return align > min_insn_size ? align - min_insn_size : 0;
15853 /* Move a minipool fix MP from its current location to before MAX_MP.
15854 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15855 constraints may need updating. */
15856 static Mnode *
15857 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15858 HOST_WIDE_INT max_address)
15860 /* The code below assumes these are different. */
15861 gcc_assert (mp != max_mp);
15863 if (max_mp == NULL)
15865 if (max_address < mp->max_address)
15866 mp->max_address = max_address;
15868 else
15870 if (max_address > max_mp->max_address - mp->fix_size)
15871 mp->max_address = max_mp->max_address - mp->fix_size;
15872 else
15873 mp->max_address = max_address;
15875 /* Unlink MP from its current position. Since max_mp is non-null,
15876 mp->prev must be non-null. */
15877 mp->prev->next = mp->next;
15878 if (mp->next != NULL)
15879 mp->next->prev = mp->prev;
15880 else
15881 minipool_vector_tail = mp->prev;
15883 /* Re-insert it before MAX_MP. */
15884 mp->next = max_mp;
15885 mp->prev = max_mp->prev;
15886 max_mp->prev = mp;
15888 if (mp->prev != NULL)
15889 mp->prev->next = mp;
15890 else
15891 minipool_vector_head = mp;
15894 /* Save the new entry. */
15895 max_mp = mp;
15897 /* Scan over the preceding entries and adjust their addresses as
15898 required. */
15899 while (mp->prev != NULL
15900 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15902 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15903 mp = mp->prev;
15906 return max_mp;
15909 /* Add a constant to the minipool for a forward reference. Returns the
15910 node added or NULL if the constant will not fit in this pool. */
15911 static Mnode *
15912 add_minipool_forward_ref (Mfix *fix)
15914 /* If set, max_mp is the first pool_entry that has a lower
15915 constraint than the one we are trying to add. */
15916 Mnode * max_mp = NULL;
15917 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15918 Mnode * mp;
15920 /* If the minipool starts before the end of FIX->INSN then this FIX
 15921 cannot be placed into the current pool. Furthermore, adding the
15922 new constant pool entry may cause the pool to start FIX_SIZE bytes
15923 earlier. */
15924 if (minipool_vector_head &&
15925 (fix->address + get_attr_length (fix->insn)
15926 >= minipool_vector_head->max_address - fix->fix_size))
15927 return NULL;
15929 /* Scan the pool to see if a constant with the same value has
15930 already been added. While we are doing this, also note the
15931 location where we must insert the constant if it doesn't already
15932 exist. */
15933 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15935 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15936 && fix->mode == mp->mode
15937 && (!LABEL_P (fix->value)
15938 || (CODE_LABEL_NUMBER (fix->value)
15939 == CODE_LABEL_NUMBER (mp->value)))
15940 && rtx_equal_p (fix->value, mp->value))
15942 /* More than one fix references this entry. */
15943 mp->refcount++;
15944 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15947 /* Note the insertion point if necessary. */
15948 if (max_mp == NULL
15949 && mp->max_address > max_address)
15950 max_mp = mp;
 15952 /* If we are inserting an 8-byte aligned quantity and
15953 we have not already found an insertion point, then
15954 make sure that all such 8-byte aligned quantities are
15955 placed at the start of the pool. */
15956 if (ARM_DOUBLEWORD_ALIGN
15957 && max_mp == NULL
15958 && fix->fix_size >= 8
15959 && mp->fix_size < 8)
15961 max_mp = mp;
15962 max_address = mp->max_address;
15966 /* The value is not currently in the minipool, so we need to create
15967 a new entry for it. If MAX_MP is NULL, the entry will be put on
15968 the end of the list since the placement is less constrained than
15969 any existing entry. Otherwise, we insert the new fix before
15970 MAX_MP and, if necessary, adjust the constraints on the other
15971 entries. */
15972 mp = XNEW (Mnode);
15973 mp->fix_size = fix->fix_size;
15974 mp->mode = fix->mode;
15975 mp->value = fix->value;
15976 mp->refcount = 1;
15977 /* Not yet required for a backwards ref. */
15978 mp->min_address = -65536;
15980 if (max_mp == NULL)
15982 mp->max_address = max_address;
15983 mp->next = NULL;
15984 mp->prev = minipool_vector_tail;
15986 if (mp->prev == NULL)
15988 minipool_vector_head = mp;
15989 minipool_vector_label = gen_label_rtx ();
15991 else
15992 mp->prev->next = mp;
15994 minipool_vector_tail = mp;
15996 else
15998 if (max_address > max_mp->max_address - mp->fix_size)
15999 mp->max_address = max_mp->max_address - mp->fix_size;
16000 else
16001 mp->max_address = max_address;
16003 mp->next = max_mp;
16004 mp->prev = max_mp->prev;
16005 max_mp->prev = mp;
16006 if (mp->prev != NULL)
16007 mp->prev->next = mp;
16008 else
16009 minipool_vector_head = mp;
16012 /* Save the new entry. */
16013 max_mp = mp;
16015 /* Scan over the preceding entries and adjust their addresses as
16016 required. */
16017 while (mp->prev != NULL
16018 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16020 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16021 mp = mp->prev;
16024 return max_mp;
16027 static Mnode *
16028 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16029 HOST_WIDE_INT min_address)
16031 HOST_WIDE_INT offset;
16033 /* The code below assumes these are different. */
16034 gcc_assert (mp != min_mp);
16036 if (min_mp == NULL)
16038 if (min_address > mp->min_address)
16039 mp->min_address = min_address;
16041 else
16043 /* We will adjust this below if it is too loose. */
16044 mp->min_address = min_address;
16046 /* Unlink MP from its current position. Since min_mp is non-null,
16047 mp->next must be non-null. */
16048 mp->next->prev = mp->prev;
16049 if (mp->prev != NULL)
16050 mp->prev->next = mp->next;
16051 else
16052 minipool_vector_head = mp->next;
16054 /* Reinsert it after MIN_MP. */
16055 mp->prev = min_mp;
16056 mp->next = min_mp->next;
16057 min_mp->next = mp;
16058 if (mp->next != NULL)
16059 mp->next->prev = mp;
16060 else
16061 minipool_vector_tail = mp;
16064 min_mp = mp;
16066 offset = 0;
16067 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16069 mp->offset = offset;
16070 if (mp->refcount > 0)
16071 offset += mp->fix_size;
16073 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16074 mp->next->min_address = mp->min_address + mp->fix_size;
16077 return min_mp;
16080 /* Add a constant to the minipool for a backward reference. Returns the
16081 node added or NULL if the constant will not fit in this pool.
16083 Note that the code for insertion for a backwards reference can be
16084 somewhat confusing because the calculated offsets for each fix do
16085 not take into account the size of the pool (which is still under
 16086 construction). */
16087 static Mnode *
16088 add_minipool_backward_ref (Mfix *fix)
16090 /* If set, min_mp is the last pool_entry that has a lower constraint
16091 than the one we are trying to add. */
16092 Mnode *min_mp = NULL;
16093 /* This can be negative, since it is only a constraint. */
16094 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16095 Mnode *mp;
16097 /* If we can't reach the current pool from this insn, or if we can't
16098 insert this entry at the end of the pool without pushing other
16099 fixes out of range, then we don't try. This ensures that we
16100 can't fail later on. */
16101 if (min_address >= minipool_barrier->address
16102 || (minipool_vector_tail->min_address + fix->fix_size
16103 >= minipool_barrier->address))
16104 return NULL;
16106 /* Scan the pool to see if a constant with the same value has
16107 already been added. While we are doing this, also note the
16108 location where we must insert the constant if it doesn't already
16109 exist. */
16110 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16112 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16113 && fix->mode == mp->mode
16114 && (!LABEL_P (fix->value)
16115 || (CODE_LABEL_NUMBER (fix->value)
16116 == CODE_LABEL_NUMBER (mp->value)))
16117 && rtx_equal_p (fix->value, mp->value)
16118 /* Check that there is enough slack to move this entry to the
16119 end of the table (this is conservative). */
16120 && (mp->max_address
16121 > (minipool_barrier->address
16122 + minipool_vector_tail->offset
16123 + minipool_vector_tail->fix_size)))
16125 mp->refcount++;
16126 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16129 if (min_mp != NULL)
16130 mp->min_address += fix->fix_size;
16131 else
16133 /* Note the insertion point if necessary. */
16134 if (mp->min_address < min_address)
16136 /* For now, we do not allow the insertion of 8-byte alignment
16137 requiring nodes anywhere but at the start of the pool. */
16138 if (ARM_DOUBLEWORD_ALIGN
16139 && fix->fix_size >= 8 && mp->fix_size < 8)
16140 return NULL;
16141 else
16142 min_mp = mp;
16144 else if (mp->max_address
16145 < minipool_barrier->address + mp->offset + fix->fix_size)
16147 /* Inserting before this entry would push the fix beyond
16148 its maximum address (which can happen if we have
16149 re-located a forwards fix); force the new fix to come
16150 after it. */
16151 if (ARM_DOUBLEWORD_ALIGN
16152 && fix->fix_size >= 8 && mp->fix_size < 8)
16153 return NULL;
16154 else
16156 min_mp = mp;
16157 min_address = mp->min_address + fix->fix_size;
16160 /* Do not insert a non-8-byte aligned quantity before 8-byte
16161 aligned quantities. */
16162 else if (ARM_DOUBLEWORD_ALIGN
16163 && fix->fix_size < 8
16164 && mp->fix_size >= 8)
16166 min_mp = mp;
16167 min_address = mp->min_address + fix->fix_size;
16172 /* We need to create a new entry. */
16173 mp = XNEW (Mnode);
16174 mp->fix_size = fix->fix_size;
16175 mp->mode = fix->mode;
16176 mp->value = fix->value;
16177 mp->refcount = 1;
16178 mp->max_address = minipool_barrier->address + 65536;
16180 mp->min_address = min_address;
16182 if (min_mp == NULL)
16184 mp->prev = NULL;
16185 mp->next = minipool_vector_head;
16187 if (mp->next == NULL)
16189 minipool_vector_tail = mp;
16190 minipool_vector_label = gen_label_rtx ();
16192 else
16193 mp->next->prev = mp;
16195 minipool_vector_head = mp;
16197 else
16199 mp->next = min_mp->next;
16200 mp->prev = min_mp;
16201 min_mp->next = mp;
16203 if (mp->next != NULL)
16204 mp->next->prev = mp;
16205 else
16206 minipool_vector_tail = mp;
16209 /* Save the new entry. */
16210 min_mp = mp;
16212 if (mp->prev)
16213 mp = mp->prev;
16214 else
16215 mp->offset = 0;
16217 /* Scan over the following entries and adjust their offsets. */
16218 while (mp->next != NULL)
16220 if (mp->next->min_address < mp->min_address + mp->fix_size)
16221 mp->next->min_address = mp->min_address + mp->fix_size;
16223 if (mp->refcount)
16224 mp->next->offset = mp->offset + mp->fix_size;
16225 else
16226 mp->next->offset = mp->offset;
16228 mp = mp->next;
16231 return min_mp;
16234 static void
16235 assign_minipool_offsets (Mfix *barrier)
16237 HOST_WIDE_INT offset = 0;
16238 Mnode *mp;
16240 minipool_barrier = barrier;
16242 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16244 mp->offset = offset;
16246 if (mp->refcount > 0)
16247 offset += mp->fix_size;
16251 /* Output the literal table */
16252 static void
16253 dump_minipool (rtx_insn *scan)
16255 Mnode * mp;
16256 Mnode * nmp;
16257 int align64 = 0;
16259 if (ARM_DOUBLEWORD_ALIGN)
16260 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16261 if (mp->refcount > 0 && mp->fix_size >= 8)
16263 align64 = 1;
16264 break;
16267 if (dump_file)
16268 fprintf (dump_file,
16269 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16270 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16272 scan = emit_label_after (gen_label_rtx (), scan);
16273 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16274 scan = emit_label_after (minipool_vector_label, scan);
16276 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16278 if (mp->refcount > 0)
16280 if (dump_file)
16282 fprintf (dump_file,
16283 ";; Offset %u, min %ld, max %ld ",
16284 (unsigned) mp->offset, (unsigned long) mp->min_address,
16285 (unsigned long) mp->max_address);
16286 arm_print_value (dump_file, mp->value);
16287 fputc ('\n', dump_file);
16290 rtx val = copy_rtx (mp->value);
16292 switch (GET_MODE_SIZE (mp->mode))
16294 #ifdef HAVE_consttable_1
16295 case 1:
16296 scan = emit_insn_after (gen_consttable_1 (val), scan);
16297 break;
16299 #endif
16300 #ifdef HAVE_consttable_2
16301 case 2:
16302 scan = emit_insn_after (gen_consttable_2 (val), scan);
16303 break;
16305 #endif
16306 #ifdef HAVE_consttable_4
16307 case 4:
16308 scan = emit_insn_after (gen_consttable_4 (val), scan);
16309 break;
16311 #endif
16312 #ifdef HAVE_consttable_8
16313 case 8:
16314 scan = emit_insn_after (gen_consttable_8 (val), scan);
16315 break;
16317 #endif
16318 #ifdef HAVE_consttable_16
16319 case 16:
16320 scan = emit_insn_after (gen_consttable_16 (val), scan);
16321 break;
16323 #endif
16324 default:
16325 gcc_unreachable ();
16329 nmp = mp->next;
16330 free (mp);
16333 minipool_vector_head = minipool_vector_tail = NULL;
16334 scan = emit_insn_after (gen_consttable_end (), scan);
16335 scan = emit_barrier_after (scan);
16338 /* Return the cost of forcibly inserting a barrier after INSN. */
16339 static int
16340 arm_barrier_cost (rtx_insn *insn)
16342 /* Basing the location of the pool on the loop depth is preferable,
16343 but at the moment, the basic block information seems to be
16344 corrupt by this stage of the compilation. */
16345 int base_cost = 50;
16346 rtx_insn *next = next_nonnote_insn (insn);
16348 if (next != NULL && LABEL_P (next))
16349 base_cost -= 20;
16351 switch (GET_CODE (insn))
16353 case CODE_LABEL:
16354 /* It will always be better to place the table before the label, rather
16355 than after it. */
16356 return 50;
16358 case INSN:
16359 case CALL_INSN:
16360 return base_cost;
16362 case JUMP_INSN:
16363 return base_cost - 10;
16365 default:
16366 return base_cost + 10;
16370 /* Find the best place in the insn stream in the range
16371 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16372 Create the barrier by inserting a jump and add a new fix entry for
16373 it. */
16374 static Mfix *
16375 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16377 HOST_WIDE_INT count = 0;
16378 rtx_barrier *barrier;
16379 rtx_insn *from = fix->insn;
16380 /* The instruction after which we will insert the jump. */
16381 rtx_insn *selected = NULL;
16382 int selected_cost;
16383 /* The address at which the jump instruction will be placed. */
16384 HOST_WIDE_INT selected_address;
16385 Mfix * new_fix;
16386 HOST_WIDE_INT max_count = max_address - fix->address;
16387 rtx_code_label *label = gen_label_rtx ();
16389 selected_cost = arm_barrier_cost (from);
16390 selected_address = fix->address;
16392 while (from && count < max_count)
16394 rtx_jump_table_data *tmp;
16395 int new_cost;
16397 /* This code shouldn't have been called if there was a natural barrier
16398 within range. */
16399 gcc_assert (!BARRIER_P (from));
16401 /* Count the length of this insn. This must stay in sync with the
16402 code that pushes minipool fixes. */
16403 if (LABEL_P (from))
16404 count += get_label_padding (from);
16405 else
16406 count += get_attr_length (from);
16408 /* If there is a jump table, add its length. */
16409 if (tablejump_p (from, NULL, &tmp))
16411 count += get_jump_table_size (tmp);
16413 /* Jump tables aren't in a basic block, so base the cost on
16414 the dispatch insn. If we select this location, we will
16415 still put the pool after the table. */
16416 new_cost = arm_barrier_cost (from);
16418 if (count < max_count
16419 && (!selected || new_cost <= selected_cost))
16421 selected = tmp;
16422 selected_cost = new_cost;
16423 selected_address = fix->address + count;
16426 /* Continue after the dispatch table. */
16427 from = NEXT_INSN (tmp);
16428 continue;
16431 new_cost = arm_barrier_cost (from);
16433 if (count < max_count
16434 && (!selected || new_cost <= selected_cost))
16436 selected = from;
16437 selected_cost = new_cost;
16438 selected_address = fix->address + count;
16441 from = NEXT_INSN (from);
16444 /* Make sure that we found a place to insert the jump. */
16445 gcc_assert (selected);
16447 /* Make sure we do not split a call and its corresponding
16448 CALL_ARG_LOCATION note. */
16449 if (CALL_P (selected))
16451 rtx_insn *next = NEXT_INSN (selected);
16452 if (next && NOTE_P (next)
16453 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16454 selected = next;
16457 /* Create a new JUMP_INSN that branches around a barrier. */
16458 from = emit_jump_insn_after (gen_jump (label), selected);
16459 JUMP_LABEL (from) = label;
16460 barrier = emit_barrier_after (from);
16461 emit_label_after (label, barrier);
16463 /* Create a minipool barrier entry for the new barrier. */
16464 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16465 new_fix->insn = barrier;
16466 new_fix->address = selected_address;
16467 new_fix->next = fix->next;
16468 fix->next = new_fix;
16470 return new_fix;
16473 /* Record that there is a natural barrier in the insn stream at
16474 ADDRESS. */
16475 static void
16476 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16478 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16480 fix->insn = insn;
16481 fix->address = address;
16483 fix->next = NULL;
16484 if (minipool_fix_head != NULL)
16485 minipool_fix_tail->next = fix;
16486 else
16487 minipool_fix_head = fix;
16489 minipool_fix_tail = fix;
16492 /* Record INSN, which will need fixing up to load a value from the
16493 minipool. ADDRESS is the offset of the insn since the start of the
16494 function; LOC is a pointer to the part of the insn which requires
16495 fixing; VALUE is the constant that must be loaded, which is of type
16496 MODE. */
16497 static void
16498 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16499 machine_mode mode, rtx value)
16501 gcc_assert (!arm_disable_literal_pool);
16502 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16504 fix->insn = insn;
16505 fix->address = address;
16506 fix->loc = loc;
16507 fix->mode = mode;
16508 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16509 fix->value = value;
16510 fix->forwards = get_attr_pool_range (insn);
16511 fix->backwards = get_attr_neg_pool_range (insn);
16512 fix->minipool = NULL;
16514 /* If an insn doesn't have a range defined for it, then it isn't
16515 expecting to be reworked by this code. Better to stop now than
16516 to generate duff assembly code. */
16517 gcc_assert (fix->forwards || fix->backwards);
16519 /* If an entry requires 8-byte alignment then assume all constant pools
16520 require 4 bytes of padding. Trying to do this later on a per-pool
16521 basis is awkward because existing pool entries have to be modified. */
16522 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16523 minipool_pad = 4;
16525 if (dump_file)
16527 fprintf (dump_file,
16528 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16529 GET_MODE_NAME (mode),
16530 INSN_UID (insn), (unsigned long) address,
16531 -1 * (long)fix->backwards, (long)fix->forwards);
16532 arm_print_value (dump_file, fix->value);
16533 fprintf (dump_file, "\n");
16536 /* Add it to the chain of fixes. */
16537 fix->next = NULL;
16539 if (minipool_fix_head != NULL)
16540 minipool_fix_tail->next = fix;
16541 else
16542 minipool_fix_head = fix;
16544 minipool_fix_tail = fix;
16547 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16548 Returns the number of insns needed, or 99 if we always want to synthesize
16549 the value. */
16551 arm_max_const_double_inline_cost ()
16553 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16556 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16557 Returns the number of insns needed, or 99 if we don't know how to
16558 do it. */
16560 arm_const_double_inline_cost (rtx val)
16562 rtx lowpart, highpart;
16563 machine_mode mode;
16565 mode = GET_MODE (val);
16567 if (mode == VOIDmode)
16568 mode = DImode;
16570 gcc_assert (GET_MODE_SIZE (mode) == 8);
16572 lowpart = gen_lowpart (SImode, val);
16573 highpart = gen_highpart_mode (SImode, mode, val);
16575 gcc_assert (CONST_INT_P (lowpart));
16576 gcc_assert (CONST_INT_P (highpart));
16578 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16579 NULL_RTX, NULL_RTX, 0, 0)
16580 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16581 NULL_RTX, NULL_RTX, 0, 0));
16584 /* Cost of loading a SImode constant. */
16585 static inline int
16586 arm_const_inline_cost (enum rtx_code code, rtx val)
16588 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16589 NULL_RTX, NULL_RTX, 1, 0);
16592 /* Return true if it is worthwhile to split a 64-bit constant into two
16593 32-bit operations. This is the case if optimizing for size, or
16594 if we have load delay slots, or if one 32-bit part can be done with
16595 a single data operation. */
16596 bool
16597 arm_const_double_by_parts (rtx val)
16599 machine_mode mode = GET_MODE (val);
16600 rtx part;
16602 if (optimize_size || arm_ld_sched)
16603 return true;
16605 if (mode == VOIDmode)
16606 mode = DImode;
16608 part = gen_highpart_mode (SImode, mode, val);
16610 gcc_assert (CONST_INT_P (part));
16612 if (const_ok_for_arm (INTVAL (part))
16613 || const_ok_for_arm (~INTVAL (part)))
16614 return true;
16616 part = gen_lowpart (SImode, val);
16618 gcc_assert (CONST_INT_P (part));
16620 if (const_ok_for_arm (INTVAL (part))
16621 || const_ok_for_arm (~INTVAL (part)))
16622 return true;
16624 return false;
16627 /* Return true if it is possible to inline both the high and low parts
16628 of a 64-bit constant into 32-bit data processing instructions. */
16629 bool
16630 arm_const_double_by_immediates (rtx val)
16632 machine_mode mode = GET_MODE (val);
16633 rtx part;
16635 if (mode == VOIDmode)
16636 mode = DImode;
16638 part = gen_highpart_mode (SImode, mode, val);
16640 gcc_assert (CONST_INT_P (part));
16642 if (!const_ok_for_arm (INTVAL (part)))
16643 return false;
16645 part = gen_lowpart (SImode, val);
16647 gcc_assert (CONST_INT_P (part));
16649 if (!const_ok_for_arm (INTVAL (part)))
16650 return false;
16652 return true;
16655 /* Scan INSN and note any of its operands that need fixing.
16656 If DO_PUSHES is false we do not actually push any of the fixups
16657 needed. */
16658 static void
16659 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16661 int opno;
16663 extract_constrain_insn (insn);
16665 if (recog_data.n_alternatives == 0)
16666 return;
16668 /* Fill in recog_op_alt with information about the constraints of
16669 this insn. */
16670 preprocess_constraints (insn);
16672 const operand_alternative *op_alt = which_op_alt ();
16673 for (opno = 0; opno < recog_data.n_operands; opno++)
16675 /* Things we need to fix can only occur in inputs. */
16676 if (recog_data.operand_type[opno] != OP_IN)
16677 continue;
16679 /* If this alternative is a memory reference, then any mention
16680 of constants in this alternative is really to fool reload
16681 into allowing us to accept one there. We need to fix them up
16682 now so that we output the right code. */
16683 if (op_alt[opno].memory_ok)
16685 rtx op = recog_data.operand[opno];
16687 if (CONSTANT_P (op))
16689 if (do_pushes)
16690 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16691 recog_data.operand_mode[opno], op);
16693 else if (MEM_P (op)
16694 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16695 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16697 if (do_pushes)
16699 rtx cop = avoid_constant_pool_reference (op);
16701 /* Casting the address of something to a mode narrower
16702 than a word can cause avoid_constant_pool_reference()
16703 to return the pool reference itself. That's no good to
 16704 us here. Let's just hope that we can use the
16705 constant pool value directly. */
16706 if (op == cop)
16707 cop = get_pool_constant (XEXP (op, 0));
16709 push_minipool_fix (insn, address,
16710 recog_data.operand_loc[opno],
16711 recog_data.operand_mode[opno], cop);
16718 return;
16721 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16722 and unions in the context of ARMv8-M Security Extensions. It is used as a
16723 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
 16724 functions. The PADDING_BITS_TO_CLEAR pointer can point to either one
16725 or four masks, depending on whether it is being computed for a
16726 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16727 respectively. The tree for the type of the argument or a field within an
16728 argument is passed in ARG_TYPE, the current register this argument or field
16729 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16730 argument or field starts at is passed in STARTING_BIT and the last used bit
16731 is kept in LAST_USED_BIT which is also updated accordingly. */
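/* As an illustration (a hypothetical argument, assuming the usual 2-byte
   alignment of short): for struct { char c; short s; } starting in r0, the
   padding byte between C and S causes bits 8-15 of r0 to be recorded in
   PADDING_BITS_TO_CLEAR, r0 is marked in the returned mask as carrying
   useful data, and REGNO is advanced past r0.  */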
16733 static unsigned HOST_WIDE_INT
16734 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16735 uint32_t * padding_bits_to_clear,
16736 unsigned starting_bit, int * last_used_bit)
16739 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16741 if (TREE_CODE (arg_type) == RECORD_TYPE)
16743 unsigned current_bit = starting_bit;
16744 tree field;
16745 long int offset, size;
16748 field = TYPE_FIELDS (arg_type);
16749 while (field)
16751 /* The offset within a structure is always an offset from
 16752 the start of that structure. Make sure we take that into account
 16753 in the calculation of the register-based offset that we use here. */
16754 offset = starting_bit;
16755 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16756 offset %= 32;
16758 /* This is the actual size of the field, for bitfields this is the
16759 bitfield width and not the container size. */
16760 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16762 if (*last_used_bit != offset)
16764 if (offset < *last_used_bit)
16766 /* This field's offset is before the 'last_used_bit', that
16767 means this field goes on the next register. So we need to
16768 pad the rest of the current register and increase the
16769 register number. */
16770 uint32_t mask;
16771 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16772 mask++;
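/* MASK now has bits *LAST_USED_BIT through 31 set, e.g. 0xff000000 when
   *last_used_bit is 24.  */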
16774 padding_bits_to_clear[*regno] |= mask;
16775 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16776 (*regno)++;
16778 else
16780 /* Otherwise we pad the bits between the last field's end and
16781 the start of the new field. */
16782 uint32_t mask;
16784 mask = ((uint32_t)-1) >> (32 - offset);
16785 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
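/* MASK now covers bits *LAST_USED_BIT up to OFFSET - 1, e.g. 0x0000ff00
   when *last_used_bit is 8 and offset is 16.  */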
16786 padding_bits_to_clear[*regno] |= mask;
16788 current_bit = offset;
16791 /* Calculate further padding bits for inner structs/unions too. */
16792 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16794 *last_used_bit = current_bit;
16795 not_to_clear_reg_mask
16796 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16797 padding_bits_to_clear, offset,
16798 last_used_bit);
16800 else
16802 /* Update 'current_bit' with this field's size. If the
16803 'current_bit' lies in a subsequent register, update 'regno' and
16804 reset 'current_bit' to point to the current bit in that new
16805 register. */
16806 current_bit += size;
16807 while (current_bit >= 32)
 16809 current_bit -= 32;
16810 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16811 (*regno)++;
16813 *last_used_bit = current_bit;
16816 field = TREE_CHAIN (field);
16818 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16820 else if (TREE_CODE (arg_type) == UNION_TYPE)
16822 tree field, field_t;
16823 int i, regno_t, field_size;
16824 int max_reg = -1;
16825 int max_bit = -1;
16826 uint32_t mask;
16827 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16828 = {-1, -1, -1, -1};
16830 /* To compute the padding bits in a union we only consider bits as
16831 padding bits if they are always either a padding bit or fall outside a
 16832 field's size for all fields in the union. */
16833 field = TYPE_FIELDS (arg_type);
16834 while (field)
16836 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16837 = {0U, 0U, 0U, 0U};
16838 int last_used_bit_t = *last_used_bit;
16839 regno_t = *regno;
16840 field_t = TREE_TYPE (field);
16842 /* If the field's type is either a record or a union make sure to
16843 compute their padding bits too. */
16844 if (RECORD_OR_UNION_TYPE_P (field_t))
16845 not_to_clear_reg_mask
16846 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16847 &padding_bits_to_clear_t[0],
16848 starting_bit, &last_used_bit_t);
16849 else
16851 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16852 regno_t = (field_size / 32) + *regno;
16853 last_used_bit_t = (starting_bit + field_size) % 32;
16856 for (i = *regno; i < regno_t; i++)
16858 /* For all but the last register used by this field only keep the
16859 padding bits that were padding bits in this field. */
16860 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16863 /* For the last register, keep all padding bits that were padding
16864 bits in this field and any padding bits that are still valid
16865 as padding bits but fall outside of this field's size. */
16866 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16867 padding_bits_to_clear_res[regno_t]
16868 &= padding_bits_to_clear_t[regno_t] | mask;
16870 /* Update the maximum size of the fields in terms of registers used
16871 ('max_reg') and the 'last_used_bit' in said register. */
16872 if (max_reg < regno_t)
16874 max_reg = regno_t;
16875 max_bit = last_used_bit_t;
16877 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16878 max_bit = last_used_bit_t;
16880 field = TREE_CHAIN (field);
16883 /* Update the current padding_bits_to_clear using the intersection of the
16884 padding bits of all the fields. */
 16885 for (i = *regno; i < max_reg; i++)
16886 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16888 /* Do not keep trailing padding bits, we do not know yet whether this
16889 is the end of the argument. */
16890 mask = ((uint32_t) 1 << max_bit) - 1;
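/* MASK keeps only bits 0 .. MAX_BIT - 1 (e.g. 0x0000ffff when max_bit is
   16), so bits at or above MAX_BIT are dropped here.  */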
16891 padding_bits_to_clear[max_reg]
16892 |= padding_bits_to_clear_res[max_reg] & mask;
16894 *regno = max_reg;
16895 *last_used_bit = max_bit;
16897 else
16898 /* This function should only be used for structs and unions. */
16899 gcc_unreachable ();
16901 return not_to_clear_reg_mask;
16904 /* In the context of ARMv8-M Security Extensions, this function is used for both
16905 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16906 registers are used when returning or passing arguments, which is then
16907 returned as a mask. It will also compute a mask to indicate padding/unused
16908 bits for each of these registers, and passes this through the
16909 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16910 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16911 the starting register used to pass this argument or return value is passed
16912 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16913 for struct and union types. */
16915 static unsigned HOST_WIDE_INT
16916 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16917 uint32_t * padding_bits_to_clear)
16920 int last_used_bit = 0;
16921 unsigned HOST_WIDE_INT not_to_clear_mask;
16923 if (RECORD_OR_UNION_TYPE_P (arg_type))
16925 not_to_clear_mask
16926 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16927 padding_bits_to_clear, 0,
16928 &last_used_bit);
16931 /* If the 'last_used_bit' is not zero, that means we are still using a
16932 part of the last 'regno'. In such cases we must clear the trailing
 16933 bits. Otherwise we are not using regno and we should mark it as to
 16934 be cleared. */
16935 if (last_used_bit != 0)
16936 padding_bits_to_clear[regno]
16937 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16938 else
16939 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16941 else
16943 not_to_clear_mask = 0;
16944 /* We are not dealing with structs or unions, so these arguments may be
16945 passed in floating point registers too. In some cases a BLKmode is
16946 used when returning or passing arguments in multiple VFP registers. */
16947 if (GET_MODE (arg_rtx) == BLKmode)
16949 int i, arg_regs;
16950 rtx reg;
16952 /* This should really only occur when dealing with the hard-float
16953 ABI. */
16954 gcc_assert (TARGET_HARD_FLOAT_ABI);
16956 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16958 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16959 gcc_assert (REG_P (reg));
16961 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16963 /* If we are dealing with DF mode, make sure we don't
16964 clear either of the registers it addresses. */
16965 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16966 if (arg_regs > 1)
16968 unsigned HOST_WIDE_INT mask;
16969 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16970 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16971 not_to_clear_mask |= mask;
16975 else
16977 /* Otherwise we can rely on the MODE to determine how many registers
16978 are being used by this argument. */
16979 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16980 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16981 if (arg_regs > 1)
16983 unsigned HOST_WIDE_INT
16984 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16985 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16986 not_to_clear_mask |= mask;
16991 return not_to_clear_mask;
16994 /* Clears caller saved registers not used to pass arguments before a
16995 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16996 registers is done in the __gnu_cmse_nonsecure_call libcall.
16997 See libgcc/config/arm/cmse_nonsecure_call.S. */
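/* Illustrative sketch only, not part of GCC: the kind of user code that
   reaches this path when compiled with -mcmse.  The typedef follows the
   documented cmse_nonsecure_call usage; the names are made up.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) ns_callback_t (int);

void
call_into_nonsecure_world (ns_callback_t *callback)
{
  /* Caller-saved registers that do not carry the argument are cleared
     before the call emitted for this statement.  */
  callback (42);
}
#endif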
16999 static void
17000 cmse_nonsecure_call_clear_caller_saved (void)
17002 basic_block bb;
17004 FOR_EACH_BB_FN (bb, cfun)
17006 rtx_insn *insn;
17008 FOR_BB_INSNS (bb, insn)
17010 uint64_t to_clear_mask, float_mask;
17011 rtx_insn *seq;
17012 rtx pat, call, unspec, reg, cleared_reg, tmp;
17013 unsigned int regno, maxregno;
17014 rtx address;
17015 CUMULATIVE_ARGS args_so_far_v;
17016 cumulative_args_t args_so_far;
17017 tree arg_type, fntype;
17018 bool using_r4, first_param = true;
17019 function_args_iterator args_iter;
17020 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17021 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17023 if (!NONDEBUG_INSN_P (insn))
17024 continue;
17026 if (!CALL_P (insn))
17027 continue;
17029 pat = PATTERN (insn);
17030 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17031 call = XVECEXP (pat, 0, 0);
17033 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17034 if (GET_CODE (call) == SET)
17035 call = SET_SRC (call);
17037 /* Check if it is a cmse_nonsecure_call. */
17038 unspec = XEXP (call, 0);
17039 if (GET_CODE (unspec) != UNSPEC
17040 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17041 continue;
17043 /* Determine the caller-saved registers we need to clear. */
17044 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17045 maxregno = NUM_ARG_REGS - 1;
17046 /* Only look at the caller-saved floating point registers in case of
17047 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17048 lazy stores and loads which clear both caller- and callee-saved
17049 registers. */
17050 if (TARGET_HARD_FLOAT_ABI)
17052 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17053 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17054 to_clear_mask |= float_mask;
17055 maxregno = D7_VFP_REGNUM;
17058 /* Make sure the register used to hold the function address is not
17059 cleared. */
17060 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17061 gcc_assert (MEM_P (address));
17062 gcc_assert (REG_P (XEXP (address, 0)));
17063 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17065 /* Set basic block of call insn so that df rescan is performed on
17066 insns inserted here. */
17067 set_block_for_insn (insn, bb);
17068 df_set_flags (DF_DEFER_INSN_RESCAN);
17069 start_sequence ();
17071 /* Make sure the scheduler doesn't schedule other insns beyond
17072 here. */
17073 emit_insn (gen_blockage ());
17075 /* Walk through all arguments and clear registers appropriately. */
17077 fntype = TREE_TYPE (MEM_EXPR (address));
17078 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17079 NULL_TREE);
17080 args_so_far = pack_cumulative_args (&args_so_far_v);
17081 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17083 rtx arg_rtx;
17084 machine_mode arg_mode = TYPE_MODE (arg_type);
17086 if (VOID_TYPE_P (arg_type))
17087 continue;
17089 if (!first_param)
17090 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17091 true);
17093 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17094 true);
17095 gcc_assert (REG_P (arg_rtx));
17096 to_clear_mask
17097 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17098 REGNO (arg_rtx),
17099 padding_bits_to_clear_ptr);
17101 first_param = false;
17104 /* Clear padding bits where needed. */
17105 cleared_reg = XEXP (address, 0);
17106 reg = gen_rtx_REG (SImode, IP_REGNUM);
17107 using_r4 = false;
17108 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17110 if (padding_bits_to_clear[regno] == 0)
17111 continue;
17113 /* If this is a Thumb-1 target, copy the address of the function
17114 we are calling from 'r4' into 'ip' such that we can use r4 to
17115 clear the unused bits in the arguments. */
17116 if (TARGET_THUMB1 && !using_r4)
17118 using_r4 = true;
17119 reg = cleared_reg;
17120 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17121 reg);
17124 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17125 emit_move_insn (reg, tmp);
17126 /* Also fill the top half of the negated
17127 padding_bits_to_clear. */
17128 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17130 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17131 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17132 GEN_INT (16),
17133 GEN_INT (16)),
17134 tmp));
17137 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17138 gen_rtx_REG (SImode, regno),
17139 reg));
17142 if (using_r4)
17143 emit_move_insn (cleared_reg,
17144 gen_rtx_REG (SImode, IP_REGNUM));
17146 /* We use right shift and left shift to clear the LSB of the address
17147 we jump to instead of using bic, to avoid having to use an extra
17148 register on Thumb-1. */
17149 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17150 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17151 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17152 emit_insn (gen_rtx_SET (cleared_reg, tmp));
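/* Illustration: for an address ADDR the pair of shifts above computes
   (ADDR >> 1) << 1, which equals ADDR & ~1, i.e. the same effect as a
   BIC with #1 but without needing an extra register on Thumb-1.  */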
17154 /* Clear all registers that could leak information before doing a
17155 non-secure call. */
17156 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17158 if (!(to_clear_mask & (1LL << regno)))
17159 continue;
17161 /* If regno is an even vfp register and its successor is also to
17162 be cleared, use vmov. */
17163 if (IS_VFP_REGNUM (regno))
17165 if (TARGET_VFP_DOUBLE
17166 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17167 && to_clear_mask & (1LL << (regno + 1)))
17168 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17169 CONST0_RTX (DFmode));
17170 else
17171 emit_move_insn (gen_rtx_REG (SFmode, regno),
17172 CONST0_RTX (SFmode));
17174 else
17175 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17178 seq = get_insns ();
17179 end_sequence ();
17180 emit_insn_before (seq, insn);
17186 /* Rewrite move insn into subtract of 0 if the condition codes will
17187 be useful in the next conditional jump insn. */
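/* Roughly (illustrative Thumb-1 assembly, register names made up):

	mov	r3, r2		@ move does not usefully set the flags
	cmp	r3, #0
	beq	.L1

   is rewritten so that the move becomes "subs r3, r2, #0", which sets the
   condition codes itself and lets the compare against zero be omitted when
   the branch is output.  */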
17189 static void
17190 thumb1_reorg (void)
17192 basic_block bb;
17194 FOR_EACH_BB_FN (bb, cfun)
17196 rtx dest, src;
17197 rtx cmp, op0, op1, set = NULL;
17198 rtx_insn *prev, *insn = BB_END (bb);
17199 bool insn_clobbered = false;
17201 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17202 insn = PREV_INSN (insn);
17204 /* Find the last cbranchsi4_insn in basic block BB. */
17205 if (insn == BB_HEAD (bb)
17206 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17207 continue;
17209 /* Get the register with which we are comparing. */
17210 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17211 op0 = XEXP (cmp, 0);
17212 op1 = XEXP (cmp, 1);
17214 /* Check that comparison is against ZERO. */
17215 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17216 continue;
17218 /* Find the first flag setting insn before INSN in basic block BB. */
17219 gcc_assert (insn != BB_HEAD (bb));
17220 for (prev = PREV_INSN (insn);
17221 (!insn_clobbered
17222 && prev != BB_HEAD (bb)
17223 && (NOTE_P (prev)
17224 || DEBUG_INSN_P (prev)
17225 || ((set = single_set (prev)) != NULL
17226 && get_attr_conds (prev) == CONDS_NOCOND)));
17227 prev = PREV_INSN (prev))
17229 if (reg_set_p (op0, prev))
17230 insn_clobbered = true;
17233 /* Skip if op0 is clobbered by an insn other than PREV. */
17234 if (insn_clobbered)
17235 continue;
17237 if (!set)
17238 continue;
17240 dest = SET_DEST (set);
17241 src = SET_SRC (set);
17242 if (!low_register_operand (dest, SImode)
17243 || !low_register_operand (src, SImode))
17244 continue;
17246 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17247 in INSN. Both src and dest of the move insn are checked. */
17248 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17250 dest = copy_rtx (dest);
17251 src = copy_rtx (src);
17252 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17253 PATTERN (prev) = gen_rtx_SET (dest, src);
17254 INSN_CODE (prev) = -1;
17255 /* Set test register in INSN to dest. */
17256 XEXP (cmp, 0) = copy_rtx (dest);
17257 INSN_CODE (insn) = -1;
17262 /* Convert instructions to their cc-clobbering variant if possible, since
17263 that allows us to use smaller encodings. */
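/* For example (illustrative): when the condition codes are dead,

	add	r0, r1, r2	@ needs a 32-bit Thumb-2 encoding

   can become

	adds	r0, r1, r2	@ flag-setting form, 16-bit encoding

   which is what the CONV/SWAP_CONV actions below arrange by adding a
   clobber of CC_REGNUM to the pattern.  */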
17265 static void
17266 thumb2_reorg (void)
17268 basic_block bb;
17269 regset_head live;
17271 INIT_REG_SET (&live);
17273 /* We are freeing block_for_insn in the toplev to keep compatibility
17274 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17275 compute_bb_for_insn ();
17276 df_analyze ();
17278 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17280 FOR_EACH_BB_FN (bb, cfun)
17282 if ((current_tune->disparage_flag_setting_t16_encodings
17283 == tune_params::DISPARAGE_FLAGS_ALL)
17284 && optimize_bb_for_speed_p (bb))
17285 continue;
17287 rtx_insn *insn;
17288 Convert_Action action = SKIP;
17289 Convert_Action action_for_partial_flag_setting
17290 = ((current_tune->disparage_flag_setting_t16_encodings
17291 != tune_params::DISPARAGE_FLAGS_NEITHER)
17292 && optimize_bb_for_speed_p (bb))
17293 ? SKIP : CONV;
17295 COPY_REG_SET (&live, DF_LR_OUT (bb));
17296 df_simulate_initialize_backwards (bb, &live);
17297 FOR_BB_INSNS_REVERSE (bb, insn)
17299 if (NONJUMP_INSN_P (insn)
17300 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17301 && GET_CODE (PATTERN (insn)) == SET)
17303 action = SKIP;
17304 rtx pat = PATTERN (insn);
17305 rtx dst = XEXP (pat, 0);
17306 rtx src = XEXP (pat, 1);
17307 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17309 if (UNARY_P (src) || BINARY_P (src))
17310 op0 = XEXP (src, 0);
17312 if (BINARY_P (src))
17313 op1 = XEXP (src, 1);
17315 if (low_register_operand (dst, SImode))
17317 switch (GET_CODE (src))
17319 case PLUS:
17320 /* Adding two registers and storing the result
17321 in the first source is already a 16-bit
17322 operation. */
17323 if (rtx_equal_p (dst, op0)
17324 && register_operand (op1, SImode))
17325 break;
17327 if (low_register_operand (op0, SImode))
17329 /* ADDS <Rd>,<Rn>,<Rm> */
17330 if (low_register_operand (op1, SImode))
17331 action = CONV;
17332 /* ADDS <Rdn>,#<imm8> */
17333 /* SUBS <Rdn>,#<imm8> */
17334 else if (rtx_equal_p (dst, op0)
17335 && CONST_INT_P (op1)
17336 && IN_RANGE (INTVAL (op1), -255, 255))
17337 action = CONV;
17338 /* ADDS <Rd>,<Rn>,#<imm3> */
17339 /* SUBS <Rd>,<Rn>,#<imm3> */
17340 else if (CONST_INT_P (op1)
17341 && IN_RANGE (INTVAL (op1), -7, 7))
17342 action = CONV;
17344 /* ADCS <Rd>, <Rn> */
17345 else if (GET_CODE (XEXP (src, 0)) == PLUS
17346 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17347 && low_register_operand (XEXP (XEXP (src, 0), 1),
17348 SImode)
17349 && COMPARISON_P (op1)
17350 && cc_register (XEXP (op1, 0), VOIDmode)
17351 && maybe_get_arm_condition_code (op1) == ARM_CS
17352 && XEXP (op1, 1) == const0_rtx)
17353 action = CONV;
17354 break;
17356 case MINUS:
17357 /* RSBS <Rd>,<Rn>,#0
17358 Not handled here: see NEG below. */
17359 /* SUBS <Rd>,<Rn>,#<imm3>
17360 SUBS <Rdn>,#<imm8>
17361 Not handled here: see PLUS above. */
17362 /* SUBS <Rd>,<Rn>,<Rm> */
17363 if (low_register_operand (op0, SImode)
17364 && low_register_operand (op1, SImode))
17365 action = CONV;
17366 break;
17368 case MULT:
17369 /* MULS <Rdm>,<Rn>,<Rdm>
17370 As an exception to the rule, this is only used
17371 when optimizing for size since MULS is slow on all
17372 known implementations. We do not even want to use
17373 MULS in cold code, if optimizing for speed, so we
17374 test the global flag here. */
17375 if (!optimize_size)
17376 break;
17377 /* Fall through. */
17378 case AND:
17379 case IOR:
17380 case XOR:
17381 /* ANDS <Rdn>,<Rm> */
17382 if (rtx_equal_p (dst, op0)
17383 && low_register_operand (op1, SImode))
17384 action = action_for_partial_flag_setting;
17385 else if (rtx_equal_p (dst, op1)
17386 && low_register_operand (op0, SImode))
17387 action = action_for_partial_flag_setting == SKIP
17388 ? SKIP : SWAP_CONV;
17389 break;
17391 case ASHIFTRT:
17392 case ASHIFT:
17393 case LSHIFTRT:
17394 /* ASRS <Rdn>,<Rm> */
17395 /* LSRS <Rdn>,<Rm> */
17396 /* LSLS <Rdn>,<Rm> */
17397 if (rtx_equal_p (dst, op0)
17398 && low_register_operand (op1, SImode))
17399 action = action_for_partial_flag_setting;
17400 /* ASRS <Rd>,<Rm>,#<imm5> */
17401 /* LSRS <Rd>,<Rm>,#<imm5> */
17402 /* LSLS <Rd>,<Rm>,#<imm5> */
17403 else if (low_register_operand (op0, SImode)
17404 && CONST_INT_P (op1)
17405 && IN_RANGE (INTVAL (op1), 0, 31))
17406 action = action_for_partial_flag_setting;
17407 break;
17409 case ROTATERT:
17410 /* RORS <Rdn>,<Rm> */
17411 if (rtx_equal_p (dst, op0)
17412 && low_register_operand (op1, SImode))
17413 action = action_for_partial_flag_setting;
17414 break;
17416 case NOT:
17417 /* MVNS <Rd>,<Rm> */
17418 if (low_register_operand (op0, SImode))
17419 action = action_for_partial_flag_setting;
17420 break;
17422 case NEG:
17423 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17424 if (low_register_operand (op0, SImode))
17425 action = CONV;
17426 break;
17428 case CONST_INT:
17429 /* MOVS <Rd>,#<imm8> */
17430 if (CONST_INT_P (src)
17431 && IN_RANGE (INTVAL (src), 0, 255))
17432 action = action_for_partial_flag_setting;
17433 break;
17435 case REG:
17436 /* MOVS and MOV<c> with registers have different
17437 encodings, so are not relevant here. */
17438 break;
17440 default:
17441 break;
17445 if (action != SKIP)
17447 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17448 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17449 rtvec vec;
17451 if (action == SWAP_CONV)
17453 src = copy_rtx (src);
17454 XEXP (src, 0) = op1;
17455 XEXP (src, 1) = op0;
17456 pat = gen_rtx_SET (dst, src);
17457 vec = gen_rtvec (2, pat, clobber);
17459 else /* action == CONV */
17460 vec = gen_rtvec (2, pat, clobber);
17462 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17463 INSN_CODE (insn) = -1;
17467 if (NONDEBUG_INSN_P (insn))
17468 df_simulate_one_insn_backwards (bb, insn, &live);
17472 CLEAR_REG_SET (&live);
17475 /* GCC puts the pool in the wrong place for ARM, since we can only
17476 load addresses a limited distance around the pc. We do some
17477 special munging to move the constant pool values to the correct
17478 point in the code. */
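/* For instance, a constant that cannot be encoded as an immediate is loaded
   with a pc-relative ldr (illustrative assembly, label made up):

	ldr	r0, .L42
	...
   .L42:
	.word	0x12345678

   The pc-relative offset of such loads is limited (on the order of 4K for
   ARM and much less for Thumb-1), so the minipools built below must be
   placed close enough to the instructions that reference them.  */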
17479 static void
17480 arm_reorg (void)
17482 rtx_insn *insn;
17483 HOST_WIDE_INT address = 0;
17484 Mfix * fix;
17486 if (use_cmse)
17487 cmse_nonsecure_call_clear_caller_saved ();
17488 if (TARGET_THUMB1)
17489 thumb1_reorg ();
17490 else if (TARGET_THUMB2)
17491 thumb2_reorg ();
17493 /* Ensure all insns that must be split have been split at this point.
17494 Otherwise, the pool placement code below may compute incorrect
17495 insn lengths. Note that when optimizing, all insns have already
17496 been split at this point. */
17497 if (!optimize)
17498 split_all_insns_noflow ();
17500 /* When literal pools are disabled it should never be necessary to create
17501 one, so make sure we do not attempt to. */
17502 if (arm_disable_literal_pool)
17503 return;
17505 minipool_fix_head = minipool_fix_tail = NULL;
17507 /* The first insn must always be a note, or the code below won't
17508 scan it properly. */
17509 insn = get_insns ();
17510 gcc_assert (NOTE_P (insn));
17511 minipool_pad = 0;
17513 /* Scan all the insns and record the operands that will need fixing. */
17514 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17516 if (BARRIER_P (insn))
17517 push_minipool_barrier (insn, address);
17518 else if (INSN_P (insn))
17520 rtx_jump_table_data *table;
17522 note_invalid_constants (insn, address, true);
17523 address += get_attr_length (insn);
17525 /* If the insn is a vector jump, add the size of the table
17526 and skip the table. */
17527 if (tablejump_p (insn, NULL, &table))
17529 address += get_jump_table_size (table);
17530 insn = table;
17533 else if (LABEL_P (insn))
17534 /* Add the worst-case padding due to alignment. We don't add
17535 the _current_ padding because the minipool insertions
17536 themselves might change it. */
17537 address += get_label_padding (insn);
17540 fix = minipool_fix_head;
17542 /* Now scan the fixups and perform the required changes. */
17543 while (fix)
17545 Mfix * ftmp;
17546 Mfix * fdel;
17547 Mfix * last_added_fix;
17548 Mfix * last_barrier = NULL;
17549 Mfix * this_fix;
17551 /* Skip any further barriers before the next fix. */
17552 while (fix && BARRIER_P (fix->insn))
17553 fix = fix->next;
17555 /* No more fixes. */
17556 if (fix == NULL)
17557 break;
17559 last_added_fix = NULL;
17561 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17563 if (BARRIER_P (ftmp->insn))
17565 if (ftmp->address >= minipool_vector_head->max_address)
17566 break;
17568 last_barrier = ftmp;
17570 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17571 break;
17573 last_added_fix = ftmp; /* Keep track of the last fix added. */
17576 /* If we found a barrier, drop back to that; any fixes that we
17577 could have reached but come after the barrier will now go in
17578 the next mini-pool. */
17579 if (last_barrier != NULL)
17581 /* Reduce the refcount for those fixes that won't go into this
17582 pool after all. */
17583 for (fdel = last_barrier->next;
17584 fdel && fdel != ftmp;
17585 fdel = fdel->next)
17587 fdel->minipool->refcount--;
17588 fdel->minipool = NULL;
17591 ftmp = last_barrier;
17593 else
17595 /* ftmp is the first fix that we can't fit into this pool and
17596 there are no natural barriers that we could use. Insert a
17597 new barrier in the code somewhere between the previous
17598 fix and this one, and arrange to jump around it. */
17599 HOST_WIDE_INT max_address;
17601 /* The last item on the list of fixes must be a barrier, so
17602 we can never run off the end of the list of fixes without
17603 last_barrier being set. */
17604 gcc_assert (ftmp);
17606 max_address = minipool_vector_head->max_address;
17607 /* Check that there isn't another fix that is in range that
17608 we couldn't fit into this pool because the pool was
17609 already too large: we need to put the pool before such an
17610 instruction. The pool itself may come just after the
17611 fix because create_fix_barrier also allows space for a
17612 jump instruction. */
17613 if (ftmp->address < max_address)
17614 max_address = ftmp->address + 1;
17616 last_barrier = create_fix_barrier (last_added_fix, max_address);
17619 assign_minipool_offsets (last_barrier);
17621 while (ftmp)
17623 if (!BARRIER_P (ftmp->insn)
17624 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17625 == NULL))
17626 break;
17628 ftmp = ftmp->next;
17631 /* Scan over the fixes we have identified for this pool, fixing them
17632 up and adding the constants to the pool itself. */
17633 for (this_fix = fix; this_fix && ftmp != this_fix;
17634 this_fix = this_fix->next)
17635 if (!BARRIER_P (this_fix->insn))
17637 rtx addr
17638 = plus_constant (Pmode,
17639 gen_rtx_LABEL_REF (VOIDmode,
17640 minipool_vector_label),
17641 this_fix->minipool->offset);
17642 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17645 dump_minipool (last_barrier->insn);
17646 fix = ftmp;
17649 /* From now on we must synthesize any constants that we can't handle
17650 directly. This can happen if the RTL gets split during final
17651 instruction generation. */
17652 cfun->machine->after_arm_reorg = 1;
17654 /* Free the minipool memory. */
17655 obstack_free (&minipool_obstack, minipool_startobj);
17658 /* Routines to output assembly language. */
17660 /* Return the string representation of the passed-in real value. */
17661 static const char *
17662 fp_const_from_val (REAL_VALUE_TYPE *r)
17664 if (!fp_consts_inited)
17665 init_fp_table ();
17667 gcc_assert (real_equal (r, &value_fp0));
17668 return "0";
17671 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17672 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17673 insn is in the list, and UPDATE is true iff the list contains an
17674 explicit update of the base register. */
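/* Typical outputs (illustrative): "pop {r4, r5, pc}" when the base register
   is SP with an update and this is not an interrupt return, and something
   like "ldmfd sp!, {r4, r5, pc}^" when returning from an interrupt.  */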
17675 void
17676 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17677 bool update)
17679 int i;
17680 char pattern[100];
17681 int offset;
17682 const char *conditional;
17683 int num_saves = XVECLEN (operands[0], 0);
17684 unsigned int regno;
17685 unsigned int regno_base = REGNO (operands[1]);
17686 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17688 offset = 0;
17689 offset += update ? 1 : 0;
17690 offset += return_pc ? 1 : 0;
17692 /* Is the base register in the list? */
17693 for (i = offset; i < num_saves; i++)
17695 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17696 /* If SP is in the list, then the base register must be SP. */
17697 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17698 /* If base register is in the list, there must be no explicit update. */
17699 if (regno == regno_base)
17700 gcc_assert (!update);
17703 conditional = reverse ? "%?%D0" : "%?%d0";
17704 /* Can't use POP if returning from an interrupt. */
17705 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17706 sprintf (pattern, "pop%s\t{", conditional);
17707 else
17709 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17710 It's just a convention; their semantics are identical. */
17711 if (regno_base == SP_REGNUM)
17712 sprintf (pattern, "ldmfd%s\t", conditional);
17713 else if (update)
17714 sprintf (pattern, "ldmia%s\t", conditional);
17715 else
17716 sprintf (pattern, "ldm%s\t", conditional);
17718 strcat (pattern, reg_names[regno_base]);
17719 if (update)
17720 strcat (pattern, "!, {");
17721 else
17722 strcat (pattern, ", {");
17725 /* Output the first destination register. */
17726 strcat (pattern,
17727 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17729 /* Output the rest of the destination registers. */
17730 for (i = offset + 1; i < num_saves; i++)
17732 strcat (pattern, ", ");
17733 strcat (pattern,
17734 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17737 strcat (pattern, "}");
17739 if (interrupt_p && return_pc)
17740 strcat (pattern, "^");
17742 output_asm_insn (pattern, &cond);
17746 /* Output the assembly for a store multiple. */
17748 const char *
17749 vfp_output_vstmd (rtx * operands)
17751 char pattern[100];
17752 int p;
17753 int base;
17754 int i;
17755 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17756 ? XEXP (operands[0], 0)
17757 : XEXP (XEXP (operands[0], 0), 0);
17758 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17760 if (push_p)
17761 strcpy (pattern, "vpush%?.64\t{%P1");
17762 else
17763 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17765 p = strlen (pattern);
17767 gcc_assert (REG_P (operands[1]));
17769 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17770 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17772 p += sprintf (&pattern[p], ", d%d", base + i);
17774 strcpy (&pattern[p], "}");
17776 output_asm_insn (pattern, operands);
17777 return "";
17781 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17782 number of bytes pushed. */
17784 static int
17785 vfp_emit_fstmd (int base_reg, int count)
17787 rtx par;
17788 rtx dwarf;
17789 rtx tmp, reg;
17790 int i;
17792 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17793 register pairs are stored by a store multiple insn. We avoid this
17794 by pushing an extra pair. */
17795 if (count == 2 && !arm_arch6)
17797 if (base_reg == LAST_VFP_REGNUM - 3)
17798 base_reg -= 2;
17799 count++;
17802 /* FSTMD may not store more than 16 doubleword registers at once. Split
17803 larger stores into multiple parts (up to a maximum of two, in
17804 practice). */
17805 if (count > 16)
17807 int saved;
17808 /* NOTE: base_reg is an internal register number, so each D register
17809 counts as 2. */
17810 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17811 saved += vfp_emit_fstmd (base_reg, 16);
17812 return saved;
17815 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17816 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17818 reg = gen_rtx_REG (DFmode, base_reg);
17819 base_reg += 2;
17821 XVECEXP (par, 0, 0)
17822 = gen_rtx_SET (gen_frame_mem
17823 (BLKmode,
17824 gen_rtx_PRE_MODIFY (Pmode,
17825 stack_pointer_rtx,
17826 plus_constant
17827 (Pmode, stack_pointer_rtx,
17828 - (count * 8)))
17830 gen_rtx_UNSPEC (BLKmode,
17831 gen_rtvec (1, reg),
17832 UNSPEC_PUSH_MULT));
17834 tmp = gen_rtx_SET (stack_pointer_rtx,
17835 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17836 RTX_FRAME_RELATED_P (tmp) = 1;
17837 XVECEXP (dwarf, 0, 0) = tmp;
17839 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17840 RTX_FRAME_RELATED_P (tmp) = 1;
17841 XVECEXP (dwarf, 0, 1) = tmp;
17843 for (i = 1; i < count; i++)
17845 reg = gen_rtx_REG (DFmode, base_reg);
17846 base_reg += 2;
17847 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17849 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17850 plus_constant (Pmode,
17851 stack_pointer_rtx,
17852 i * 8)),
17853 reg);
17854 RTX_FRAME_RELATED_P (tmp) = 1;
17855 XVECEXP (dwarf, 0, i + 1) = tmp;
17858 par = emit_insn (par);
17859 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17860 RTX_FRAME_RELATED_P (par) = 1;
17862 return count * 8;
17865 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17866 has the cmse_nonsecure_call attribute; return false otherwise. */
17868 bool
17869 detect_cmse_nonsecure_call (tree addr)
17871 if (!addr)
17872 return FALSE;
17874 tree fntype = TREE_TYPE (addr);
17875 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17876 TYPE_ATTRIBUTES (fntype)))
17877 return TRUE;
17878 return FALSE;
17882 /* Emit a call instruction with pattern PAT. ADDR is the address of
17883 the call target. */
17885 void
17886 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17888 rtx insn;
17890 insn = emit_call_insn (pat);
17892 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17893 If the call might use such an entry, add a use of the PIC register
17894 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17895 if (TARGET_VXWORKS_RTP
17896 && flag_pic
17897 && !sibcall
17898 && GET_CODE (addr) == SYMBOL_REF
17899 && (SYMBOL_REF_DECL (addr)
17900 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17901 : !SYMBOL_REF_LOCAL_P (addr)))
17903 require_pic_register ();
17904 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17907 if (TARGET_AAPCS_BASED)
17909 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17910 linker. We need to add an IP clobber to allow setting
17911 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17912 is not needed since it's a fixed register. */
17913 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17914 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17918 /* Output a 'call' insn. */
17919 const char *
17920 output_call (rtx *operands)
17922 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17924 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17925 if (REGNO (operands[0]) == LR_REGNUM)
17927 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17928 output_asm_insn ("mov%?\t%0, %|lr", operands);
17931 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17933 if (TARGET_INTERWORK || arm_arch4t)
17934 output_asm_insn ("bx%?\t%0", operands);
17935 else
17936 output_asm_insn ("mov%?\t%|pc, %0", operands);
17938 return "";
17941 /* Output a move from arm registers to arm registers of a long double
17942 OPERANDS[0] is the destination.
17943 OPERANDS[1] is the source. */
17944 const char *
17945 output_mov_long_double_arm_from_arm (rtx *operands)
17947 /* We have to be careful here because the two might overlap. */
17948 int dest_start = REGNO (operands[0]);
17949 int src_start = REGNO (operands[1]);
17950 rtx ops[2];
17951 int i;
17953 if (dest_start < src_start)
17955 for (i = 0; i < 3; i++)
17957 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17958 ops[1] = gen_rtx_REG (SImode, src_start + i);
17959 output_asm_insn ("mov%?\t%0, %1", ops);
17962 else
17964 for (i = 2; i >= 0; i--)
17966 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17967 ops[1] = gen_rtx_REG (SImode, src_start + i);
17968 output_asm_insn ("mov%?\t%0, %1", ops);
17972 return "";
17975 void
17976 arm_emit_movpair (rtx dest, rtx src)
17978 /* If the src is an immediate, simplify it. */
17979 if (CONST_INT_P (src))
17981 HOST_WIDE_INT val = INTVAL (src);
17982 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17983 if ((val >> 16) & 0x0000ffff)
17985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17986 GEN_INT (16)),
17987 GEN_INT ((val >> 16) & 0x0000ffff));
17988 rtx_insn *insn = get_last_insn ();
17989 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17991 return;
17993 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17994 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17995 rtx_insn *insn = get_last_insn ();
17996 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17999 /* Output a move between double words. It must be REG<-MEM
18000 or MEM<-REG. */
18001 const char *
18002 output_move_double (rtx *operands, bool emit, int *count)
18004 enum rtx_code code0 = GET_CODE (operands[0]);
18005 enum rtx_code code1 = GET_CODE (operands[1]);
18006 rtx otherops[3];
18007 if (count)
18008 *count = 1;
18010 /* The only case when this might happen is when
18011 you are looking at the length of a DImode instruction
18012 that has an invalid constant in it. */
18013 if (code0 == REG && code1 != MEM)
18015 gcc_assert (!emit);
18016 *count = 2;
18017 return "";
18020 if (code0 == REG)
18022 unsigned int reg0 = REGNO (operands[0]);
18024 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18026 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18028 switch (GET_CODE (XEXP (operands[1], 0)))
18030 case REG:
18032 if (emit)
18034 if (TARGET_LDRD
18035 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18036 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18037 else
18038 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18040 break;
18042 case PRE_INC:
18043 gcc_assert (TARGET_LDRD);
18044 if (emit)
18045 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18046 break;
18048 case PRE_DEC:
18049 if (emit)
18051 if (TARGET_LDRD)
18052 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18053 else
18054 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18056 break;
18058 case POST_INC:
18059 if (emit)
18061 if (TARGET_LDRD)
18062 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18063 else
18064 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18066 break;
18068 case POST_DEC:
18069 gcc_assert (TARGET_LDRD);
18070 if (emit)
18071 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18072 break;
18074 case PRE_MODIFY:
18075 case POST_MODIFY:
18076 /* Autoincrement addressing modes should never have overlapping
18077 base and destination registers, and overlapping index registers
18078 are already prohibited, so this doesn't need to worry about
18079 fix_cm3_ldrd. */
18080 otherops[0] = operands[0];
18081 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18082 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18084 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18086 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18088 /* Registers overlap so split out the increment. */
18089 if (emit)
18091 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18092 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18094 if (count)
18095 *count = 2;
18097 else
18099 /* Use a single insn if we can.
18100 FIXME: IWMMXT allows offsets larger than ldrd can
18101 handle, fix these up with a pair of ldr. */
18102 if (TARGET_THUMB2
18103 || !CONST_INT_P (otherops[2])
18104 || (INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256))
18107 if (emit)
18108 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18110 else
18112 if (emit)
18114 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18115 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18117 if (count)
18118 *count = 2;
18123 else
18125 /* Use a single insn if we can.
18126 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18127 fix these up with a pair of ldr. */
18128 if (TARGET_THUMB2
18129 || !CONST_INT_P (otherops[2])
18130 || (INTVAL (otherops[2]) > -256
18131 && INTVAL (otherops[2]) < 256))
18133 if (emit)
18134 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18136 else
18138 if (emit)
18140 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18141 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18143 if (count)
18144 *count = 2;
18147 break;
18149 case LABEL_REF:
18150 case CONST:
18151 /* We might be able to use ldrd %0, %1 here. However, the range is
18152 different from that of ldr/adr, and it is broken on some ARMv7-M
18153 implementations. */
18154 /* Use the second register of the pair to avoid problematic
18155 overlap. */
18156 otherops[1] = operands[1];
18157 if (emit)
18158 output_asm_insn ("adr%?\t%0, %1", otherops);
18159 operands[1] = otherops[0];
18160 if (emit)
18162 if (TARGET_LDRD)
18163 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18164 else
18165 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18168 if (count)
18169 *count = 2;
18170 break;
18172 /* ??? This needs checking for thumb2. */
18173 default:
18174 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18175 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18177 otherops[0] = operands[0];
18178 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18179 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18181 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18183 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18185 switch ((int) INTVAL (otherops[2]))
18187 case -8:
18188 if (emit)
18189 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18190 return "";
18191 case -4:
18192 if (TARGET_THUMB2)
18193 break;
18194 if (emit)
18195 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18196 return "";
18197 case 4:
18198 if (TARGET_THUMB2)
18199 break;
18200 if (emit)
18201 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18202 return "";
18205 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18206 operands[1] = otherops[0];
18207 if (TARGET_LDRD
18208 && (REG_P (otherops[2])
18209 || TARGET_THUMB2
18210 || (CONST_INT_P (otherops[2])
18211 && INTVAL (otherops[2]) > -256
18212 && INTVAL (otherops[2]) < 256)))
18214 if (reg_overlap_mentioned_p (operands[0],
18215 otherops[2]))
18217 /* Swap base and index registers over to
18218 avoid a conflict. */
18219 std::swap (otherops[1], otherops[2]);
18221 /* If both registers conflict, it will usually
18222 have been fixed by a splitter. */
18223 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18224 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18226 if (emit)
18228 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18229 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18231 if (count)
18232 *count = 2;
18234 else
18236 otherops[0] = operands[0];
18237 if (emit)
18238 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18240 return "";
18243 if (CONST_INT_P (otherops[2]))
18245 if (emit)
18247 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18248 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18249 else
18250 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18253 else
18255 if (emit)
18256 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18259 else
18261 if (emit)
18262 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18265 if (count)
18266 *count = 2;
18268 if (TARGET_LDRD)
18269 return "ldrd%?\t%0, [%1]";
18271 return "ldmia%?\t%1, %M0";
18273 else
18275 otherops[1] = adjust_address (operands[1], SImode, 4);
18276 /* Take care of overlapping base/data reg. */
18277 if (reg_mentioned_p (operands[0], operands[1]))
18279 if (emit)
18281 output_asm_insn ("ldr%?\t%0, %1", otherops);
18282 output_asm_insn ("ldr%?\t%0, %1", operands);
18284 if (count)
18285 *count = 2;
18288 else
18290 if (emit)
18292 output_asm_insn ("ldr%?\t%0, %1", operands);
18293 output_asm_insn ("ldr%?\t%0, %1", otherops);
18295 if (count)
18296 *count = 2;
18301 else
18303 /* Constraints should ensure this. */
18304 gcc_assert (code0 == MEM && code1 == REG);
18305 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18306 || (TARGET_ARM && TARGET_LDRD));
18308 switch (GET_CODE (XEXP (operands[0], 0)))
18310 case REG:
18311 if (emit)
18313 if (TARGET_LDRD)
18314 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18315 else
18316 output_asm_insn ("stm%?\t%m0, %M1", operands);
18318 break;
18320 case PRE_INC:
18321 gcc_assert (TARGET_LDRD);
18322 if (emit)
18323 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18324 break;
18326 case PRE_DEC:
18327 if (emit)
18329 if (TARGET_LDRD)
18330 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18331 else
18332 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18334 break;
18336 case POST_INC:
18337 if (emit)
18339 if (TARGET_LDRD)
18340 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18341 else
18342 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18344 break;
18346 case POST_DEC:
18347 gcc_assert (TARGET_LDRD);
18348 if (emit)
18349 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18350 break;
18352 case PRE_MODIFY:
18353 case POST_MODIFY:
18354 otherops[0] = operands[1];
18355 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18356 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18358 /* IWMMXT allows offsets larger than ldrd can handle,
18359 fix these up with a pair of ldr. */
18360 if (!TARGET_THUMB2
18361 && CONST_INT_P (otherops[2])
18362 && (INTVAL(otherops[2]) <= -256
18363 || INTVAL(otherops[2]) >= 256))
18365 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18367 if (emit)
18369 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18370 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18372 if (count)
18373 *count = 2;
18375 else
18377 if (emit)
18379 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18380 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18382 if (count)
18383 *count = 2;
18386 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18388 if (emit)
18389 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18391 else
18393 if (emit)
18394 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18396 break;
18398 case PLUS:
18399 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18400 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18402 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18404 case -8:
18405 if (emit)
18406 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18407 return "";
18409 case -4:
18410 if (TARGET_THUMB2)
18411 break;
18412 if (emit)
18413 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18414 return "";
18416 case 4:
18417 if (TARGET_THUMB2)
18418 break;
18419 if (emit)
18420 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18421 return "";
18424 if (TARGET_LDRD
18425 && (REG_P (otherops[2])
18426 || TARGET_THUMB2
18427 || (CONST_INT_P (otherops[2])
18428 && INTVAL (otherops[2]) > -256
18429 && INTVAL (otherops[2]) < 256)))
18431 otherops[0] = operands[1];
18432 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18433 if (emit)
18434 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18435 return "";
18437 /* Fall through */
18439 default:
18440 otherops[0] = adjust_address (operands[0], SImode, 4);
18441 otherops[1] = operands[1];
18442 if (emit)
18444 output_asm_insn ("str%?\t%1, %0", operands);
18445 output_asm_insn ("str%?\t%H1, %0", otherops);
18447 if (count)
18448 *count = 2;
18452 return "";
18455 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18456 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18458 const char *
18459 output_move_quad (rtx *operands)
18461 if (REG_P (operands[0]))
18463 /* Load, or reg->reg move. */
18465 if (MEM_P (operands[1]))
18467 switch (GET_CODE (XEXP (operands[1], 0)))
18469 case REG:
18470 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18471 break;
18473 case LABEL_REF:
18474 case CONST:
18475 output_asm_insn ("adr%?\t%0, %1", operands);
18476 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18477 break;
18479 default:
18480 gcc_unreachable ();
18483 else
18485 rtx ops[2];
18486 int dest, src, i;
18488 gcc_assert (REG_P (operands[1]));
18490 dest = REGNO (operands[0]);
18491 src = REGNO (operands[1]);
18493 /* This seems pretty dumb, but hopefully GCC won't try to do it
18494 very often. */
18495 if (dest < src)
18496 for (i = 0; i < 4; i++)
18498 ops[0] = gen_rtx_REG (SImode, dest + i);
18499 ops[1] = gen_rtx_REG (SImode, src + i);
18500 output_asm_insn ("mov%?\t%0, %1", ops);
18502 else
18503 for (i = 3; i >= 0; i--)
18505 ops[0] = gen_rtx_REG (SImode, dest + i);
18506 ops[1] = gen_rtx_REG (SImode, src + i);
18507 output_asm_insn ("mov%?\t%0, %1", ops);
18511 else
18513 gcc_assert (MEM_P (operands[0]));
18514 gcc_assert (REG_P (operands[1]));
18515 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18517 switch (GET_CODE (XEXP (operands[0], 0)))
18519 case REG:
18520 output_asm_insn ("stm%?\t%m0, %M1", operands);
18521 break;
18523 default:
18524 gcc_unreachable ();
18528 return "";
18531 /* Output a VFP load or store instruction. */
18533 const char *
18534 output_move_vfp (rtx *operands)
18536 rtx reg, mem, addr, ops[2];
18537 int load = REG_P (operands[0]);
18538 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18539 int sp = (!TARGET_VFP_FP16INST
18540 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18541 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18542 const char *templ;
18543 char buff[50];
18544 machine_mode mode;
18546 reg = operands[!load];
18547 mem = operands[load];
18549 mode = GET_MODE (reg);
18551 gcc_assert (REG_P (reg));
18552 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18553 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18554 || mode == SFmode
18555 || mode == DFmode
18556 || mode == HImode
18557 || mode == SImode
18558 || mode == DImode
18559 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18560 gcc_assert (MEM_P (mem));
18562 addr = XEXP (mem, 0);
18564 switch (GET_CODE (addr))
18566 case PRE_DEC:
18567 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18568 ops[0] = XEXP (addr, 0);
18569 ops[1] = reg;
18570 break;
18572 case POST_INC:
18573 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18574 ops[0] = XEXP (addr, 0);
18575 ops[1] = reg;
18576 break;
18578 default:
18579 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18580 ops[0] = reg;
18581 ops[1] = mem;
18582 break;
18585 sprintf (buff, templ,
18586 load ? "ld" : "st",
18587 dp ? "64" : sp ? "32" : "16",
18588 dp ? "P" : "",
18589 integer_p ? "\t%@ int" : "");
18590 output_asm_insn (buff, ops);
18592 return "";
18595 /* Output a Neon double-word or quad-word load or store, or a load
18596 or store for larger structure modes.
18598 WARNING: The ordering of elements is weird in big-endian mode,
18599 because the EABI requires that vectors stored in memory appear
18600 as though they were stored by a VSTM instruction.
18601 GCC RTL defines element ordering based on in-memory order.
18602 This can be different from the architectural ordering of elements
18603 within a NEON register. The intrinsics defined in arm_neon.h use the
18604 NEON register element ordering, not the GCC RTL element ordering.
18606 For example, the in-memory ordering of a big-endian quadword
18607 vector with 16-bit elements when stored from register pair {d0,d1}
18608 will be (lowest address first, d0[N] is NEON register element N):
18610 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18612 When necessary, quadword registers (dN, dN+1) are moved to ARM
18613 registers from rN in the order:
18615 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18617 So that STM/LDM can be used on vectors in ARM registers, and the
18618 same memory layout will result as if VSTM/VLDM were used.
18620 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18621 possible, which allows use of appropriate alignment tags.
18622 Note that the choice of "64" is independent of the actual vector
18623 element size; this size simply ensures that the behavior is
18624 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18626 Due to limitations of those instructions, use of VST1.64/VLD1.64
18627 is not possible if:
18628 - the address contains PRE_DEC, or
18629 - the mode refers to more than 4 double-word registers
18631 In those cases, it would be possible to replace VSTM/VLDM by a
18632 sequence of instructions; this is not currently implemented since
18633 this is not certain to actually improve performance. */
18635 const char *
18636 output_move_neon (rtx *operands)
18638 rtx reg, mem, addr, ops[2];
18639 int regno, nregs, load = REG_P (operands[0]);
18640 const char *templ;
18641 char buff[50];
18642 machine_mode mode;
18644 reg = operands[!load];
18645 mem = operands[load];
18647 mode = GET_MODE (reg);
18649 gcc_assert (REG_P (reg));
18650 regno = REGNO (reg);
18651 nregs = REG_NREGS (reg) / 2;
18652 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18653 || NEON_REGNO_OK_FOR_QUAD (regno));
18654 gcc_assert (VALID_NEON_DREG_MODE (mode)
18655 || VALID_NEON_QREG_MODE (mode)
18656 || VALID_NEON_STRUCT_MODE (mode));
18657 gcc_assert (MEM_P (mem));
18659 addr = XEXP (mem, 0);
18661 /* Strip off const from addresses like (const (plus (...))). */
18662 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18663 addr = XEXP (addr, 0);
18665 switch (GET_CODE (addr))
18667 case POST_INC:
18668 /* We have to use vldm / vstm for too-large modes. */
18669 if (nregs > 4)
18671 templ = "v%smia%%?\t%%0!, %%h1";
18672 ops[0] = XEXP (addr, 0);
18674 else
18676 templ = "v%s1.64\t%%h1, %%A0";
18677 ops[0] = mem;
18679 ops[1] = reg;
18680 break;
18682 case PRE_DEC:
18683 /* We have to use vldm / vstm in this case, since there is no
18684 pre-decrement form of the vld1 / vst1 instructions. */
18685 templ = "v%smdb%%?\t%%0!, %%h1";
18686 ops[0] = XEXP (addr, 0);
18687 ops[1] = reg;
18688 break;
18690 case POST_MODIFY:
18691 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18692 gcc_unreachable ();
18694 case REG:
18695 /* We have to use vldm / vstm for too-large modes. */
18696 if (nregs > 1)
18698 if (nregs > 4)
18699 templ = "v%smia%%?\t%%m0, %%h1";
18700 else
18701 templ = "v%s1.64\t%%h1, %%A0";
18703 ops[0] = mem;
18704 ops[1] = reg;
18705 break;
18707 /* Fall through. */
18708 case LABEL_REF:
18709 case PLUS:
18711 int i;
18712 int overlap = -1;
18713 for (i = 0; i < nregs; i++)
18715 /* We're only using DImode here because it's a convenient size. */
18716 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18717 ops[1] = adjust_address (mem, DImode, 8 * i);
18718 if (reg_overlap_mentioned_p (ops[0], mem))
18720 gcc_assert (overlap == -1);
18721 overlap = i;
18723 else
18725 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18726 output_asm_insn (buff, ops);
18729 if (overlap != -1)
18731 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18732 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18733 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18734 output_asm_insn (buff, ops);
18737 return "";
18740 default:
18741 gcc_unreachable ();
18744 sprintf (buff, templ, load ? "ld" : "st");
18745 output_asm_insn (buff, ops);
18747 return "";
18750 /* Compute and return the length of neon_mov<mode>, where <mode> is
18751 one of the VSTRUCT modes: EI, OI, CI or XI. */
18753 arm_attr_length_move_neon (rtx_insn *insn)
18755 rtx reg, mem, addr;
18756 int load;
18757 machine_mode mode;
18759 extract_insn_cached (insn);
18761 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18763 mode = GET_MODE (recog_data.operand[0]);
18764 switch (mode)
18766 case E_EImode:
18767 case E_OImode:
18768 return 8;
18769 case E_CImode:
18770 return 12;
18771 case E_XImode:
18772 return 16;
18773 default:
18774 gcc_unreachable ();
18778 load = REG_P (recog_data.operand[0]);
18779 reg = recog_data.operand[!load];
18780 mem = recog_data.operand[load];
18782 gcc_assert (MEM_P (mem));
18784 addr = XEXP (mem, 0);
18786 /* Strip off const from addresses like (const (plus (...))). */
18787 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18788 addr = XEXP (addr, 0);
18790 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18792 int insns = REG_NREGS (reg) / 2;
18793 return insns * 4;
18795 else
18796 return 4;
18799 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18800 return zero. */
18803 arm_address_offset_is_imm (rtx_insn *insn)
18805 rtx mem, addr;
18807 extract_insn_cached (insn);
18809 if (REG_P (recog_data.operand[0]))
18810 return 0;
18812 mem = recog_data.operand[0];
18814 gcc_assert (MEM_P (mem));
18816 addr = XEXP (mem, 0);
18818 if (REG_P (addr)
18819 || (GET_CODE (addr) == PLUS
18820 && REG_P (XEXP (addr, 0))
18821 && CONST_INT_P (XEXP (addr, 1))))
18822 return 1;
18823 else
18824 return 0;
18827 /* Output an ADD r, s, #n where n may be too big for one instruction.
18828 If N is zero and the destination is the same as the source, output nothing. */
18829 const char *
18830 output_add_immediate (rtx *operands)
18832 HOST_WIDE_INT n = INTVAL (operands[2]);
18834 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18836 if (n < 0)
18837 output_multi_immediate (operands,
18838 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18839 -n);
18840 else
18841 output_multi_immediate (operands,
18842 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18846 return "";
18849 /* Output a multiple immediate operation.
18850 OPERANDS is the vector of operands referred to in the output patterns.
18851 INSTR1 is the output pattern to use for the first constant.
18852 INSTR2 is the output pattern to use for subsequent constants.
18853 IMMED_OP is the index of the constant slot in OPERANDS.
18854 N is the constant value. */
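/* Worked example: called from output_add_immediate with N == 511 this emits,
   roughly, "add r0, r1, #255" followed by "add r0, r0, #256" (register names
   for illustration), since the loop below peels off one shifted 8-bit chunk
   of N at a time.  */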
18855 static const char *
18856 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18857 int immed_op, HOST_WIDE_INT n)
18859 #if HOST_BITS_PER_WIDE_INT > 32
18860 n &= 0xffffffff;
18861 #endif
18863 if (n == 0)
18865 /* Quick and easy output. */
18866 operands[immed_op] = const0_rtx;
18867 output_asm_insn (instr1, operands);
18869 else
18871 int i;
18872 const char * instr = instr1;
18874 /* Note that n is never zero here (which would give no output). */
18875 for (i = 0; i < 32; i += 2)
18877 if (n & (3 << i))
18879 operands[immed_op] = GEN_INT (n & (255 << i));
18880 output_asm_insn (instr, operands);
18881 instr = instr2;
18882 i += 6;
18887 return "";
18890 /* Return the name of a shifter operation. */
18891 static const char *
18892 arm_shift_nmem(enum rtx_code code)
18894 switch (code)
18896 case ASHIFT:
18897 return ARM_LSL_NAME;
18899 case ASHIFTRT:
18900 return "asr";
18902 case LSHIFTRT:
18903 return "lsr";
18905 case ROTATERT:
18906 return "ror";
18908 default:
18909 abort();
18913 /* Return the appropriate ARM instruction for the operation code.
18914 The returned result should not be overwritten. OP is the rtx of the
18915 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18916 was shifted. */
18917 const char *
18918 arithmetic_instr (rtx op, int shift_first_arg)
18920 switch (GET_CODE (op))
18922 case PLUS:
18923 return "add";
18925 case MINUS:
18926 return shift_first_arg ? "rsb" : "sub";
18928 case IOR:
18929 return "orr";
18931 case XOR:
18932 return "eor";
18934 case AND:
18935 return "and";
18937 case ASHIFT:
18938 case ASHIFTRT:
18939 case LSHIFTRT:
18940 case ROTATERT:
18941 return arm_shift_nmem(GET_CODE(op));
18943 default:
18944 gcc_unreachable ();
18948 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18949 for the operation code. The returned result should not be overwritten.
18950 OP is the rtx code of the shift.
18951 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18952 constant shift amount otherwise. */
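/* For example, a (mult x 8) arising from address arithmetic is handled here
   by returning "lsl" with *AMOUNTP set to 3, while an (ashiftrt x (reg))
   returns "asr" with *AMOUNTP set to -1.  */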
18953 static const char *
18954 shift_op (rtx op, HOST_WIDE_INT *amountp)
18956 const char * mnem;
18957 enum rtx_code code = GET_CODE (op);
18959 switch (code)
18961 case ROTATE:
18962 if (!CONST_INT_P (XEXP (op, 1)))
18964 output_operand_lossage ("invalid shift operand");
18965 return NULL;
18968 code = ROTATERT;
18969 *amountp = 32 - INTVAL (XEXP (op, 1));
18970 mnem = "ror";
18971 break;
18973 case ASHIFT:
18974 case ASHIFTRT:
18975 case LSHIFTRT:
18976 case ROTATERT:
18977 mnem = arm_shift_nmem(code);
18978 if (CONST_INT_P (XEXP (op, 1)))
18980 *amountp = INTVAL (XEXP (op, 1));
18982 else if (REG_P (XEXP (op, 1)))
18984 *amountp = -1;
18985 return mnem;
18987 else
18989 output_operand_lossage ("invalid shift operand");
18990 return NULL;
18992 break;
18994 case MULT:
18995 /* We never have to worry about the amount being other than a
18996 power of 2, since this case can never be reloaded from a reg. */
18997 if (!CONST_INT_P (XEXP (op, 1)))
18999 output_operand_lossage ("invalid shift operand");
19000 return NULL;
19003 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19005 /* Amount must be a power of two. */
19006 if (*amountp & (*amountp - 1))
19008 output_operand_lossage ("invalid shift operand");
19009 return NULL;
19012 *amountp = exact_log2 (*amountp);
19013 gcc_assert (IN_RANGE (*amountp, 0, 31));
19014 return ARM_LSL_NAME;
19016 default:
19017 output_operand_lossage ("invalid shift operand");
19018 return NULL;
19021 /* This is not 100% correct, but follows from the desire to merge
19022 multiplication by a power of 2 with the recognizer for a
19023 shift. >=32 is not a valid shift for "lsl", so we must try to
19024 output a shift that produces the correct arithmetical result.
19025 Using lsr #32 is identical except for the fact that the carry bit
19026 is not set correctly if we set the flags; but we never use the
19027 carry bit from such an operation, so we can ignore that. */
19028 if (code == ROTATERT)
19029 /* Rotate is just modulo 32. */
19030 *amountp &= 31;
19031 else if (*amountp != (*amountp & 31))
19033 if (code == ASHIFT)
19034 mnem = "lsr";
19035 *amountp = 32;
19038 /* Shifts of 0 are no-ops. */
19039 if (*amountp == 0)
19040 return NULL;
19042 return mnem;
19045 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19046 because /bin/as is horribly restrictive. The judgement about
19047 whether or not each character is 'printable' (and can be output as
19048 is) or not (and must be printed with an octal escape) must be made
19049 with reference to the *host* character set -- the situation is
19050 similar to that discussed in the comments above pp_c_char in
19051 c-pretty-print.c. */
19053 #define MAX_ASCII_LEN 51
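/* For instance, the three input bytes "Hi\n" are emitted (roughly) as

	.ascii "Hi\012"

   since the newline is not printable and therefore gets an octal escape,
   and the string is split onto a new .ascii directive whenever the current
   one reaches MAX_ASCII_LEN characters.  */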
19055 void
19056 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19058 int i;
19059 int len_so_far = 0;
19061 fputs ("\t.ascii\t\"", stream);
19063 for (i = 0; i < len; i++)
19065 int c = p[i];
19067 if (len_so_far >= MAX_ASCII_LEN)
19069 fputs ("\"\n\t.ascii\t\"", stream);
19070 len_so_far = 0;
19073 if (ISPRINT (c))
19075 if (c == '\\' || c == '\"')
19077 putc ('\\', stream);
19078 len_so_far++;
19080 putc (c, stream);
19081 len_so_far++;
19083 else
19085 fprintf (stream, "\\%03o", c);
19086 len_so_far += 4;
19090 fputs ("\"\n", stream);
19093 /* Whether a register is callee saved or not. This is necessary because, on
19094 Thumb-1 targets, high registers are marked as caller saved when optimizing
19095 for size, despite being callee saved, in order to avoid using them. */
19096 #define callee_saved_reg_p(reg) \
19097 (!call_used_regs[reg] \
19098 || (TARGET_THUMB1 && optimize_size \
19099 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
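/* For instance, when optimizing for size on Thumb-1 this macro reports the
   high registers (FIRST_HI_REGNUM .. LAST_HI_REGNUM) as callee saved even
   though call_used_regs marks them as caller saved; the mask computations
   below test callee_saved_reg_p () together with df_regs_ever_live_p () to
   decide which core registers must be pushed.  */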
19101 /* Compute the register save mask for registers 0 through 12
19102 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19104 static unsigned long
19105 arm_compute_save_reg0_reg12_mask (void)
19107 unsigned long func_type = arm_current_func_type ();
19108 unsigned long save_reg_mask = 0;
19109 unsigned int reg;
19111 if (IS_INTERRUPT (func_type))
19113 unsigned int max_reg;
19114 /* Interrupt functions must not corrupt any registers,
19115 even call clobbered ones. If this is a leaf function
19116 we can just examine the registers used by the RTL, but
19117 otherwise we have to assume that whatever function is
19118 called might clobber anything, and so we have to save
19119 all the call-clobbered registers as well. */
19120 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19121 /* FIQ handlers have registers r8 - r12 banked, so
19122 we only need to check r0 - r7.  Normal ISRs only
19123 bank r13 and r14, so we must check up to r12.
19124 r13 is the stack pointer, which is always preserved,
19125 so we do not need to consider it here. */
19126 max_reg = 7;
19127 else
19128 max_reg = 12;
19130 for (reg = 0; reg <= max_reg; reg++)
19131 if (df_regs_ever_live_p (reg)
19132 || (! crtl->is_leaf && call_used_regs[reg]))
19133 save_reg_mask |= (1 << reg);
19135 /* Also save the pic base register if necessary. */
19136 if (flag_pic
19137 && !TARGET_SINGLE_PIC_BASE
19138 && arm_pic_register != INVALID_REGNUM
19139 && crtl->uses_pic_offset_table)
19140 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19142 else if (IS_VOLATILE (func_type))
19144 /* For noreturn functions we historically omitted register saves
19145 altogether. However this really messes up debugging. As a
19146 compromise save just the frame pointers. Combined with the link
19147 register saved elsewhere this should be sufficient to get
19148 a backtrace. */
19149 if (frame_pointer_needed)
19150 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19151 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19152 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19153 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19154 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19156 else
19158 /* In the normal case we only need to save those registers
19159 which are call saved and which are used by this function. */
19160 for (reg = 0; reg <= 11; reg++)
19161 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19162 save_reg_mask |= (1 << reg);
19164 /* Handle the frame pointer as a special case. */
19165 if (frame_pointer_needed)
19166 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19168 /* If we aren't loading the PIC register,
19169 don't stack it even though it may be live. */
19170 if (flag_pic
19171 && !TARGET_SINGLE_PIC_BASE
19172 && arm_pic_register != INVALID_REGNUM
19173 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19174 || crtl->uses_pic_offset_table))
19175 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19177 /* The prologue will copy SP into R0, so save it. */
19178 if (IS_STACKALIGN (func_type))
19179 save_reg_mask |= 1;
19182 /* Save registers so the exception handler can modify them. */
19183 if (crtl->calls_eh_return)
19185 unsigned int i;
19187 for (i = 0; ; i++)
19189 reg = EH_RETURN_DATA_REGNO (i);
19190 if (reg == INVALID_REGNUM)
19191 break;
19192 save_reg_mask |= 1 << reg;
19196 return save_reg_mask;
19199 /* Return true if r3 is live at the start of the function. */
19201 static bool
19202 arm_r3_live_at_start_p (void)
19204 /* Just look at cfg info, which is still close enough to correct at this
19205 point. This gives false positives for broken functions that might use
19206 uninitialized data that happens to be allocated in r3, but who cares? */
19207 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19210 /* Compute the number of bytes used to store the static chain register on the
19211 stack, above the stack frame. We need to know this accurately to get the
19212 alignment of the rest of the stack frame correct. */
19214 static int
19215 arm_compute_static_chain_stack_bytes (void)
19217 /* See the defining assertion in arm_expand_prologue. */
19218 if (IS_NESTED (arm_current_func_type ())
19219 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19220 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19221 || flag_stack_clash_protection)
19222 && !df_regs_ever_live_p (LR_REGNUM)))
19223 && arm_r3_live_at_start_p ()
19224 && crtl->args.pretend_args_size == 0)
19225 return 4;
19227 return 0;
19230 /* Compute a bit mask of which core registers need to be
19231 saved on the stack for the current function.
19232 This is used by arm_compute_frame_layout, which may add extra registers. */
19234 static unsigned long
19235 arm_compute_save_core_reg_mask (void)
19237 unsigned int save_reg_mask = 0;
19238 unsigned long func_type = arm_current_func_type ();
19239 unsigned int reg;
19241 if (IS_NAKED (func_type))
19242 /* This should never really happen. */
19243 return 0;
19245 /* If we are creating a stack frame, then we must save the frame pointer,
19246 IP (which will hold the old stack pointer), LR and the PC. */
19247 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19248 save_reg_mask |=
19249 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19250 | (1 << IP_REGNUM)
19251 | (1 << LR_REGNUM)
19252 | (1 << PC_REGNUM);
19254 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19256 /* Decide if we need to save the link register.
19257 Interrupt routines have their own banked link register,
19258 so they never need to save it.
19259 Otherwise if we do not use the link register we do not need to save
19260 it. If we are pushing other registers onto the stack however, we
19261 can save an instruction in the epilogue by pushing the link register
19262 now and then popping it back into the PC. This incurs extra memory
19263 accesses though, so we only do it when optimizing for size, and only
19264 if we know that we will not need a fancy return sequence. */
19265 if (df_regs_ever_live_p (LR_REGNUM)
19266 || (save_reg_mask
19267 && optimize_size
19268 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19269 && !crtl->tail_call_emit
19270 && !crtl->calls_eh_return))
19271 save_reg_mask |= 1 << LR_REGNUM;
19273 if (cfun->machine->lr_save_eliminated)
19274 save_reg_mask &= ~ (1 << LR_REGNUM);
19276 if (TARGET_REALLY_IWMMXT
19277 && ((bit_count (save_reg_mask)
19278 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19279 arm_compute_static_chain_stack_bytes())
19280 ) % 2) != 0)
19282 /* The total number of registers that are going to be pushed
19283 onto the stack is odd. We need to ensure that the stack
19284 is 64-bit aligned before we start to save iWMMXt registers,
19285 and also before we start to create locals. (A local variable
19286 might be a double or long long which we will load/store using
19287 an iWMMXt instruction). Therefore we need to push another
19288 ARM register, so that the stack will be 64-bit aligned. We
19289 try to avoid using the arg registers (r0 - r3) as they might be
19290 used to pass values in a tail call. */
19291 for (reg = 4; reg <= 12; reg++)
19292 if ((save_reg_mask & (1 << reg)) == 0)
19293 break;
19295 if (reg <= 12)
19296 save_reg_mask |= (1 << reg);
19297 else
19299 cfun->machine->sibcall_blocked = 1;
19300 save_reg_mask |= (1 << 3);
19304 /* We may need to push an additional register for use initializing the
19305 PIC base register. */
19306 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19307 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19309 reg = thumb_find_work_register (1 << 4);
19310 if (!call_used_regs[reg])
19311 save_reg_mask |= (1 << reg);
19314 return save_reg_mask;
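/* Worked example of the iWMMXt alignment fix-up above: if the mask so far
   holds only r4, r5 and r6 and there are no pretend args, bit_count () is 3
   (odd), so the first free register above r3 (here r7) is added to the mask
   purely to keep the stack 64-bit aligned before any iWMMXt saves.  */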
19317 /* Compute a bit mask of which core registers need to be
19318 saved on the stack for the current function. */
19319 static unsigned long
19320 thumb1_compute_save_core_reg_mask (void)
19322 unsigned long mask;
19323 unsigned reg;
19325 mask = 0;
19326 for (reg = 0; reg < 12; reg ++)
19327 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19328 mask |= 1 << reg;
19330 /* Handle the frame pointer as a special case. */
19331 if (frame_pointer_needed)
19332 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19334 if (flag_pic
19335 && !TARGET_SINGLE_PIC_BASE
19336 && arm_pic_register != INVALID_REGNUM
19337 && crtl->uses_pic_offset_table)
19338 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19340 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19341 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19342 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19344 /* LR will also be pushed if any lo regs are pushed. */
19345 if (mask & 0xff || thumb_force_lr_save ())
19346 mask |= (1 << LR_REGNUM);
19348 /* Make sure we have a low work register if we need one.
19349 We will need one if we are going to push a high register,
19350 but we are not currently intending to push a low register. */
19351 if ((mask & 0xff) == 0
19352 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19354 /* Use thumb_find_work_register to choose which register
19355 we will use. If the register is live then we will
19356 have to push it. Use LAST_LO_REGNUM as our fallback
19357 choice for the register to select. */
19358 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19359 /* Make sure the register returned by thumb_find_work_register is
19360 not part of the return value. */
19361 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19362 reg = LAST_LO_REGNUM;
19364 if (callee_saved_reg_p (reg))
19365 mask |= 1 << reg;
19368 /* The 504 below is 8 bytes less than 512 because there are two possible
19369 alignment words. We can't tell here if they will be present or not so we
19370 have to play it safe and assume that they are. */
19371 if ((CALLER_INTERWORKING_SLOT_SIZE +
19372 ROUND_UP_WORD (get_frame_size ()) +
19373 crtl->outgoing_args_size) >= 504)
19375 /* This is the same as the code in thumb1_expand_prologue() which
19376 determines which register to use for stack decrement. */
19377 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19378 if (mask & (1 << reg))
19379 break;
19381 if (reg > LAST_LO_REGNUM)
19383 /* Make sure we have a register available for stack decrement. */
19384 mask |= 1 << LAST_LO_REGNUM;
19388 return mask;
19392 /* Return the number of bytes required to save VFP registers. */
19393 static int
19394 arm_get_vfp_saved_size (void)
19396 unsigned int regno;
19397 int count;
19398 int saved;
19400 saved = 0;
19401 /* Space for saved VFP registers. */
19402 if (TARGET_HARD_FLOAT)
19404 count = 0;
19405 for (regno = FIRST_VFP_REGNUM;
19406 regno < LAST_VFP_REGNUM;
19407 regno += 2)
19409 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19410 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19412 if (count > 0)
19414 /* Workaround ARM10 VFPr1 bug. */
19415 if (count == 2 && !arm_arch6)
19416 count++;
19417 saved += count * 8;
19419 count = 0;
19421 else
19422 count++;
19424 if (count > 0)
19426 if (count == 2 && !arm_arch6)
19427 count++;
19428 saved += count * 8;
19431 return saved;
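/* For example, if the call-saved registers d8 - d11 are live, the loop above
   counts four D registers and returns 4 * 8 = 32 bytes.  With the ARM10 VFPr1
   workaround (!arm_arch6), a run of exactly two D registers is padded to
   three, so d8 - d9 alone would be accounted as 24 bytes rather than 16.  */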
19435 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19436 everything bar the final return instruction. If simple_return is true,
19437 then do not output the epilogue, because it has already been emitted in RTL.
19439 Note: do not forget to update length attribute of corresponding insn pattern
19440 when changing assembly output (e.g. the length attribute of
19441 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19442 register clearing sequences). */
19443 const char *
19444 output_return_instruction (rtx operand, bool really_return, bool reverse,
19445 bool simple_return)
19447 char conditional[10];
19448 char instr[100];
19449 unsigned reg;
19450 unsigned long live_regs_mask;
19451 unsigned long func_type;
19452 arm_stack_offsets *offsets;
19454 func_type = arm_current_func_type ();
19456 if (IS_NAKED (func_type))
19457 return "";
19459 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19461 /* If this function was declared non-returning, and we have
19462 found a tail call, then we have to trust that the called
19463 function won't return. */
19464 if (really_return)
19466 rtx ops[2];
19468 /* Otherwise, trap an attempted return by aborting. */
19469 ops[0] = operand;
19470 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19471 : "abort");
19472 assemble_external_libcall (ops[1]);
19473 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19476 return "";
19479 gcc_assert (!cfun->calls_alloca || really_return);
19481 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19483 cfun->machine->return_used_this_function = 1;
19485 offsets = arm_get_frame_offsets ();
19486 live_regs_mask = offsets->saved_regs_mask;
19488 if (!simple_return && live_regs_mask)
19490 const char * return_reg;
19492 /* If we do not have any special requirements for function exit
19493 (e.g. interworking) then we can load the return address
19494 directly into the PC. Otherwise we must load it into LR. */
19495 if (really_return
19496 && !IS_CMSE_ENTRY (func_type)
19497 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19498 return_reg = reg_names[PC_REGNUM];
19499 else
19500 return_reg = reg_names[LR_REGNUM];
19502 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19504 /* There are three possible reasons for the IP register
19505 being saved. 1) a stack frame was created, in which case
19506 IP contains the old stack pointer, or 2) an ISR routine
19507 corrupted it, or 3) it was saved to align the stack on
19508 iWMMXt. In case 1, restore IP into SP, otherwise just
19509 restore IP. */
19510 if (frame_pointer_needed)
19512 live_regs_mask &= ~ (1 << IP_REGNUM);
19513 live_regs_mask |= (1 << SP_REGNUM);
19515 else
19516 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19519 /* On some ARM architectures it is faster to use LDR rather than
19520 LDM to load a single register. On other architectures, the
19521 cost is the same. In 26 bit mode, or for exception handlers,
19522 we have to use LDM to load the PC so that the CPSR is also
19523 restored. */
19524 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19525 if (live_regs_mask == (1U << reg))
19526 break;
19528 if (reg <= LAST_ARM_REGNUM
19529 && (reg != LR_REGNUM
19530 || ! really_return
19531 || ! IS_INTERRUPT (func_type)))
19533 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19534 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19536 else
19538 char *p;
19539 int first = 1;
19541 /* Generate the load multiple instruction to restore the
19542 registers. Note we can get here, even if
19543 frame_pointer_needed is true, but only if sp already
19544 points to the base of the saved core registers. */
19545 if (live_regs_mask & (1 << SP_REGNUM))
19547 unsigned HOST_WIDE_INT stack_adjust;
19549 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19550 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19552 if (stack_adjust && arm_arch5 && TARGET_ARM)
19553 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19554 else
19556 /* If we can't use ldmib (SA110 bug),
19557 then try to pop r3 instead. */
19558 if (stack_adjust)
19559 live_regs_mask |= 1 << 3;
19561 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19564 /* For interrupt returns we have to use an LDM rather than
19565 a POP so that we can use the exception return variant. */
19566 else if (IS_INTERRUPT (func_type))
19567 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19568 else
19569 sprintf (instr, "pop%s\t{", conditional);
19571 p = instr + strlen (instr);
19573 for (reg = 0; reg <= SP_REGNUM; reg++)
19574 if (live_regs_mask & (1 << reg))
19576 int l = strlen (reg_names[reg]);
19578 if (first)
19579 first = 0;
19580 else
19582 memcpy (p, ", ", 2);
19583 p += 2;
19586 memcpy (p, "%|", 2);
19587 memcpy (p + 2, reg_names[reg], l);
19588 p += l + 2;
19591 if (live_regs_mask & (1 << LR_REGNUM))
19593 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19594 /* If returning from an interrupt, restore the CPSR. */
19595 if (IS_INTERRUPT (func_type))
19596 strcat (p, "^");
19598 else
19599 strcpy (p, "}");
19602 output_asm_insn (instr, & operand);
19604 /* See if we need to generate an extra instruction to
19605 perform the actual function return. */
19606 if (really_return
19607 && func_type != ARM_FT_INTERWORKED
19608 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19610 /* The return has already been handled
19611 by loading the LR into the PC. */
19612 return "";
19616 if (really_return)
19618 switch ((int) ARM_FUNC_TYPE (func_type))
19620 case ARM_FT_ISR:
19621 case ARM_FT_FIQ:
19622 /* ??? This is wrong for unified assembly syntax. */
19623 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19624 break;
19626 case ARM_FT_INTERWORKED:
19627 gcc_assert (arm_arch5 || arm_arch4t);
19628 sprintf (instr, "bx%s\t%%|lr", conditional);
19629 break;
19631 case ARM_FT_EXCEPTION:
19632 /* ??? This is wrong for unified assembly syntax. */
19633 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19634 break;
19636 default:
19637 if (IS_CMSE_ENTRY (func_type))
19639 /* Check if we have to clear the 'GE bits' which is only used if
19640 parallel add and subtraction instructions are available. */
19641 if (TARGET_INT_SIMD)
19642 snprintf (instr, sizeof (instr),
19643 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19644 else
19645 snprintf (instr, sizeof (instr),
19646 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19648 output_asm_insn (instr, & operand);
19649 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19651 /* Clear the cumulative exception-status bits (0-4,7) and the
19652 condition code bits (28-31) of the FPSCR. We need to
19653 remember to clear the first scratch register used (IP) and
19654 save and restore the second (r4). */
19655 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19656 output_asm_insn (instr, & operand);
19657 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19658 output_asm_insn (instr, & operand);
19659 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19660 output_asm_insn (instr, & operand);
19661 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19662 output_asm_insn (instr, & operand);
19663 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19664 output_asm_insn (instr, & operand);
19665 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19666 output_asm_insn (instr, & operand);
19667 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19668 output_asm_insn (instr, & operand);
19669 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19670 output_asm_insn (instr, & operand);
19672 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19674 /* Use bx if it's available. */
19675 else if (arm_arch5 || arm_arch4t)
19676 sprintf (instr, "bx%s\t%%|lr", conditional);
19677 else
19678 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19679 break;
19682 output_asm_insn (instr, & operand);
19685 return "";
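/* For reference, the cmse_nonsecure_entry return path above emits a sequence
   along these lines on a hard-float Armv8-M Mainline target (only the msr
   carries the %? condition):

       msr     APSR_nzcvq, lr    @ APSR_nzcvqg if TARGET_INT_SIMD
       push    {r4}
       vmrs    ip, fpscr
       movw    r4, #65376        @ 0xff60
       movt    r4, #4095         @ 0x0fff, so r4 = 0x0fffff60
       and     ip, r4            @ clear FPSCR bits 0-4, 7 and 28-31
       vmsr    fpscr, ip
       pop     {r4}
       mov     ip, lr
       bxns    lr

   Soft-float and Thumb-1 entry functions omit the FPSCR clearing and emit
   only the msr and bxns instructions.  */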
19688 /* Output in FILE asm statements needed to declare the NAME of the function
19689 defined by its DECL node. */
19691 void
19692 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19694 size_t cmse_name_len;
19695 char *cmse_name = 0;
19696 char cmse_prefix[] = "__acle_se_";
19698 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19699 extra function label for each function with the 'cmse_nonsecure_entry'
19700 attribute. This extra function label should be prepended with
19701 '__acle_se_', telling the linker that it needs to create secure gateway
19702 veneers for this function. */
19703 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19704 DECL_ATTRIBUTES (decl)))
19706 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19707 cmse_name = XALLOCAVEC (char, cmse_name_len);
19708 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19709 targetm.asm_out.globalize_label (file, cmse_name);
19711 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19712 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19715 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19716 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19717 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19718 ASM_OUTPUT_LABEL (file, name);
19720 if (cmse_name)
19721 ASM_OUTPUT_LABEL (file, cmse_name);
19723 ARM_OUTPUT_FN_UNWIND (file, TRUE);
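/* As an illustration, for a function foo carrying the cmse_nonsecure_entry
   attribute the code above emits something like (ARM-specific directives from
   ARM_DECLARE_FUNCTION_NAME elided):

       .global __acle_se_foo
       .type   __acle_se_foo, %function
       .type   foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create the secure gateway veneer for foo.  */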
19726 /* Write the function name into the code section, directly preceding
19727 the function prologue.
19729 Code will be output similar to this:
19731 .ascii "arm_poke_function_name", 0
19732 .align
19734 .word 0xff000000 + (t1 - t0)
19735 arm_poke_function_name
19736 mov ip, sp
19737 stmfd sp!, {fp, ip, lr, pc}
19738 sub fp, ip, #4
19740 When performing a stack backtrace, code can inspect the value
19741 of 'pc' stored at 'fp' + 0. If the trace function then looks
19742 at location pc - 12 and the top 8 bits are set, then we know
19743 that there is a function name embedded immediately preceding this
19744 location, whose length is ((pc[-3]) & ~0xff000000).
19746 We assume that pc is declared as a pointer to an unsigned long.
19748 It is of no benefit to output the function name if we are assembling
19749 a leaf function. These function types will not contain a stack
19750 backtrace structure, so it is not possible to determine the
19751 function name. */
19752 void
19753 arm_poke_function_name (FILE *stream, const char *name)
19755 unsigned long alignlength;
19756 unsigned long length;
19757 rtx x;
19759 length = strlen (name) + 1;
19760 alignlength = ROUND_UP_WORD (length);
19762 ASM_OUTPUT_ASCII (stream, name, length);
19763 ASM_OUTPUT_ALIGN (stream, 2);
19764 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19765 assemble_aligned_integer (UNITS_PER_WORD, x);
19768 /* Place some comments into the assembler stream
19769 describing the current function. */
19770 static void
19771 arm_output_function_prologue (FILE *f)
19773 unsigned long func_type;
19775 /* Sanity check. */
19776 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19778 func_type = arm_current_func_type ();
19780 switch ((int) ARM_FUNC_TYPE (func_type))
19782 default:
19783 case ARM_FT_NORMAL:
19784 break;
19785 case ARM_FT_INTERWORKED:
19786 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19787 break;
19788 case ARM_FT_ISR:
19789 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19790 break;
19791 case ARM_FT_FIQ:
19792 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19793 break;
19794 case ARM_FT_EXCEPTION:
19795 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19796 break;
19799 if (IS_NAKED (func_type))
19800 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19802 if (IS_VOLATILE (func_type))
19803 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19805 if (IS_NESTED (func_type))
19806 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19807 if (IS_STACKALIGN (func_type))
19808 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19809 if (IS_CMSE_ENTRY (func_type))
19810 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19812 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19813 crtl->args.size,
19814 crtl->args.pretend_args_size,
19815 (HOST_WIDE_INT) get_frame_size ());
19817 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19818 frame_pointer_needed,
19819 cfun->machine->uses_anonymous_args);
19821 if (cfun->machine->lr_save_eliminated)
19822 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19824 if (crtl->calls_eh_return)
19825 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19829 static void
19830 arm_output_function_epilogue (FILE *)
19832 arm_stack_offsets *offsets;
19834 if (TARGET_THUMB1)
19836 int regno;
19838 /* Emit any call-via-reg trampolines that are needed for v4t support
19839 of call_reg and call_value_reg type insns. */
19840 for (regno = 0; regno < LR_REGNUM; regno++)
19842 rtx label = cfun->machine->call_via[regno];
19844 if (label != NULL)
19846 switch_to_section (function_section (current_function_decl));
19847 targetm.asm_out.internal_label (asm_out_file, "L",
19848 CODE_LABEL_NUMBER (label));
19849 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19853 /* ??? Probably not safe to set this here, since it assumes that a
19854 function will be emitted as assembly immediately after we generate
19855 RTL for it. This does not happen for inline functions. */
19856 cfun->machine->return_used_this_function = 0;
19858 else /* TARGET_32BIT */
19860 /* We need to take into account any stack-frame rounding. */
19861 offsets = arm_get_frame_offsets ();
19863 gcc_assert (!use_return_insn (FALSE, NULL)
19864 || (cfun->machine->return_used_this_function != 0)
19865 || offsets->saved_regs == offsets->outgoing_args
19866 || frame_pointer_needed);
19870 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19871 STR and STRD. If an even number of registers is being pushed, one
19872 or more STRD patterns are created for each register pair. If an
19873 odd number of registers is pushed, emit an initial STR followed by
19874 as many STRD instructions as are needed. This works best when the
19875 stack is initially 64-bit aligned (the normal case), since it
19876 ensures that each STRD is also 64-bit aligned. */
19877 static void
19878 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19880 int num_regs = 0;
19881 int i;
19882 int regno;
19883 rtx par = NULL_RTX;
19884 rtx dwarf = NULL_RTX;
19885 rtx tmp;
19886 bool first = true;
19888 num_regs = bit_count (saved_regs_mask);
19890 /* Must be at least one register to save, and can't save SP or PC. */
19891 gcc_assert (num_regs > 0 && num_regs <= 14);
19892 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19893 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19895 /* Create sequence for DWARF info. All the frame-related data for
19896 debugging is held in this wrapper. */
19897 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19899 /* Describe the stack adjustment. */
19900 tmp = gen_rtx_SET (stack_pointer_rtx,
19901 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19902 RTX_FRAME_RELATED_P (tmp) = 1;
19903 XVECEXP (dwarf, 0, 0) = tmp;
19905 /* Find the first register. */
19906 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19909 i = 0;
19911 /* If there's an odd number of registers to push, start off by
19912 pushing a single register. This ensures that subsequent strd
19913 operations are dword aligned (assuming that SP was originally
19914 64-bit aligned). */
19915 if ((num_regs & 1) != 0)
19917 rtx reg, mem, insn;
19919 reg = gen_rtx_REG (SImode, regno);
19920 if (num_regs == 1)
19921 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19922 stack_pointer_rtx));
19923 else
19924 mem = gen_frame_mem (Pmode,
19925 gen_rtx_PRE_MODIFY
19926 (Pmode, stack_pointer_rtx,
19927 plus_constant (Pmode, stack_pointer_rtx,
19928 -4 * num_regs)));
19930 tmp = gen_rtx_SET (mem, reg);
19931 RTX_FRAME_RELATED_P (tmp) = 1;
19932 insn = emit_insn (tmp);
19933 RTX_FRAME_RELATED_P (insn) = 1;
19934 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19935 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19936 RTX_FRAME_RELATED_P (tmp) = 1;
19937 i++;
19938 regno++;
19939 XVECEXP (dwarf, 0, i) = tmp;
19940 first = false;
19943 while (i < num_regs)
19944 if (saved_regs_mask & (1 << regno))
19946 rtx reg1, reg2, mem1, mem2;
19947 rtx tmp0, tmp1, tmp2;
19948 int regno2;
19950 /* Find the register to pair with this one. */
19951 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19952 regno2++)
19955 reg1 = gen_rtx_REG (SImode, regno);
19956 reg2 = gen_rtx_REG (SImode, regno2);
19958 if (first)
19960 rtx insn;
19962 first = false;
19963 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 -4 * num_regs));
19966 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19967 stack_pointer_rtx,
19968 -4 * (num_regs - 1)));
19969 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19970 plus_constant (Pmode, stack_pointer_rtx,
19971 -4 * (num_regs)));
19972 tmp1 = gen_rtx_SET (mem1, reg1);
19973 tmp2 = gen_rtx_SET (mem2, reg2);
19974 RTX_FRAME_RELATED_P (tmp0) = 1;
19975 RTX_FRAME_RELATED_P (tmp1) = 1;
19976 RTX_FRAME_RELATED_P (tmp2) = 1;
19977 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19978 XVECEXP (par, 0, 0) = tmp0;
19979 XVECEXP (par, 0, 1) = tmp1;
19980 XVECEXP (par, 0, 2) = tmp2;
19981 insn = emit_insn (par);
19982 RTX_FRAME_RELATED_P (insn) = 1;
19983 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19985 else
19987 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19988 stack_pointer_rtx,
19989 4 * i));
19990 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19991 stack_pointer_rtx,
19992 4 * (i + 1)));
19993 tmp1 = gen_rtx_SET (mem1, reg1);
19994 tmp2 = gen_rtx_SET (mem2, reg2);
19995 RTX_FRAME_RELATED_P (tmp1) = 1;
19996 RTX_FRAME_RELATED_P (tmp2) = 1;
19997 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19998 XVECEXP (par, 0, 0) = tmp1;
19999 XVECEXP (par, 0, 1) = tmp2;
20000 emit_insn (par);
20003 /* Create unwind information. This is an approximation. */
20004 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20005 plus_constant (Pmode,
20006 stack_pointer_rtx,
20007 4 * i)),
20008 reg1);
20009 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20010 plus_constant (Pmode,
20011 stack_pointer_rtx,
20012 4 * (i + 1))),
20013 reg2);
20015 RTX_FRAME_RELATED_P (tmp1) = 1;
20016 RTX_FRAME_RELATED_P (tmp2) = 1;
20017 XVECEXP (dwarf, 0, i + 1) = tmp1;
20018 XVECEXP (dwarf, 0, i + 2) = tmp2;
20019 i += 2;
20020 regno = regno2 + 1;
20022 else
20023 regno++;
20025 return;
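/* Worked example, following the code above: for a saved_regs_mask covering
   { r4, r5, r6 } (an odd count of three) the emitted sequence is roughly

       str     r4, [sp, #-12]!   @ single store allocates all 12 bytes
       strd    r5, r6, [sp, #4]  @ remaining pair stays doubleword aligned

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect as
   one stack decrement plus three individual word stores.  */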
20028 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20029 whenever possible, otherwise it emits single-word stores. The first store
20030 also allocates stack space for all saved registers, using pre-indexed
20031 addressing with writeback. All other stores use offset addressing. If no STRD
20032 can be emitted, this function emits a sequence of single-word stores,
20033 and not an STM as before, because single-word stores give the scheduler
20034 more freedom and can be turned into an STM by peephole optimizations. */
20035 static void
20036 arm_emit_strd_push (unsigned long saved_regs_mask)
20038 int num_regs = 0;
20039 int i, j, dwarf_index = 0;
20040 int offset = 0;
20041 rtx dwarf = NULL_RTX;
20042 rtx insn = NULL_RTX;
20043 rtx tmp, mem;
20045 /* TODO: More efficient code could be emitted by changing the
20046 layout, e.g., first push all pairs that can use STRD to keep the
20047 stack aligned, and then push all other registers. */
20048 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20049 if (saved_regs_mask & (1 << i))
20050 num_regs++;
20052 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20053 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20054 gcc_assert (num_regs > 0);
20056 /* Create sequence for DWARF info. */
20057 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20059 /* For dwarf info, we generate explicit stack update. */
20060 tmp = gen_rtx_SET (stack_pointer_rtx,
20061 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20062 RTX_FRAME_RELATED_P (tmp) = 1;
20063 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20065 /* Save registers. */
20066 offset = - 4 * num_regs;
20067 j = 0;
20068 while (j <= LAST_ARM_REGNUM)
20069 if (saved_regs_mask & (1 << j))
20071 if ((j % 2 == 0)
20072 && (saved_regs_mask & (1 << (j + 1))))
20074 /* Current register and the next register form a register pair for
20075 which STRD can be generated. */
20076 if (offset < 0)
20078 /* Allocate stack space for all saved registers. */
20079 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20080 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20081 mem = gen_frame_mem (DImode, tmp);
20082 offset = 0;
20084 else if (offset > 0)
20085 mem = gen_frame_mem (DImode,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 offset));
20089 else
20090 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20092 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20093 RTX_FRAME_RELATED_P (tmp) = 1;
20094 tmp = emit_insn (tmp);
20096 /* Record the first store insn. */
20097 if (dwarf_index == 1)
20098 insn = tmp;
20100 /* Generate dwarf info. */
20101 mem = gen_frame_mem (SImode,
20102 plus_constant (Pmode,
20103 stack_pointer_rtx,
20104 offset));
20105 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20109 mem = gen_frame_mem (SImode,
20110 plus_constant (Pmode,
20111 stack_pointer_rtx,
20112 offset + 4));
20113 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20117 offset += 8;
20118 j += 2;
20120 else
20122 /* Emit a single word store. */
20123 if (offset < 0)
20125 /* Allocate stack space for all saved registers. */
20126 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20127 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20128 mem = gen_frame_mem (SImode, tmp);
20129 offset = 0;
20131 else if (offset > 0)
20132 mem = gen_frame_mem (SImode,
20133 plus_constant (Pmode,
20134 stack_pointer_rtx,
20135 offset));
20136 else
20137 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20139 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20140 RTX_FRAME_RELATED_P (tmp) = 1;
20141 tmp = emit_insn (tmp);
20143 /* Record the first store insn. */
20144 if (dwarf_index == 1)
20145 insn = tmp;
20147 /* Generate dwarf info. */
20148 mem = gen_frame_mem (SImode,
20149 plus_constant (Pmode,
20150 stack_pointer_rtx,
20151 offset));
20152 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20153 RTX_FRAME_RELATED_P (tmp) = 1;
20154 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20156 offset += 4;
20157 j += 1;
20160 else
20161 j++;
20163 /* Attach dwarf info to the first insn we generate. */
20164 gcc_assert (insn != NULL_RTX);
20165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20166 RTX_FRAME_RELATED_P (insn) = 1;
20169 /* Generate and emit an insn that we will recognize as a push_multi.
20170 Unfortunately, since this insn does not reflect very well the actual
20171 semantics of the operation, we need to annotate the insn for the benefit
20172 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20173 MASK for registers that should be annotated for DWARF2 frame unwind
20174 information. */
20175 static rtx
20176 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20178 int num_regs = 0;
20179 int num_dwarf_regs = 0;
20180 int i, j;
20181 rtx par;
20182 rtx dwarf;
20183 int dwarf_par_index;
20184 rtx tmp, reg;
20186 /* We don't record the PC in the dwarf frame information. */
20187 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20189 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20191 if (mask & (1 << i))
20192 num_regs++;
20193 if (dwarf_regs_mask & (1 << i))
20194 num_dwarf_regs++;
20197 gcc_assert (num_regs && num_regs <= 16);
20198 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20200 /* For the body of the insn we are going to generate an UNSPEC in
20201 parallel with several USEs. This allows the insn to be recognized
20202 by the push_multi pattern in the arm.md file.
20204 The body of the insn looks something like this:
20206 (parallel [
20207 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20208 (const_int:SI <num>)))
20209 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20210 (use (reg:SI XX))
20211 (use (reg:SI YY))
20215 For the frame note however, we try to be more explicit and actually
20216 show each register being stored into the stack frame, plus a (single)
20217 decrement of the stack pointer. We do it this way in order to be
20218 friendly to the stack unwinding code, which only wants to see a single
20219 stack decrement per instruction. The RTL we generate for the note looks
20220 something like this:
20222 (sequence [
20223 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20224 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20225 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20226 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20230 FIXME: In an ideal world the PRE_MODIFY would not exist and
20231 instead we'd have a parallel expression detailing all
20232 the stores to the various memory addresses so that debug
20233 information is more up-to-date. Remember however while writing
20234 this to take care of the constraints with the push instruction.
20236 Note also that this has to be taken care of for the VFP registers.
20238 For more see PR43399. */
20240 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20241 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20242 dwarf_par_index = 1;
20244 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20246 if (mask & (1 << i))
20248 reg = gen_rtx_REG (SImode, i);
20250 XVECEXP (par, 0, 0)
20251 = gen_rtx_SET (gen_frame_mem
20252 (BLKmode,
20253 gen_rtx_PRE_MODIFY (Pmode,
20254 stack_pointer_rtx,
20255 plus_constant
20256 (Pmode, stack_pointer_rtx,
20257 -4 * num_regs))
20259 gen_rtx_UNSPEC (BLKmode,
20260 gen_rtvec (1, reg),
20261 UNSPEC_PUSH_MULT));
20263 if (dwarf_regs_mask & (1 << i))
20265 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20266 reg);
20267 RTX_FRAME_RELATED_P (tmp) = 1;
20268 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20271 break;
20275 for (j = 1, i++; j < num_regs; i++)
20277 if (mask & (1 << i))
20279 reg = gen_rtx_REG (SImode, i);
20281 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20283 if (dwarf_regs_mask & (1 << i))
20286 = gen_rtx_SET (gen_frame_mem
20287 (SImode,
20288 plus_constant (Pmode, stack_pointer_rtx,
20289 4 * j)),
20290 reg);
20291 RTX_FRAME_RELATED_P (tmp) = 1;
20292 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20295 j++;
20299 par = emit_insn (par);
20301 tmp = gen_rtx_SET (stack_pointer_rtx,
20302 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (dwarf, 0, 0) = tmp;
20306 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20308 return par;
20311 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20312 SIZE is the offset to be adjusted.
20313 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20314 static void
20315 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20317 rtx dwarf;
20319 RTX_FRAME_RELATED_P (insn) = 1;
20320 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20321 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20324 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20325 SAVED_REGS_MASK shows which registers need to be restored.
20327 Unfortunately, since this insn does not reflect very well the actual
20328 semantics of the operation, we need to annotate the insn for the benefit
20329 of DWARF2 frame unwind information. */
20330 static void
20331 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20333 int num_regs = 0;
20334 int i, j;
20335 rtx par;
20336 rtx dwarf = NULL_RTX;
20337 rtx tmp, reg;
20338 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20339 int offset_adj;
20340 int emit_update;
20342 offset_adj = return_in_pc ? 1 : 0;
20343 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20344 if (saved_regs_mask & (1 << i))
20345 num_regs++;
20347 gcc_assert (num_regs && num_regs <= 16);
20349 /* If SP is in reglist, then we don't emit SP update insn. */
20350 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20352 /* The parallel needs to hold num_regs SETs
20353 and one SET for the stack update. */
20354 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20356 if (return_in_pc)
20357 XVECEXP (par, 0, 0) = ret_rtx;
20359 if (emit_update)
20361 /* Increment the stack pointer, based on there being
20362 num_regs 4-byte registers to restore. */
20363 tmp = gen_rtx_SET (stack_pointer_rtx,
20364 plus_constant (Pmode,
20365 stack_pointer_rtx,
20366 4 * num_regs));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20368 XVECEXP (par, 0, offset_adj) = tmp;
20371 /* Now restore every reg, which may include PC. */
20372 for (j = 0, i = 0; j < num_regs; i++)
20373 if (saved_regs_mask & (1 << i))
20375 reg = gen_rtx_REG (SImode, i);
20376 if ((num_regs == 1) && emit_update && !return_in_pc)
20378 /* Emit single load with writeback. */
20379 tmp = gen_frame_mem (SImode,
20380 gen_rtx_POST_INC (Pmode,
20381 stack_pointer_rtx));
20382 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20383 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20384 return;
20387 tmp = gen_rtx_SET (reg,
20388 gen_frame_mem
20389 (SImode,
20390 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20391 RTX_FRAME_RELATED_P (tmp) = 1;
20392 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20394 /* We need to maintain a sequence for DWARF info too. As dwarf info
20395 should not have PC, skip PC. */
20396 if (i != PC_REGNUM)
20397 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20399 j++;
20402 if (return_in_pc)
20403 par = emit_jump_insn (par);
20404 else
20405 par = emit_insn (par);
20407 REG_NOTES (par) = dwarf;
20408 if (!return_in_pc)
20409 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20410 stack_pointer_rtx, stack_pointer_rtx);
20413 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20414 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20416 Unfortunately, since this insn does not reflect very well the actual
20417 semantics of the operation, we need to annotate the insn for the benefit
20418 of DWARF2 frame unwind information. */
20419 static void
20420 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20422 int i, j;
20423 rtx par;
20424 rtx dwarf = NULL_RTX;
20425 rtx tmp, reg;
20427 gcc_assert (num_regs && num_regs <= 32);
20429 /* Workaround ARM10 VFPr1 bug. */
20430 if (num_regs == 2 && !arm_arch6)
20432 if (first_reg == 15)
20433 first_reg--;
20435 num_regs++;
20438 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20439 there could be up to 32 D-registers to restore.
20440 If there are more than 16 D-registers, make two recursive calls,
20441 each of which emits one pop_multi instruction. */
20442 if (num_regs > 16)
20444 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20445 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20446 return;
20449 /* The parallel needs to hold num_regs SETs
20450 and one SET for the stack update. */
20451 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20453 /* Increment the stack pointer, based on there being
20454 num_regs 8-byte registers to restore. */
20455 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20456 RTX_FRAME_RELATED_P (tmp) = 1;
20457 XVECEXP (par, 0, 0) = tmp;
20459 /* Now show every reg that will be restored, using a SET for each. */
20460 for (j = 0, i = first_reg; j < num_regs; i += 2)
20462 reg = gen_rtx_REG (DFmode, i);
20464 tmp = gen_rtx_SET (reg,
20465 gen_frame_mem
20466 (DFmode,
20467 plus_constant (Pmode, base_reg, 8 * j)));
20468 RTX_FRAME_RELATED_P (tmp) = 1;
20469 XVECEXP (par, 0, j + 1) = tmp;
20471 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20473 j++;
20476 par = emit_insn (par);
20477 REG_NOTES (par) = dwarf;
20479 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20480 if (REGNO (base_reg) == IP_REGNUM)
20482 RTX_FRAME_RELATED_P (par) = 1;
20483 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20485 else
20486 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20487 base_reg, base_reg);
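/* For instance, a request to restore 20 D registers is split by the recursion
   above into one pop_multi covering 16 registers followed by another covering
   the remaining 4, each carrying its own base-register increment and
   REG_CFA_RESTORE notes.  */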
20490 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20491 even number of registers is being popped, multiple LDRD patterns are created
20492 for all register pairs. If an odd number of registers is popped, the last
20493 register is loaded using an LDR pattern. */
20494 static void
20495 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20497 int num_regs = 0;
20498 int i, j;
20499 rtx par = NULL_RTX;
20500 rtx dwarf = NULL_RTX;
20501 rtx tmp, reg, tmp1;
20502 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20504 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20505 if (saved_regs_mask & (1 << i))
20506 num_regs++;
20508 gcc_assert (num_regs && num_regs <= 16);
20510 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20511 to be popped. So, if num_regs is even, now it will become odd,
20512 and we can generate pop with PC. If num_regs is odd, it will be
20513 even now, and ldr with return can be generated for PC. */
20514 if (return_in_pc)
20515 num_regs--;
20517 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20519 /* Var j iterates over all the registers to gather all the registers in
20520 saved_regs_mask. Var i gives index of saved registers in stack frame.
20521 A PARALLEL RTX of register-pair is created here, so that pattern for
20522 LDRD can be matched. As PC is always last register to be popped, and
20523 we have already decremented num_regs if PC is in the mask, we don't
20524 have to worry about PC in this loop.  */
20525 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20526 if (saved_regs_mask & (1 << j))
20528 /* Create RTX for memory load. */
20529 reg = gen_rtx_REG (SImode, j);
20530 tmp = gen_rtx_SET (reg,
20531 gen_frame_mem (SImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx, 4 * i)));
20534 RTX_FRAME_RELATED_P (tmp) = 1;
20536 if (i % 2 == 0)
20538 /* When saved-register index (i) is even, the RTX to be emitted is
20539 yet to be created. Hence create it first. The LDRD pattern we
20540 are generating is :
20541 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20542 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20543 where target registers need not be consecutive. */
20544 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20545 dwarf = NULL_RTX;
20548 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20549 added as 0th element and if i is odd, reg_i is added as 1st element
20550 of LDRD pattern shown above. */
20551 XVECEXP (par, 0, (i % 2)) = tmp;
20552 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20554 if ((i % 2) == 1)
20556 /* When saved-register index (i) is odd, RTXs for both the registers
20557 to be loaded are generated in above given LDRD pattern, and the
20558 pattern can be emitted now. */
20559 par = emit_insn (par);
20560 REG_NOTES (par) = dwarf;
20561 RTX_FRAME_RELATED_P (par) = 1;
20564 i++;
20567 /* If the number of registers pushed is odd and return_in_pc is false, or the
20568 number of registers is even and return_in_pc is true, the last register is
20569 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20570 then use LDR with post-increment. */
20572 /* Increment the stack pointer, based on there being
20573 num_regs 4-byte registers to restore. */
20574 tmp = gen_rtx_SET (stack_pointer_rtx,
20575 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 tmp = emit_insn (tmp);
20578 if (!return_in_pc)
20580 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20581 stack_pointer_rtx, stack_pointer_rtx);
20584 dwarf = NULL_RTX;
20586 if (((num_regs % 2) == 1 && !return_in_pc)
20587 || ((num_regs % 2) == 0 && return_in_pc))
20589 /* Scan for the single register to be popped. Skip until the saved
20590 register is found. */
20591 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20593 /* Gen LDR with post increment here. */
20594 tmp1 = gen_rtx_MEM (SImode,
20595 gen_rtx_POST_INC (SImode,
20596 stack_pointer_rtx));
20597 set_mem_alias_set (tmp1, get_frame_alias_set ());
20599 reg = gen_rtx_REG (SImode, j);
20600 tmp = gen_rtx_SET (reg, tmp1);
20601 RTX_FRAME_RELATED_P (tmp) = 1;
20602 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20604 if (return_in_pc)
20606 /* If return_in_pc, j must be PC_REGNUM. */
20607 gcc_assert (j == PC_REGNUM);
20608 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20609 XVECEXP (par, 0, 0) = ret_rtx;
20610 XVECEXP (par, 0, 1) = tmp;
20611 par = emit_jump_insn (par);
20613 else
20615 par = emit_insn (tmp);
20616 REG_NOTES (par) = dwarf;
20617 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20618 stack_pointer_rtx, stack_pointer_rtx);
20622 else if ((num_regs % 2) == 1 && return_in_pc)
20624 /* There are 2 registers to be popped. So, generate the pattern
20625 pop_multiple_with_stack_update_and_return to pop in PC. */
20626 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20629 return;
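/* Worked example, following the code above: for a saved_regs_mask covering
   { r4, r5, r6 } with no return in PC, the emitted sequence is roughly

       ldrd    r4, r5, [sp]      @ pair restored with offset addressing
       add     sp, sp, #8        @ single stack update for the pairs
       ldr     r6, [sp], #4      @ odd leftover, post-increment load.  */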
20632 /* LDRD in ARM mode needs consecutive registers as operands. This function
20633 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20634 offset addressing and then generates one separate stack update. This provides
20635 more scheduling freedom, compared to writeback on every load. However,
20636 if the function returns using load into PC directly
20637 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20638 before the last load. TODO: Add a peephole optimization to recognize
20639 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20640 peephole optimization to merge the load at stack-offset zero
20641 with the stack update instruction using load with writeback
20642 in post-index addressing mode. */
20643 static void
20644 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20646 int j = 0;
20647 int offset = 0;
20648 rtx par = NULL_RTX;
20649 rtx dwarf = NULL_RTX;
20650 rtx tmp, mem;
20652 /* Restore saved registers. */
20653 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20654 j = 0;
20655 while (j <= LAST_ARM_REGNUM)
20656 if (saved_regs_mask & (1 << j))
20658 if ((j % 2) == 0
20659 && (saved_regs_mask & (1 << (j + 1)))
20660 && (j + 1) != PC_REGNUM)
20662 /* Current register and next register form register pair for which
20663 LDRD can be generated. PC is always the last register popped, and
20664 we handle it separately. */
20665 if (offset > 0)
20666 mem = gen_frame_mem (DImode,
20667 plus_constant (Pmode,
20668 stack_pointer_rtx,
20669 offset));
20670 else
20671 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20673 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20674 tmp = emit_insn (tmp);
20675 RTX_FRAME_RELATED_P (tmp) = 1;
20677 /* Generate dwarf info. */
20679 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20680 gen_rtx_REG (SImode, j),
20681 NULL_RTX);
20682 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20683 gen_rtx_REG (SImode, j + 1),
20684 dwarf);
20686 REG_NOTES (tmp) = dwarf;
20688 offset += 8;
20689 j += 2;
20691 else if (j != PC_REGNUM)
20693 /* Emit a single word load. */
20694 if (offset > 0)
20695 mem = gen_frame_mem (SImode,
20696 plus_constant (Pmode,
20697 stack_pointer_rtx,
20698 offset));
20699 else
20700 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20702 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20703 tmp = emit_insn (tmp);
20704 RTX_FRAME_RELATED_P (tmp) = 1;
20706 /* Generate dwarf info. */
20707 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20708 gen_rtx_REG (SImode, j),
20709 NULL_RTX);
20711 offset += 4;
20712 j += 1;
20714 else /* j == PC_REGNUM */
20715 j++;
20717 else
20718 j++;
20720 /* Update the stack. */
20721 if (offset > 0)
20723 tmp = gen_rtx_SET (stack_pointer_rtx,
20724 plus_constant (Pmode,
20725 stack_pointer_rtx,
20726 offset));
20727 tmp = emit_insn (tmp);
20728 arm_add_cfa_adjust_cfa_note (tmp, offset,
20729 stack_pointer_rtx, stack_pointer_rtx);
20730 offset = 0;
20733 if (saved_regs_mask & (1 << PC_REGNUM))
20735 /* Only PC is to be popped. */
20736 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20737 XVECEXP (par, 0, 0) = ret_rtx;
20738 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20739 gen_frame_mem (SImode,
20740 gen_rtx_POST_INC (SImode,
20741 stack_pointer_rtx)));
20742 RTX_FRAME_RELATED_P (tmp) = 1;
20743 XVECEXP (par, 0, 1) = tmp;
20744 par = emit_jump_insn (par);
20746 /* Generate dwarf info. */
20747 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20748 gen_rtx_REG (SImode, PC_REGNUM),
20749 NULL_RTX);
20750 REG_NOTES (par) = dwarf;
20751 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20752 stack_pointer_rtx, stack_pointer_rtx);
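/* Worked example, following the code above: for a saved_regs_mask covering
   { r4, r5, r6, pc } in ARM mode the emitted sequence is roughly

       ldrd    r4, r5, [sp]      @ consecutive pair, offset addressing
       ldr     r6, [sp, #8]      @ r7 is not saved, so r6 gets a single load
       add     sp, sp, #12       @ one stack update for the loads above
       ldr     pc, [sp], #4      @ return by popping the saved PC last.  */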
20756 /* Calculate the size of the return value that is passed in registers. */
20757 static unsigned
20758 arm_size_return_regs (void)
20760 machine_mode mode;
20762 if (crtl->return_rtx != 0)
20763 mode = GET_MODE (crtl->return_rtx);
20764 else
20765 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20767 return GET_MODE_SIZE (mode);
20770 /* Return true if the current function needs to save/restore LR. */
20771 static bool
20772 thumb_force_lr_save (void)
20774 return !cfun->machine->lr_save_eliminated
20775 && (!crtl->is_leaf
20776 || thumb_far_jump_used_p ()
20777 || df_regs_ever_live_p (LR_REGNUM));
20780 /* We do not know whether r3 will be available
20781 when the tail call in this particular case
20782 is indirect. */
20783 static bool
20784 is_indirect_tailcall_p (rtx call)
20786 rtx pat = PATTERN (call);
20788 /* Indirect tail call. */
20789 pat = XVECEXP (pat, 0, 0);
20790 if (GET_CODE (pat) == SET)
20791 pat = SET_SRC (pat);
20793 pat = XEXP (XEXP (pat, 0), 0);
20794 return REG_P (pat);
20797 /* Return true if r3 is used by any of the tail call insns in the
20798 current function. */
20799 static bool
20800 any_sibcall_could_use_r3 (void)
20802 edge_iterator ei;
20803 edge e;
20805 if (!crtl->tail_call_emit)
20806 return false;
20807 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20808 if (e->flags & EDGE_SIBCALL)
20810 rtx_insn *call = BB_END (e->src);
20811 if (!CALL_P (call))
20812 call = prev_nonnote_nondebug_insn (call);
20813 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20814 if (find_regno_fusage (call, USE, 3)
20815 || is_indirect_tailcall_p (call))
20816 return true;
20818 return false;
20822 /* Compute the distance from register FROM to register TO.
20823 These can be the arg pointer (26), the soft frame pointer (25),
20824 the stack pointer (13) or the hard frame pointer (11).
20825 In thumb mode r7 is used as the soft frame pointer, if needed.
20826 Typical stack layout looks like this:
20828 old stack pointer -> | |
20829 ----
20830 | | \
20831 | | saved arguments for
20832 | | vararg functions
20833 | | /
20835 hard FP & arg pointer -> | | \
20836 | | stack
20837 | | frame
20838 | | /
20840 | | \
20841 | | call saved
20842 | | registers
20843 soft frame pointer -> | | /
20845 | | \
20846 | | local
20847 | | variables
20848 locals base pointer -> | | /
20850 | | \
20851 | | outgoing
20852 | | arguments
20853 current stack pointer -> | | /
20856 For a given function some or all of these stack components
20857 may not be needed, giving rise to the possibility of
20858 eliminating some of the registers.
20860 The values returned by this function must reflect the behavior
20861 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20863 The sign of the number returned reflects the direction of stack
20864 growth, so the values are positive for all eliminations except
20865 from the soft frame pointer to the hard frame pointer.
20867 SFP may point just inside the local variables block to ensure correct
20868 alignment. */
20871 /* Return cached stack offsets. */
20873 static arm_stack_offsets *
20874 arm_get_frame_offsets (void)
20876 struct arm_stack_offsets *offsets;
20878 offsets = &cfun->machine->stack_offsets;
20880 return offsets;
20884 /* Calculate stack offsets. These are used to calculate register elimination
20885 offsets and in prologue/epilogue code. Also calculates which registers
20886 should be saved. */
20888 static void
20889 arm_compute_frame_layout (void)
20891 struct arm_stack_offsets *offsets;
20892 unsigned long func_type;
20893 int saved;
20894 int core_saved;
20895 HOST_WIDE_INT frame_size;
20896 int i;
20898 offsets = &cfun->machine->stack_offsets;
20900 /* Initially this is the size of the local variables. It will be translated
20901 into an offset once we have determined the size of preceding data. */
20902 frame_size = ROUND_UP_WORD (get_frame_size ());
20904 /* Space for variadic functions. */
20905 offsets->saved_args = crtl->args.pretend_args_size;
20907 /* In Thumb mode this is incorrect, but never used. */
20908 offsets->frame
20909 = (offsets->saved_args
20910 + arm_compute_static_chain_stack_bytes ()
20911 + (frame_pointer_needed ? 4 : 0));
20913 if (TARGET_32BIT)
20915 unsigned int regno;
20917 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20918 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20919 saved = core_saved;
20921 /* We know that SP will be doubleword aligned on entry, and we must
20922 preserve that condition at any subroutine call. We also require the
20923 soft frame pointer to be doubleword aligned. */
20925 if (TARGET_REALLY_IWMMXT)
20927 /* Check for the call-saved iWMMXt registers. */
20928 for (regno = FIRST_IWMMXT_REGNUM;
20929 regno <= LAST_IWMMXT_REGNUM;
20930 regno++)
20931 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20932 saved += 8;
20935 func_type = arm_current_func_type ();
20936 /* Space for saved VFP registers. */
20937 if (! IS_VOLATILE (func_type)
20938 && TARGET_HARD_FLOAT)
20939 saved += arm_get_vfp_saved_size ();
20941 else /* TARGET_THUMB1 */
20943 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20944 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20945 saved = core_saved;
20946 if (TARGET_BACKTRACE)
20947 saved += 16;
20950 /* Saved registers include the stack frame. */
20951 offsets->saved_regs
20952 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20953 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20955 /* A leaf function does not need any stack alignment if it has nothing
20956 on the stack. */
20957 if (crtl->is_leaf && frame_size == 0
20958 /* However if it calls alloca(), we have a dynamically allocated
20959 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20960 && ! cfun->calls_alloca)
20962 offsets->outgoing_args = offsets->soft_frame;
20963 offsets->locals_base = offsets->soft_frame;
20964 return;
20967 /* Ensure SFP has the correct alignment. */
20968 if (ARM_DOUBLEWORD_ALIGN
20969 && (offsets->soft_frame & 7))
20971 offsets->soft_frame += 4;
20972 /* Try to align stack by pushing an extra reg. Don't bother doing this
20973 when there is a stack frame as the alignment will be rolled into
20974 the normal stack adjustment. */
20975 if (frame_size + crtl->outgoing_args_size == 0)
20977 int reg = -1;
20979 /* Register r3 is caller-saved. Normally it does not need to be
20980 saved on entry by the prologue. However if we choose to save
20981 it for padding then we may confuse the compiler into thinking
20982 a prologue sequence is required when in fact it is not. This
20983 will occur when shrink-wrapping if r3 is used as a scratch
20984 register and there are no other callee-saved writes.
20986 This situation can be avoided when other callee-saved registers
20987 are available: r3 is not mandatory, so we prefer a callee-saved
20988 register for the padding. */
20989 bool prefer_callee_reg_p = false;
20991 /* If it is safe to use r3, then do so. This sometimes
20992 generates better code on Thumb-2 by avoiding the need to
20993 use 32-bit push/pop instructions. */
20994 if (! any_sibcall_could_use_r3 ()
20995 && arm_size_return_regs () <= 12
20996 && (offsets->saved_regs_mask & (1 << 3)) == 0
20997 && (TARGET_THUMB2
20998 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21000 reg = 3;
21001 if (!TARGET_THUMB2)
21002 prefer_callee_reg_p = true;
21004 if (reg == -1
21005 || prefer_callee_reg_p)
21007 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21009 /* Avoid fixed registers; they may be changed at
21010 arbitrary times so it's unsafe to restore them
21011 during the epilogue. */
21012 if (!fixed_regs[i]
21013 && (offsets->saved_regs_mask & (1 << i)) == 0)
21015 reg = i;
21016 break;
21021 if (reg != -1)
21023 offsets->saved_regs += 4;
21024 offsets->saved_regs_mask |= (1 << reg);
21029 offsets->locals_base = offsets->soft_frame + frame_size;
21030 offsets->outgoing_args = (offsets->locals_base
21031 + crtl->outgoing_args_size);
21033 if (ARM_DOUBLEWORD_ALIGN)
21035 /* Ensure SP remains doubleword aligned. */
21036 if (offsets->outgoing_args & 7)
21037 offsets->outgoing_args += 4;
21038 gcc_assert (!(offsets->outgoing_args & 7));
21043 /* Calculate the relative offsets for the different stack pointers. Positive
21044 offsets are in the direction of stack growth. */
21046 HOST_WIDE_INT
21047 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21049 arm_stack_offsets *offsets;
21051 offsets = arm_get_frame_offsets ();
21053 /* OK, now we have enough information to compute the distances.
21054 There must be an entry in these switch tables for each pair
21055 of registers in ELIMINABLE_REGS, even if some of the entries
21056 seem to be redundant or useless. */
21057 switch (from)
21059 case ARG_POINTER_REGNUM:
21060 switch (to)
21062 case THUMB_HARD_FRAME_POINTER_REGNUM:
21063 return 0;
21065 case FRAME_POINTER_REGNUM:
21066 /* This is the reverse of the soft frame pointer
21067 to hard frame pointer elimination below. */
21068 return offsets->soft_frame - offsets->saved_args;
21070 case ARM_HARD_FRAME_POINTER_REGNUM:
21071 /* This is only non-zero in the case where the static chain register
21072 is stored above the frame. */
21073 return offsets->frame - offsets->saved_args - 4;
21075 case STACK_POINTER_REGNUM:
21076 /* If nothing has been pushed on the stack at all
21077 then this will return -4. This *is* correct! */
21078 return offsets->outgoing_args - (offsets->saved_args + 4);
21080 default:
21081 gcc_unreachable ();
21083 gcc_unreachable ();
21085 case FRAME_POINTER_REGNUM:
21086 switch (to)
21088 case THUMB_HARD_FRAME_POINTER_REGNUM:
21089 return 0;
21091 case ARM_HARD_FRAME_POINTER_REGNUM:
21092 /* The hard frame pointer points to the top entry in the
21093 stack frame. The soft frame pointer to the bottom entry
21094 in the stack frame. If there is no stack frame at all,
21095 then they are identical. */
21097 return offsets->frame - offsets->soft_frame;
21099 case STACK_POINTER_REGNUM:
21100 return offsets->outgoing_args - offsets->soft_frame;
21102 default:
21103 gcc_unreachable ();
21105 gcc_unreachable ();
21107 default:
21108 /* You cannot eliminate from the stack pointer.
21109 In theory you could eliminate from the hard frame
21110 pointer to the stack pointer, but this will never
21111 happen, since if a stack frame is not needed the
21112 hard frame pointer will never be used. */
21113 gcc_unreachable ();
21117 /* Given FROM and TO register numbers, say whether this elimination is
21118 allowed. Frame pointer elimination is automatically handled.
21120 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21121 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21122 pointer, we must eliminate FRAME_POINTER_REGNUM into
21123 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21124 ARG_POINTER_REGNUM. */
21126 bool
21127 arm_can_eliminate (const int from, const int to)
21129 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21130 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21131 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21132 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21133 true);
21136 /* Emit RTL to save coprocessor registers on function entry. Returns the
21137 number of bytes pushed. */
21139 static int
21140 arm_save_coproc_regs(void)
21142 int saved_size = 0;
21143 unsigned reg;
21144 unsigned start_reg;
21145 rtx insn;
21147 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21148 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21150 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21151 insn = gen_rtx_MEM (V2SImode, insn);
21152 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21153 RTX_FRAME_RELATED_P (insn) = 1;
21154 saved_size += 8;
21157 if (TARGET_HARD_FLOAT)
21159 start_reg = FIRST_VFP_REGNUM;
21161 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21163 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21164 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21166 if (start_reg != reg)
21167 saved_size += vfp_emit_fstmd (start_reg,
21168 (reg - start_reg) / 2);
21169 start_reg = reg + 2;
21172 if (start_reg != reg)
21173 saved_size += vfp_emit_fstmd (start_reg,
21174 (reg - start_reg) / 2);
21176 return saved_size;
21180 /* Set the Thumb frame pointer from the stack pointer. */
21182 static void
21183 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21185 HOST_WIDE_INT amount;
21186 rtx insn, dwarf;
21188 amount = offsets->outgoing_args - offsets->locals_base;
21189 if (amount < 1024)
21190 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21191 stack_pointer_rtx, GEN_INT (amount)));
21192 else
21194 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21195 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21196 expects the first two operands to be the same. */
21197 if (TARGET_THUMB2)
21199 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21200 stack_pointer_rtx,
21201 hard_frame_pointer_rtx));
21203 else
21205 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21206 hard_frame_pointer_rtx,
21207 stack_pointer_rtx));
21209 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21210 plus_constant (Pmode, stack_pointer_rtx, amount));
21211 RTX_FRAME_RELATED_P (dwarf) = 1;
21212 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21215 RTX_FRAME_RELATED_P (insn) = 1;
21218 struct scratch_reg {
21219 rtx reg;
21220 bool saved;
21223 /* Return a short-lived scratch register for use as a 2nd scratch register on
21224 function entry after the registers are saved in the prologue. This register
21225 must be released by means of release_scratch_register_on_entry. IP is not
21226 considered since it is always used as the 1st scratch register if available.
21228 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21229 mask of live registers. */
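/* For example, if LR has been pushed (its bit is set in LIVE_REGS) and
   REGNO1 is not LR, then LR itself is reused; otherwise the first of
   r4-r10 that is in LIVE_REGS and differs from REGNO1 is chosen;
   failing that, r2 or r3 is taken and, if it is live on entry, spilled
   with a push that release_scratch_register_on_entry later undoes.  */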
21231 static void
21232 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21233 unsigned long live_regs)
21235 int regno = -1;
21237 sr->saved = false;
21239 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21240 regno = LR_REGNUM;
21241 else
21243 unsigned int i;
21245 for (i = 4; i < 11; i++)
21246 if (regno1 != i && (live_regs & (1 << i)) != 0)
21248 regno = i;
21249 break;
21252 if (regno < 0)
21254 /* If IP is used as the 1st scratch register for a nested function,
21255 then either r3 wasn't available or it is being used to preserve IP. */
21256 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21257 regno1 = 3;
21258 regno = (regno1 == 3 ? 2 : 3);
21259 sr->saved
21260 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21261 regno);
21265 sr->reg = gen_rtx_REG (SImode, regno);
21266 if (sr->saved)
21268 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21269 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21270 rtx x = gen_rtx_SET (stack_pointer_rtx,
21271 plus_constant (Pmode, stack_pointer_rtx, -4));
21272 RTX_FRAME_RELATED_P (insn) = 1;
21273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21277 /* Release a scratch register obtained from the preceding function. */
21279 static void
21280 release_scratch_register_on_entry (struct scratch_reg *sr)
21282 if (sr->saved)
21284 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21285 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21286 rtx x = gen_rtx_SET (stack_pointer_rtx,
21287 plus_constant (Pmode, stack_pointer_rtx, 4));
21288 RTX_FRAME_RELATED_P (insn) = 1;
21289 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21293 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21295 #if PROBE_INTERVAL > 4096
21296 #error Cannot use indexed addressing mode for stack probing
21297 #endif
21299 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21300 inclusive. These are offsets from the current stack pointer. REGNO1
21301 is the index number of the 1st scratch register and LIVE_REGS is the
21302 mask of live registers. */
21304 static void
21305 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21306 unsigned int regno1, unsigned long live_regs)
21308 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21310 /* See if we have a constant small number of probes to generate. If so,
21311 that's the easy case. */
21312 if (size <= PROBE_INTERVAL)
21314 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21315 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21316 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21319 /* The run-time loop is made up of 10 insns in the generic case while the
21320 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21321 else if (size <= 5 * PROBE_INTERVAL)
21323 HOST_WIDE_INT i, rem;
21325 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21326 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21327 emit_stack_probe (reg1);
21329 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21330 it exceeds SIZE. If only two probes are needed, this will not
21331 generate any code. Then probe at FIRST + SIZE. */
21332 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21334 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21335 emit_stack_probe (reg1);
21338 rem = size - (i - PROBE_INTERVAL);
21339 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21341 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21342 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21344 else
21345 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21348 /* Otherwise, do the same as above, but in a loop. Note that we must be
21349 extra careful with variables wrapping around because we might be at
21350 the very top (or the very bottom) of the address space and we have
21351 to be able to handle this case properly; in particular, we use an
21352 equality test for the loop condition. */
21353 else
21355 HOST_WIDE_INT rounded_size;
21356 struct scratch_reg sr;
21358 get_scratch_register_on_entry (&sr, regno1, live_regs);
21360 emit_move_insn (reg1, GEN_INT (first));
21363 /* Step 1: round SIZE to the previous multiple of the interval. */
21365 rounded_size = size & -PROBE_INTERVAL;
21366 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21369 /* Step 2: compute initial and final value of the loop counter. */
21371 /* TEST_ADDR = SP + FIRST. */
21372 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21374 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21375 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21378 /* Step 3: the loop
21382 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21383 probe at TEST_ADDR
21385 while (TEST_ADDR != LAST_ADDR)
21387 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21388 until it is equal to ROUNDED_SIZE. */
21390 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21393 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21394 that SIZE is equal to ROUNDED_SIZE. */
21396 if (size != rounded_size)
21398 HOST_WIDE_INT rem = size - rounded_size;
21400 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21402 emit_set_insn (sr.reg,
21403 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21404 emit_stack_probe (plus_constant (Pmode, sr.reg,
21405 PROBE_INTERVAL - rem));
21407 else
21408 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21411 release_scratch_register_on_entry (&sr);
21414 /* Make sure nothing is scheduled before we are done. */
21415 emit_insn (gen_blockage ());
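/* As a concrete illustration, assuming the default 4 KiB PROBE_INTERVAL,
   a call with SIZE == 10000 takes the second branch above and probes the
   words FIRST + 4096, FIRST + 8192 and FIRST + SIZE bytes below the
   incoming stack pointer, using only REG1.  */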
21418 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21419 absolute addresses. */
21421 const char *
21422 output_probe_stack_range (rtx reg1, rtx reg2)
21424 static int labelno = 0;
21425 char loop_lab[32];
21426 rtx xops[2];
21428 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21430 /* Loop. */
21431 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21433 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21434 xops[0] = reg1;
21435 xops[1] = GEN_INT (PROBE_INTERVAL);
21436 output_asm_insn ("sub\t%0, %0, %1", xops);
21438 /* Probe at TEST_ADDR. */
21439 output_asm_insn ("str\tr0, [%0, #0]", xops);
21441 /* Test if TEST_ADDR == LAST_ADDR. */
21442 xops[1] = reg2;
21443 output_asm_insn ("cmp\t%0, %1", xops);
21445 /* Branch. */
21446 fputs ("\tbne\t", asm_out_file);
21447 assemble_name_raw (asm_out_file, loop_lab);
21448 fputc ('\n', asm_out_file);
21450 return "";
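/* For instance, with REG1 == r4, REG2 == r5 and a 4 KiB PROBE_INTERVAL,
   the emitted loop looks roughly like this (label name illustrative):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
   */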
21453 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21454 function. */
21455 void
21456 arm_expand_prologue (void)
21458 rtx amount;
21459 rtx insn;
21460 rtx ip_rtx;
21461 unsigned long live_regs_mask;
21462 unsigned long func_type;
21463 int fp_offset = 0;
21464 int saved_pretend_args = 0;
21465 int saved_regs = 0;
21466 unsigned HOST_WIDE_INT args_to_push;
21467 HOST_WIDE_INT size;
21468 arm_stack_offsets *offsets;
21469 bool clobber_ip;
21471 func_type = arm_current_func_type ();
21473 /* Naked functions don't have prologues. */
21474 if (IS_NAKED (func_type))
21476 if (flag_stack_usage_info)
21477 current_function_static_stack_size = 0;
21478 return;
21481 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21482 args_to_push = crtl->args.pretend_args_size;
21484 /* Compute which registers we will have to save onto the stack. */
21485 offsets = arm_get_frame_offsets ();
21486 live_regs_mask = offsets->saved_regs_mask;
21488 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21490 if (IS_STACKALIGN (func_type))
21492 rtx r0, r1;
21494 /* Handle a word-aligned stack pointer. We generate the following:
21496 mov r0, sp
21497 bic r1, r0, #7
21498 mov sp, r1
21499 <save and restore r0 in normal prologue/epilogue>
21500 mov sp, r0
21501 bx lr
21503 The unwinder doesn't need to know about the stack realignment.
21504 Just tell it we saved SP in r0. */
21505 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21507 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21508 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21510 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21511 RTX_FRAME_RELATED_P (insn) = 1;
21512 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21514 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21516 /* ??? The CFA changes here, which may cause GDB to conclude that it
21517 has entered a different function. That said, the unwind info is
21518 correct, individually, before and after this instruction because
21519 we've described the save of SP, which will override the default
21520 handling of SP as restoring from the CFA. */
21521 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21524 /* The static chain register is the same as the IP register. If it is
21525 clobbered when creating the frame, we need to save and restore it. */
21526 clobber_ip = IS_NESTED (func_type)
21527 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21528 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21529 || flag_stack_clash_protection)
21530 && !df_regs_ever_live_p (LR_REGNUM)
21531 && arm_r3_live_at_start_p ()));
21533 /* Find somewhere to store IP whilst the frame is being created.
21534 We try the following places in order:
21536 1. The last argument register r3 if it is available.
21537 2. A slot on the stack above the frame if there are no
21538 arguments to push onto the stack.
21539 3. Register r3 again, after pushing the argument registers
21540 onto the stack, if this is a varargs function.
21541 4. The last slot on the stack created for the arguments to
21542 push, if this isn't a varargs function.
21544 Note - we only need to tell the dwarf2 backend about the SP
21545 adjustment in the second variant; the static chain register
21546 doesn't need to be unwound, as it doesn't contain a value
21547 inherited from the caller. */
21548 if (clobber_ip)
21550 if (!arm_r3_live_at_start_p ())
21551 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21552 else if (args_to_push == 0)
21554 rtx addr, dwarf;
21556 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21557 saved_regs += 4;
21559 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21560 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21561 fp_offset = 4;
21563 /* Just tell the dwarf backend that we adjusted SP. */
21564 dwarf = gen_rtx_SET (stack_pointer_rtx,
21565 plus_constant (Pmode, stack_pointer_rtx,
21566 -fp_offset));
21567 RTX_FRAME_RELATED_P (insn) = 1;
21568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21570 else
21572 /* Store the args on the stack. */
21573 if (cfun->machine->uses_anonymous_args)
21575 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21576 (0xf0 >> (args_to_push / 4)) & 0xf);
21577 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21578 saved_pretend_args = 1;
21580 else
21582 rtx addr, dwarf;
21584 if (args_to_push == 4)
21585 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21586 else
21587 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21588 plus_constant (Pmode,
21589 stack_pointer_rtx,
21590 -args_to_push));
21592 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21594 /* Just tell the dwarf backend that we adjusted SP. */
21595 dwarf = gen_rtx_SET (stack_pointer_rtx,
21596 plus_constant (Pmode, stack_pointer_rtx,
21597 -args_to_push));
21598 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21601 RTX_FRAME_RELATED_P (insn) = 1;
21602 fp_offset = args_to_push;
21603 args_to_push = 0;
21607 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21609 if (IS_INTERRUPT (func_type))
21611 /* Interrupt functions must not corrupt any registers.
21612 Creating a frame pointer however, corrupts the IP
21613 register, so we must push it first. */
21614 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21616 /* Do not set RTX_FRAME_RELATED_P on this insn.
21617 The dwarf stack unwinding code only wants to see one
21618 stack decrement per function, and this is not it. If
21619 this instruction is labeled as being part of the frame
21620 creation sequence then dwarf2out_frame_debug_expr will
21621 die when it encounters the assignment of IP to FP
21622 later on, since the use of SP here establishes SP as
21623 the CFA register and not IP.
21625 Anyway this instruction is not really part of the stack
21626 frame creation although it is part of the prologue. */
21629 insn = emit_set_insn (ip_rtx,
21630 plus_constant (Pmode, stack_pointer_rtx,
21631 fp_offset));
21632 RTX_FRAME_RELATED_P (insn) = 1;
21635 if (args_to_push)
21637 /* Push the argument registers, or reserve space for them. */
21638 if (cfun->machine->uses_anonymous_args)
21639 insn = emit_multi_reg_push
21640 ((0xf0 >> (args_to_push / 4)) & 0xf,
21641 (0xf0 >> (args_to_push / 4)) & 0xf);
21642 else
21643 insn = emit_insn
21644 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21645 GEN_INT (- args_to_push)));
21646 RTX_FRAME_RELATED_P (insn) = 1;
21649 /* If this is an interrupt service routine, and the link register
21650 is going to be pushed, and we're not generating the extra push of
21651 IP (needed when a frame pointer is required and the APCS frame layout is used),
21652 subtracting four from LR now will mean that the function return
21653 can be done with a single instruction. */
21654 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21655 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21656 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21657 && TARGET_ARM)
21659 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21661 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21664 if (live_regs_mask)
21666 unsigned long dwarf_regs_mask = live_regs_mask;
21668 saved_regs += bit_count (live_regs_mask) * 4;
21669 if (optimize_size && !frame_pointer_needed
21670 && saved_regs == offsets->saved_regs - offsets->saved_args)
21672 /* If no coprocessor registers are being pushed and we don't have
21673 to worry about a frame pointer then push extra registers to
21674 create the stack frame. This is done in a way that does not
21675 alter the frame layout, so is independent of the epilogue. */
21676 int n;
21677 int frame;
21678 n = 0;
21679 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21680 n++;
21681 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21682 if (frame && n * 4 >= frame)
21684 n = frame / 4;
21685 live_regs_mask |= (1 << n) - 1;
21686 saved_regs += frame;
21690 if (TARGET_LDRD
21691 && current_tune->prefer_ldrd_strd
21692 && !optimize_function_for_size_p (cfun))
21694 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21695 if (TARGET_THUMB2)
21696 thumb2_emit_strd_push (live_regs_mask);
21697 else if (TARGET_ARM
21698 && !TARGET_APCS_FRAME
21699 && !IS_INTERRUPT (func_type))
21700 arm_emit_strd_push (live_regs_mask);
21701 else
21703 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21704 RTX_FRAME_RELATED_P (insn) = 1;
21707 else
21709 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21710 RTX_FRAME_RELATED_P (insn) = 1;
21714 if (! IS_VOLATILE (func_type))
21715 saved_regs += arm_save_coproc_regs ();
21717 if (frame_pointer_needed && TARGET_ARM)
21719 /* Create the new frame pointer. */
21720 if (TARGET_APCS_FRAME)
21722 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21723 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21724 RTX_FRAME_RELATED_P (insn) = 1;
21726 else
21728 insn = GEN_INT (saved_regs - (4 + fp_offset));
21729 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21730 stack_pointer_rtx, insn));
21731 RTX_FRAME_RELATED_P (insn) = 1;
21735 size = offsets->outgoing_args - offsets->saved_args;
21736 if (flag_stack_usage_info)
21737 current_function_static_stack_size = size;
21739 /* If this isn't an interrupt service routine and we have a frame, then do
21740 stack checking. We use IP as the first scratch register, except for the
21741 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21742 if (!IS_INTERRUPT (func_type)
21743 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21744 || flag_stack_clash_protection))
21746 unsigned int regno;
21748 if (!IS_NESTED (func_type) || clobber_ip)
21749 regno = IP_REGNUM;
21750 else if (df_regs_ever_live_p (LR_REGNUM))
21751 regno = LR_REGNUM;
21752 else
21753 regno = 3;
21755 if (crtl->is_leaf && !cfun->calls_alloca)
21757 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21758 arm_emit_probe_stack_range (get_stack_check_protect (),
21759 size - get_stack_check_protect (),
21760 regno, live_regs_mask);
21762 else if (size > 0)
21763 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21764 regno, live_regs_mask);
21767 /* Recover the static chain register. */
21768 if (clobber_ip)
21770 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21771 insn = gen_rtx_REG (SImode, 3);
21772 else
21774 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21775 insn = gen_frame_mem (SImode, insn);
21777 emit_set_insn (ip_rtx, insn);
21778 emit_insn (gen_force_register_use (ip_rtx));
21781 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21783 /* This add can produce multiple insns for a large constant, so we
21784 need to get tricky. */
21785 rtx_insn *last = get_last_insn ();
21787 amount = GEN_INT (offsets->saved_args + saved_regs
21788 - offsets->outgoing_args);
21790 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21791 amount));
21794 last = last ? NEXT_INSN (last) : get_insns ();
21795 RTX_FRAME_RELATED_P (last) = 1;
21797 while (last != insn);
21799 /* If the frame pointer is needed, emit a special barrier that
21800 will prevent the scheduler from moving stores to the frame
21801 before the stack adjustment. */
21802 if (frame_pointer_needed)
21803 emit_insn (gen_stack_tie (stack_pointer_rtx,
21804 hard_frame_pointer_rtx));
21808 if (frame_pointer_needed && TARGET_THUMB2)
21809 thumb_set_frame_pointer (offsets);
21811 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21813 unsigned long mask;
21815 mask = live_regs_mask;
21816 mask &= THUMB2_WORK_REGS;
21817 if (!IS_NESTED (func_type))
21818 mask |= (1 << IP_REGNUM);
21819 arm_load_pic_register (mask);
21822 /* If we are profiling, make sure no instructions are scheduled before
21823 the call to mcount. Similarly if the user has requested no
21824 scheduling in the prolog. Similarly if we want non-call exceptions
21825 using the EABI unwinder, to prevent faulting instructions from being
21826 swapped with a stack adjustment. */
21827 if (crtl->profile || !TARGET_SCHED_PROLOG
21828 || (arm_except_unwind_info (&global_options) == UI_TARGET
21829 && cfun->can_throw_non_call_exceptions))
21830 emit_insn (gen_blockage ());
21832 /* If the link register is being kept alive, with the return address in it,
21833 then make sure that it does not get reused by the ce2 pass. */
21834 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21835 cfun->machine->lr_save_eliminated = 1;
21838 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21839 static void
21840 arm_print_condition (FILE *stream)
21842 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21844 /* Branch conversion is not implemented for Thumb-2. */
21845 if (TARGET_THUMB)
21847 output_operand_lossage ("predicated Thumb instruction");
21848 return;
21850 if (current_insn_predicate != NULL)
21852 output_operand_lossage
21853 ("predicated instruction in conditional sequence");
21854 return;
21857 fputs (arm_condition_codes[arm_current_cc], stream);
21859 else if (current_insn_predicate)
21861 enum arm_cond_code code;
21863 if (TARGET_THUMB1)
21865 output_operand_lossage ("predicated Thumb instruction");
21866 return;
21869 code = get_arm_condition_code (current_insn_predicate);
21870 fputs (arm_condition_codes[code], stream);
21875 /* Globally reserved letters: acln
21876 Punctuation letters currently used: @_|?().!#
21877 Lower case letters currently used: bcdefhimpqtvwxyz
21878 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21879 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21881 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21883 If CODE is 'd', then X is a condition operand and the instruction
21884 should only be executed if the condition is true.
21885 If CODE is 'D', then X is a condition operand and the instruction
21886 should only be executed if the condition is false: however, if the mode
21887 of the comparison is CCFPEmode, then always execute the instruction -- we
21888 do this because in these circumstances !GE does not necessarily imply LT;
21889 in these cases the instruction pattern will take care to make sure that
21890 an instruction containing %d will follow, thereby undoing the effects of
21891 doing this instruction unconditionally.
21892 If CODE is 'N' then X is a floating point operand that must be negated
21893 before output.
21894 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21895 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
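/* A few illustrative expansions: with X = (const_int 10), %B prints -11
   (the sign-extended bitwise inverse) and %x prints #0xa; with
   X = (const_int 8), %b prints #3 (its log2); and with X = (reg:DI r0),
   %M prints {r0-r1}.  */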
21896 static void
21897 arm_print_operand (FILE *stream, rtx x, int code)
21899 switch (code)
21901 case '@':
21902 fputs (ASM_COMMENT_START, stream);
21903 return;
21905 case '_':
21906 fputs (user_label_prefix, stream);
21907 return;
21909 case '|':
21910 fputs (REGISTER_PREFIX, stream);
21911 return;
21913 case '?':
21914 arm_print_condition (stream);
21915 return;
21917 case '.':
21918 /* The current condition code for a condition code setting instruction.
21919 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21920 fputc('s', stream);
21921 arm_print_condition (stream);
21922 return;
21924 case '!':
21925 /* If the instruction is conditionally executed then print
21926 the current condition code, otherwise print 's'. */
21927 gcc_assert (TARGET_THUMB2);
21928 if (current_insn_predicate)
21929 arm_print_condition (stream);
21930 else
21931 fputc('s', stream);
21932 break;
21934 /* %# is a "break" sequence. It doesn't output anything, but is used to
21935 separate e.g. operand numbers from following text, if that text consists
21936 of further digits which we don't want to be part of the operand
21937 number. */
21938 case '#':
21939 return;
21941 case 'N':
21943 REAL_VALUE_TYPE r;
21944 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21945 fprintf (stream, "%s", fp_const_from_val (&r));
21947 return;
21949 /* An integer or symbol address without a preceding # sign. */
21950 case 'c':
21951 switch (GET_CODE (x))
21953 case CONST_INT:
21954 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21955 break;
21957 case SYMBOL_REF:
21958 output_addr_const (stream, x);
21959 break;
21961 case CONST:
21962 if (GET_CODE (XEXP (x, 0)) == PLUS
21963 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21965 output_addr_const (stream, x);
21966 break;
21968 /* Fall through. */
21970 default:
21971 output_operand_lossage ("Unsupported operand for code '%c'", code);
21973 return;
21975 /* An integer that we want to print in HEX. */
21976 case 'x':
21977 switch (GET_CODE (x))
21979 case CONST_INT:
21980 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21981 break;
21983 default:
21984 output_operand_lossage ("Unsupported operand for code '%c'", code);
21986 return;
21988 case 'B':
21989 if (CONST_INT_P (x))
21991 HOST_WIDE_INT val;
21992 val = ARM_SIGN_EXTEND (~INTVAL (x));
21993 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21995 else
21997 putc ('~', stream);
21998 output_addr_const (stream, x);
22000 return;
22002 case 'b':
22003 /* Print the log2 of a CONST_INT. */
22005 HOST_WIDE_INT val;
22007 if (!CONST_INT_P (x)
22008 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22009 output_operand_lossage ("Unsupported operand for code '%c'", code);
22010 else
22011 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22013 return;
22015 case 'L':
22016 /* The low 16 bits of an immediate constant. */
22017 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22018 return;
22020 case 'i':
22021 fprintf (stream, "%s", arithmetic_instr (x, 1));
22022 return;
22024 case 'I':
22025 fprintf (stream, "%s", arithmetic_instr (x, 0));
22026 return;
22028 case 'S':
22030 HOST_WIDE_INT val;
22031 const char *shift;
22033 shift = shift_op (x, &val);
22035 if (shift)
22037 fprintf (stream, ", %s ", shift);
22038 if (val == -1)
22039 arm_print_operand (stream, XEXP (x, 1), 0);
22040 else
22041 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22044 return;
22046 /* An explanation of the 'Q', 'R' and 'H' register operands:
22048 In a pair of registers containing a DI or DF value the 'Q'
22049 operand returns the register number of the register containing
22050 the least significant part of the value. The 'R' operand returns
22051 the register number of the register containing the most
22052 significant part of the value.
22054 The 'H' operand returns the higher of the two register numbers.
22055 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22056 same as the 'Q' operand, since the most significant part of the
22057 value is held in the lower-numbered register. The reverse is true
22058 on systems where WORDS_BIG_ENDIAN is false.
22060 The purpose of these operands is to distinguish between cases
22061 where the endian-ness of the values is important (for example
22062 when they are added together), and cases where the endian-ness
22063 is irrelevant, but the order of register operations is important.
22064 For example when loading a value from memory into a register
22065 pair, the endian-ness does not matter. Provided that the value
22066 from the lower memory address is put into the lower numbered
22067 register, and the value from the higher address is put into the
22068 higher numbered register, the load will work regardless of whether
22069 the value being loaded is big-wordian or little-wordian. The
22070 order of the two register loads can matter however, if the address
22071 of the memory location is actually held in one of the registers
22072 being overwritten by the load.
22074 The 'Q' and 'R' constraints are also available for 64-bit
22075 constants. */
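/* For instance, for a DImode value held in the pair r0/r1 on a
   little-endian target, %Q prints r0, %R prints r1 and %H prints r1;
   when WORDS_BIG_ENDIAN, %Q prints r1 and %R prints r0, while %H
   still prints r1.  */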
22076 case 'Q':
22077 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22079 rtx part = gen_lowpart (SImode, x);
22080 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22081 return;
22084 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22086 output_operand_lossage ("invalid operand for code '%c'", code);
22087 return;
22090 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22091 return;
22093 case 'R':
22094 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22096 machine_mode mode = GET_MODE (x);
22097 rtx part;
22099 if (mode == VOIDmode)
22100 mode = DImode;
22101 part = gen_highpart_mode (SImode, mode, x);
22102 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22103 return;
22106 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22113 return;
22115 case 'H':
22116 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22118 output_operand_lossage ("invalid operand for code '%c'", code);
22119 return;
22122 asm_fprintf (stream, "%r", REGNO (x) + 1);
22123 return;
22125 case 'J':
22126 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22128 output_operand_lossage ("invalid operand for code '%c'", code);
22129 return;
22132 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22133 return;
22135 case 'K':
22136 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22138 output_operand_lossage ("invalid operand for code '%c'", code);
22139 return;
22142 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22143 return;
22145 case 'm':
22146 asm_fprintf (stream, "%r",
22147 REG_P (XEXP (x, 0))
22148 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22149 return;
22151 case 'M':
22152 asm_fprintf (stream, "{%r-%r}",
22153 REGNO (x),
22154 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22155 return;
22157 /* Like 'M', but writing doubleword vector registers, for use by Neon
22158 insns. */
22159 case 'h':
22161 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22162 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22163 if (numregs == 1)
22164 asm_fprintf (stream, "{d%d}", regno);
22165 else
22166 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22168 return;
22170 case 'd':
22171 /* CONST_TRUE_RTX means always -- that's the default. */
22172 if (x == const_true_rtx)
22173 return;
22175 if (!COMPARISON_P (x))
22177 output_operand_lossage ("invalid operand for code '%c'", code);
22178 return;
22181 fputs (arm_condition_codes[get_arm_condition_code (x)],
22182 stream);
22183 return;
22185 case 'D':
22186 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22187 want to do that. */
22188 if (x == const_true_rtx)
22190 output_operand_lossage ("instruction never executed");
22191 return;
22193 if (!COMPARISON_P (x))
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22199 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22200 (get_arm_condition_code (x))],
22201 stream);
22202 return;
22204 case 's':
22205 case 'V':
22206 case 'W':
22207 case 'X':
22208 case 'Y':
22209 case 'Z':
22210 /* Former Maverick support, removed after GCC-4.7. */
22211 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22212 return;
22214 case 'U':
22215 if (!REG_P (x)
22216 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22217 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22218 /* Bad value for wCG register number. */
22220 output_operand_lossage ("invalid operand for code '%c'", code);
22221 return;
22224 else
22225 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22226 return;
22228 /* Print an iWMMXt control register name. */
22229 case 'w':
22230 if (!CONST_INT_P (x)
22231 || INTVAL (x) < 0
22232 || INTVAL (x) >= 16)
22233 /* Bad value for wC register number. */
22235 output_operand_lossage ("invalid operand for code '%c'", code);
22236 return;
22239 else
22241 static const char * wc_reg_names [16] =
22243 "wCID", "wCon", "wCSSF", "wCASF",
22244 "wC4", "wC5", "wC6", "wC7",
22245 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22246 "wC12", "wC13", "wC14", "wC15"
22249 fputs (wc_reg_names [INTVAL (x)], stream);
22251 return;
22253 /* Print the high single-precision register of a VFP double-precision
22254 register. */
22255 case 'p':
22257 machine_mode mode = GET_MODE (x);
22258 int regno;
22260 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22266 regno = REGNO (x);
22267 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22269 output_operand_lossage ("invalid operand for code '%c'", code);
22270 return;
22273 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22275 return;
22277 /* Print a VFP/Neon double precision or quad precision register name. */
22278 case 'P':
22279 case 'q':
22281 machine_mode mode = GET_MODE (x);
22282 int is_quad = (code == 'q');
22283 int regno;
22285 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22287 output_operand_lossage ("invalid operand for code '%c'", code);
22288 return;
22291 if (!REG_P (x)
22292 || !IS_VFP_REGNUM (REGNO (x)))
22294 output_operand_lossage ("invalid operand for code '%c'", code);
22295 return;
22298 regno = REGNO (x);
22299 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22300 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22302 output_operand_lossage ("invalid operand for code '%c'", code);
22303 return;
22306 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22307 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22309 return;
22311 /* These two codes print the low/high doubleword register of a Neon quad
22312 register, respectively. For pair-structure types, can also print
22313 low/high quadword registers. */
22314 case 'e':
22315 case 'f':
22317 machine_mode mode = GET_MODE (x);
22318 int regno;
22320 if ((GET_MODE_SIZE (mode) != 16
22321 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22323 output_operand_lossage ("invalid operand for code '%c'", code);
22324 return;
22327 regno = REGNO (x);
22328 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22330 output_operand_lossage ("invalid operand for code '%c'", code);
22331 return;
22334 if (GET_MODE_SIZE (mode) == 16)
22335 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22336 + (code == 'f' ? 1 : 0));
22337 else
22338 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22339 + (code == 'f' ? 1 : 0));
22341 return;
22343 /* Print a VFPv3 floating-point constant, represented as an integer
22344 index. */
22345 case 'G':
22347 int index = vfp3_const_double_index (x);
22348 gcc_assert (index != -1);
22349 fprintf (stream, "%d", index);
22351 return;
22353 /* Print bits representing opcode features for Neon.
22355 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22356 and polynomials as unsigned.
22358 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22360 Bit 2 is 1 for rounding functions, 0 otherwise. */
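/* For instance, an operand of (const_int 5), i.e. signed with rounding,
   makes %T print 's', %F print 'i', %t print 's' and %O print 'r'.  */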
22362 /* Identify the type as 's', 'u', 'p' or 'f'. */
22363 case 'T':
22365 HOST_WIDE_INT bits = INTVAL (x);
22366 fputc ("uspf"[bits & 3], stream);
22368 return;
22370 /* Likewise, but signed and unsigned integers are both 'i'. */
22371 case 'F':
22373 HOST_WIDE_INT bits = INTVAL (x);
22374 fputc ("iipf"[bits & 3], stream);
22376 return;
22378 /* As for 'T', but emit 'u' instead of 'p'. */
22379 case 't':
22381 HOST_WIDE_INT bits = INTVAL (x);
22382 fputc ("usuf"[bits & 3], stream);
22384 return;
22386 /* Bit 2: rounding (vs none). */
22387 case 'O':
22389 HOST_WIDE_INT bits = INTVAL (x);
22390 fputs ((bits & 4) != 0 ? "r" : "", stream);
22392 return;
22394 /* Memory operand for vld1/vst1 instruction. */
22395 case 'A':
22397 rtx addr;
22398 bool postinc = FALSE;
22399 rtx postinc_reg = NULL;
22400 unsigned align, memsize, align_bits;
22402 gcc_assert (MEM_P (x));
22403 addr = XEXP (x, 0);
22404 if (GET_CODE (addr) == POST_INC)
22406 postinc = 1;
22407 addr = XEXP (addr, 0);
22409 if (GET_CODE (addr) == POST_MODIFY)
22411 postinc_reg = XEXP( XEXP (addr, 1), 1);
22412 addr = XEXP (addr, 0);
22414 asm_fprintf (stream, "[%r", REGNO (addr));
22416 /* We know the alignment of this access, so we can emit a hint in the
22417 instruction (for some alignments) as an aid to the memory subsystem
22418 of the target. */
22419 align = MEM_ALIGN (x) >> 3;
22420 memsize = MEM_SIZE (x);
22422 /* Only certain alignment specifiers are supported by the hardware. */
22423 if (memsize == 32 && (align % 32) == 0)
22424 align_bits = 256;
22425 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22426 align_bits = 128;
22427 else if (memsize >= 8 && (align % 8) == 0)
22428 align_bits = 64;
22429 else
22430 align_bits = 0;
22432 if (align_bits != 0)
22433 asm_fprintf (stream, ":%d", align_bits);
22435 asm_fprintf (stream, "]");
22437 if (postinc)
22438 fputs("!", stream);
22439 if (postinc_reg)
22440 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22442 return;
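/* For example, a 16-byte access through r2 whose MEM_ALIGN is 128 bits
   is printed as "[r2:128]", or "[r2:128]!" when the address
   post-increments.  */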
22444 case 'C':
22446 rtx addr;
22448 gcc_assert (MEM_P (x));
22449 addr = XEXP (x, 0);
22450 gcc_assert (REG_P (addr));
22451 asm_fprintf (stream, "[%r]", REGNO (addr));
22453 return;
22455 /* Translate an S register number into a D register number and element index. */
22456 case 'y':
22458 machine_mode mode = GET_MODE (x);
22459 int regno;
22461 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22463 output_operand_lossage ("invalid operand for code '%c'", code);
22464 return;
22467 regno = REGNO (x);
22468 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22470 output_operand_lossage ("invalid operand for code '%c'", code);
22471 return;
22474 regno = regno - FIRST_VFP_REGNUM;
22475 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22477 return;
22479 case 'v':
22480 gcc_assert (CONST_DOUBLE_P (x));
22481 int result;
22482 result = vfp3_const_double_for_fract_bits (x);
22483 if (result == 0)
22484 result = vfp3_const_double_for_bits (x);
22485 fprintf (stream, "#%d", result);
22486 return;
22488 /* Register specifier for vld1.16/vst1.16. Translate the S register
22489 number into a D register number and element index. */
22490 case 'z':
22492 machine_mode mode = GET_MODE (x);
22493 int regno;
22495 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22497 output_operand_lossage ("invalid operand for code '%c'", code);
22498 return;
22501 regno = REGNO (x);
22502 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22504 output_operand_lossage ("invalid operand for code '%c'", code);
22505 return;
22508 regno = regno - FIRST_VFP_REGNUM;
22509 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22511 return;
22513 default:
22514 if (x == 0)
22516 output_operand_lossage ("missing operand");
22517 return;
22520 switch (GET_CODE (x))
22522 case REG:
22523 asm_fprintf (stream, "%r", REGNO (x));
22524 break;
22526 case MEM:
22527 output_address (GET_MODE (x), XEXP (x, 0));
22528 break;
22530 case CONST_DOUBLE:
22532 char fpstr[20];
22533 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22534 sizeof (fpstr), 0, 1);
22535 fprintf (stream, "#%s", fpstr);
22537 break;
22539 default:
22540 gcc_assert (GET_CODE (x) != NEG);
22541 fputc ('#', stream);
22542 if (GET_CODE (x) == HIGH)
22544 fputs (":lower16:", stream);
22545 x = XEXP (x, 0);
22548 output_addr_const (stream, x);
22549 break;
22554 /* Target hook for printing a memory address. */
22555 static void
22556 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22558 if (TARGET_32BIT)
22560 int is_minus = GET_CODE (x) == MINUS;
22562 if (REG_P (x))
22563 asm_fprintf (stream, "[%r]", REGNO (x));
22564 else if (GET_CODE (x) == PLUS || is_minus)
22566 rtx base = XEXP (x, 0);
22567 rtx index = XEXP (x, 1);
22568 HOST_WIDE_INT offset = 0;
22569 if (!REG_P (base)
22570 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22572 /* Ensure that BASE is a register. */
22573 /* (one of them must be). */
22574 /* Also ensure the SP is not used as an index register. */
22575 std::swap (base, index);
22577 switch (GET_CODE (index))
22579 case CONST_INT:
22580 offset = INTVAL (index);
22581 if (is_minus)
22582 offset = -offset;
22583 asm_fprintf (stream, "[%r, #%wd]",
22584 REGNO (base), offset);
22585 break;
22587 case REG:
22588 asm_fprintf (stream, "[%r, %s%r]",
22589 REGNO (base), is_minus ? "-" : "",
22590 REGNO (index));
22591 break;
22593 case MULT:
22594 case ASHIFTRT:
22595 case LSHIFTRT:
22596 case ASHIFT:
22597 case ROTATERT:
22599 asm_fprintf (stream, "[%r, %s%r",
22600 REGNO (base), is_minus ? "-" : "",
22601 REGNO (XEXP (index, 0)));
22602 arm_print_operand (stream, index, 'S');
22603 fputs ("]", stream);
22604 break;
22607 default:
22608 gcc_unreachable ();
22611 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22612 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22614 gcc_assert (REG_P (XEXP (x, 0)));
22616 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22617 asm_fprintf (stream, "[%r, #%s%d]!",
22618 REGNO (XEXP (x, 0)),
22619 GET_CODE (x) == PRE_DEC ? "-" : "",
22620 GET_MODE_SIZE (mode));
22621 else
22622 asm_fprintf (stream, "[%r], #%s%d",
22623 REGNO (XEXP (x, 0)),
22624 GET_CODE (x) == POST_DEC ? "-" : "",
22625 GET_MODE_SIZE (mode));
22627 else if (GET_CODE (x) == PRE_MODIFY)
22629 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22630 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22631 asm_fprintf (stream, "#%wd]!",
22632 INTVAL (XEXP (XEXP (x, 1), 1)));
22633 else
22634 asm_fprintf (stream, "%r]!",
22635 REGNO (XEXP (XEXP (x, 1), 1)));
22637 else if (GET_CODE (x) == POST_MODIFY)
22639 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22640 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22641 asm_fprintf (stream, "#%wd",
22642 INTVAL (XEXP (XEXP (x, 1), 1)));
22643 else
22644 asm_fprintf (stream, "%r",
22645 REGNO (XEXP (XEXP (x, 1), 1)));
22647 else output_addr_const (stream, x);
22649 else
22651 if (REG_P (x))
22652 asm_fprintf (stream, "[%r]", REGNO (x));
22653 else if (GET_CODE (x) == POST_INC)
22654 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22655 else if (GET_CODE (x) == PLUS)
22657 gcc_assert (REG_P (XEXP (x, 0)));
22658 if (CONST_INT_P (XEXP (x, 1)))
22659 asm_fprintf (stream, "[%r, #%wd]",
22660 REGNO (XEXP (x, 0)),
22661 INTVAL (XEXP (x, 1)));
22662 else
22663 asm_fprintf (stream, "[%r, %r]",
22664 REGNO (XEXP (x, 0)),
22665 REGNO (XEXP (x, 1)));
22667 else
22668 output_addr_const (stream, x);
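/* A few illustrative 32-bit expansions: (reg r3) prints as "[r3]",
   (plus (reg r3) (const_int 8)) as "[r3, #8]", a POST_INC of r3 in
   SImode as "[r3], #4", and a PRE_MODIFY of sp by -8 as "[sp, #-8]!".  */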
22672 /* Target hook for indicating whether a punctuation character for
22673 TARGET_PRINT_OPERAND is valid. */
22674 static bool
22675 arm_print_operand_punct_valid_p (unsigned char code)
22677 return (code == '@' || code == '|' || code == '.'
22678 || code == '(' || code == ')' || code == '#'
22679 || (TARGET_32BIT && (code == '?'))
22680 || (TARGET_THUMB2 && (code == '!'))
22681 || (TARGET_THUMB && (code == '_')));
22684 /* Target hook for assembling integer objects. The ARM version needs to
22685 handle word-sized values specially. */
22686 static bool
22687 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22689 machine_mode mode;
22691 if (size == UNITS_PER_WORD && aligned_p)
22693 fputs ("\t.word\t", asm_out_file);
22694 output_addr_const (asm_out_file, x);
22696 /* Mark symbols as position independent. We only do this in the
22697 .text segment, not in the .data segment. */
22698 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22699 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22701 /* See legitimize_pic_address for an explanation of the
22702 TARGET_VXWORKS_RTP check. */
22703 /* References to weak symbols cannot be resolved locally:
22704 they may be overridden by a non-weak definition at link
22705 time. */
22706 if (!arm_pic_data_is_text_relative
22707 || (GET_CODE (x) == SYMBOL_REF
22708 && (!SYMBOL_REF_LOCAL_P (x)
22709 || (SYMBOL_REF_DECL (x)
22710 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22711 fputs ("(GOT)", asm_out_file);
22712 else
22713 fputs ("(GOTOFF)", asm_out_file);
22715 fputc ('\n', asm_out_file);
22716 return true;
22719 mode = GET_MODE (x);
22721 if (arm_vector_mode_supported_p (mode))
22723 int i, units;
22725 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22727 units = CONST_VECTOR_NUNITS (x);
22728 size = GET_MODE_UNIT_SIZE (mode);
22730 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22731 for (i = 0; i < units; i++)
22733 rtx elt = CONST_VECTOR_ELT (x, i);
22734 assemble_integer
22735 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22737 else
22738 for (i = 0; i < units; i++)
22740 rtx elt = CONST_VECTOR_ELT (x, i);
22741 assemble_real
22742 (*CONST_DOUBLE_REAL_VALUE (elt),
22743 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22744 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22747 return true;
22750 return default_assemble_integer (x, size, aligned_p);
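/* For a word-sized entry in a PIC constant table this emits, e.g.,
   ".word foo(GOT)" for a symbol that cannot be resolved locally and
   ".word bar(GOTOFF)" for a local one; outside that case a plain
   ".word" directive is used.  */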
22753 static void
22754 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22756 section *s;
22758 if (!TARGET_AAPCS_BASED)
22760 (is_ctor ?
22761 default_named_section_asm_out_constructor
22762 : default_named_section_asm_out_destructor) (symbol, priority);
22763 return;
22766 /* Put these in the .init_array section, using a special relocation. */
22767 if (priority != DEFAULT_INIT_PRIORITY)
22769 char buf[18];
22770 sprintf (buf, "%s.%.5u",
22771 is_ctor ? ".init_array" : ".fini_array",
22772 priority);
22773 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22775 else if (is_ctor)
22776 s = ctors_section;
22777 else
22778 s = dtors_section;
22780 switch_to_section (s);
22781 assemble_align (POINTER_SIZE);
22782 fputs ("\t.word\t", asm_out_file);
22783 output_addr_const (asm_out_file, symbol);
22784 fputs ("(target1)\n", asm_out_file);
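/* For example, on an AAPCS target a constructor registered with priority
   101 is emitted as a ".word <symbol>(target1)" entry in a section named
   ".init_array.00101".  */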
22787 /* Add a function to the list of static constructors. */
22789 static void
22790 arm_elf_asm_constructor (rtx symbol, int priority)
22792 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22795 /* Add a function to the list of static destructors. */
22797 static void
22798 arm_elf_asm_destructor (rtx symbol, int priority)
22800 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22803 /* A finite state machine takes care of noticing whether or not instructions
22804 can be conditionally executed, and thus decreases execution time and code
22805 size by deleting branch instructions. The fsm is controlled by
22806 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22808 /* The states of the fsm controlling condition codes are:
22809 0: normal, do nothing special
22810 1: make ASM_OUTPUT_OPCODE not output this instruction
22811 2: make ASM_OUTPUT_OPCODE not output this instruction
22812 3: make instructions conditional
22813 4: make instructions conditional
22815 State transitions (state->state by whom under condition):
22816 0 -> 1 final_prescan_insn if the `target' is a label
22817 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22818 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22819 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22820 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22821 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22822 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22823 (the target insn is arm_target_insn).
22825 If the jump clobbers the conditions then we use states 2 and 4.
22827 A similar thing can be done with conditional return insns.
22829 XXX In case the `target' is an unconditional branch, this conditionalising
22830 of the instructions always reduces code size, but not always execution
22831 time. But then, I want to reduce the code size to somewhere near what
22832 /bin/cc produces. */
22834 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22835 instructions. When a COND_EXEC instruction is seen the subsequent
22836 instructions are scanned so that multiple conditional instructions can be
22837 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22838 specify the length and true/false mask for the IT block. These will be
22839 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
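/* As a purely illustrative example, a fragment such as

	cmp	r3, #0
	beq	.L2
	add	r0, r0, #1
   .L2:

   can be output by this machinery as

	cmp	r3, #0
	addne	r0, r0, #1

   removing the branch entirely.  */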
22841 /* Returns the index of the ARM condition code string in
22842 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22843 COMPARISON should be an rtx like `(eq (...) (...))'. */
22845 enum arm_cond_code
22846 maybe_get_arm_condition_code (rtx comparison)
22848 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22849 enum arm_cond_code code;
22850 enum rtx_code comp_code = GET_CODE (comparison);
22852 if (GET_MODE_CLASS (mode) != MODE_CC)
22853 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22854 XEXP (comparison, 1));
22856 switch (mode)
22858 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22859 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22860 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22861 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22862 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22863 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22864 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22865 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22866 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22867 case E_CC_DLTUmode: code = ARM_CC;
22869 dominance:
22870 if (comp_code == EQ)
22871 return ARM_INVERSE_CONDITION_CODE (code);
22872 if (comp_code == NE)
22873 return code;
22874 return ARM_NV;
22876 case E_CC_NOOVmode:
22877 switch (comp_code)
22879 case NE: return ARM_NE;
22880 case EQ: return ARM_EQ;
22881 case GE: return ARM_PL;
22882 case LT: return ARM_MI;
22883 default: return ARM_NV;
22886 case E_CC_Zmode:
22887 switch (comp_code)
22889 case NE: return ARM_NE;
22890 case EQ: return ARM_EQ;
22891 default: return ARM_NV;
22894 case E_CC_Nmode:
22895 switch (comp_code)
22897 case NE: return ARM_MI;
22898 case EQ: return ARM_PL;
22899 default: return ARM_NV;
22902 case E_CCFPEmode:
22903 case E_CCFPmode:
22904 /* We can handle all cases except UNEQ and LTGT. */
22905 switch (comp_code)
22907 case GE: return ARM_GE;
22908 case GT: return ARM_GT;
22909 case LE: return ARM_LS;
22910 case LT: return ARM_MI;
22911 case NE: return ARM_NE;
22912 case EQ: return ARM_EQ;
22913 case ORDERED: return ARM_VC;
22914 case UNORDERED: return ARM_VS;
22915 case UNLT: return ARM_LT;
22916 case UNLE: return ARM_LE;
22917 case UNGT: return ARM_HI;
22918 case UNGE: return ARM_PL;
22919 /* UNEQ and LTGT do not have a representation. */
22920 case UNEQ: /* Fall through. */
22921 case LTGT: /* Fall through. */
22922 default: return ARM_NV;
22925 case E_CC_SWPmode:
22926 switch (comp_code)
22928 case NE: return ARM_NE;
22929 case EQ: return ARM_EQ;
22930 case GE: return ARM_LE;
22931 case GT: return ARM_LT;
22932 case LE: return ARM_GE;
22933 case LT: return ARM_GT;
22934 case GEU: return ARM_LS;
22935 case GTU: return ARM_CC;
22936 case LEU: return ARM_CS;
22937 case LTU: return ARM_HI;
22938 default: return ARM_NV;
22941 case E_CC_Cmode:
22942 switch (comp_code)
22944 case LTU: return ARM_CS;
22945 case GEU: return ARM_CC;
22946 case NE: return ARM_CS;
22947 case EQ: return ARM_CC;
22948 default: return ARM_NV;
22951 case E_CC_CZmode:
22952 switch (comp_code)
22954 case NE: return ARM_NE;
22955 case EQ: return ARM_EQ;
22956 case GEU: return ARM_CS;
22957 case GTU: return ARM_HI;
22958 case LEU: return ARM_LS;
22959 case LTU: return ARM_CC;
22960 default: return ARM_NV;
22963 case E_CC_NCVmode:
22964 switch (comp_code)
22966 case GE: return ARM_GE;
22967 case LT: return ARM_LT;
22968 case GEU: return ARM_CS;
22969 case LTU: return ARM_CC;
22970 default: return ARM_NV;
22973 case E_CC_Vmode:
22974 switch (comp_code)
22976 case NE: return ARM_VS;
22977 case EQ: return ARM_VC;
22978 default: return ARM_NV;
22981 case E_CCmode:
22982 switch (comp_code)
22984 case NE: return ARM_NE;
22985 case EQ: return ARM_EQ;
22986 case GE: return ARM_GE;
22987 case GT: return ARM_GT;
22988 case LE: return ARM_LE;
22989 case LT: return ARM_LT;
22990 case GEU: return ARM_CS;
22991 case GTU: return ARM_HI;
22992 case LEU: return ARM_LS;
22993 case LTU: return ARM_CC;
22994 default: return ARM_NV;
22997 default: gcc_unreachable ();
23001 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23002 static enum arm_cond_code
23003 get_arm_condition_code (rtx comparison)
23005 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23006 gcc_assert (code != ARM_NV);
23007 return code;
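/* Illustrative sketch (not part of the compiler): ARM condition codes come
   in inverse pairs whose architectural encodings differ only in bit 0
   (EQ/NE, CS/CC, MI/PL, VS/VC, HI/LS, GE/LT, GT/LE), which is why
   ARM_INVERSE_CONDITION_CODE can be a simple XOR with 1.  The enum below is
   an invented stand-in that mirrors that encoding order.  */

enum cond_sketch
{
  SK_EQ, SK_NE, SK_CS, SK_CC, SK_MI, SK_PL, SK_VS, SK_VC,
  SK_HI, SK_LS, SK_GE, SK_LT, SK_GT, SK_LE, SK_AL, SK_NV
};

static enum cond_sketch
invert_cond_sketch (enum cond_sketch cond)
{
  /* E.g. SK_EQ (0) <-> SK_NE (1), SK_GE (10) <-> SK_LT (11).  */
  return (enum cond_sketch) ((int) cond ^ 1);
}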
23010 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23011 code registers when not targeting Thumb1. The VFP condition register
23012 only exists when generating hard-float code. */
23013 static bool
23014 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23016 if (!TARGET_32BIT)
23017 return false;
23019 *p1 = CC_REGNUM;
23020 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23021 return true;
23024 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23025 instructions. */
23026 void
23027 thumb2_final_prescan_insn (rtx_insn *insn)
23029 rtx_insn *first_insn = insn;
23030 rtx body = PATTERN (insn);
23031 rtx predicate;
23032 enum arm_cond_code code;
23033 int n;
23034 int mask;
23035 int max;
23037 /* max_insns_skipped in the tune was already taken into account in the
23038 cost model of the ifcvt pass when generating COND_EXEC insns, so at
23039 this stage just emit IT blocks as large as we can; it does not make
23040 sense to split the IT blocks. */
23041 max = MAX_INSN_PER_IT_BLOCK;
23043 /* Remove the previous insn from the count of insns to be output. */
23044 if (arm_condexec_count)
23045 arm_condexec_count--;
23047 /* Nothing to do if we are already inside a conditional block. */
23048 if (arm_condexec_count)
23049 return;
23051 if (GET_CODE (body) != COND_EXEC)
23052 return;
23054 /* Conditional jumps are implemented directly. */
23055 if (JUMP_P (insn))
23056 return;
23058 predicate = COND_EXEC_TEST (body);
23059 arm_current_cc = get_arm_condition_code (predicate);
23061 n = get_attr_ce_count (insn);
23062 arm_condexec_count = 1;
23063 arm_condexec_mask = (1 << n) - 1;
23064 arm_condexec_masklen = n;
23065 /* See if subsequent instructions can be combined into the same block. */
23066 for (;;)
23068 insn = next_nonnote_insn (insn);
23070 /* Jumping into the middle of an IT block is illegal, so a label or
23071 barrier terminates the block. */
23072 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23073 break;
23075 body = PATTERN (insn);
23076 /* USE and CLOBBER aren't really insns, so just skip them. */
23077 if (GET_CODE (body) == USE
23078 || GET_CODE (body) == CLOBBER)
23079 continue;
23081 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23082 if (GET_CODE (body) != COND_EXEC)
23083 break;
23084 /* Maximum number of conditionally executed instructions in a block. */
23085 n = get_attr_ce_count (insn);
23086 if (arm_condexec_masklen + n > max)
23087 break;
23089 predicate = COND_EXEC_TEST (body);
23090 code = get_arm_condition_code (predicate);
23091 mask = (1 << n) - 1;
23092 if (arm_current_cc == code)
23093 arm_condexec_mask |= (mask << arm_condexec_masklen);
23094 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23095 break;
23097 arm_condexec_count++;
23098 arm_condexec_masklen += n;
23100 /* A jump must be the last instruction in a conditional block. */
23101 if (JUMP_P (insn))
23102 break;
23104 /* Restore recog_data (getting the attributes of other insns can
23105 destroy this array, but final.c assumes that it remains intact
23106 across this call). */
23107 extract_constrain_insn_cached (first_insn);
23110 void
23111 arm_final_prescan_insn (rtx_insn *insn)
23113 /* BODY will hold the body of INSN. */
23114 rtx body = PATTERN (insn);
23116 /* This will be 1 if trying to repeat the trick, and things need to be
23117 reversed if it appears to fail. */
23118 int reverse = 0;
23120 /* If we start with a return insn, we only succeed if we find another one. */
23121 int seeking_return = 0;
23122 enum rtx_code return_code = UNKNOWN;
23124 /* START_INSN will hold the insn from where we start looking. This is the
23125 first insn after the following code_label if REVERSE is true. */
23126 rtx_insn *start_insn = insn;
23128 /* If in state 4, check if the target branch is reached, in order to
23129 change back to state 0. */
23130 if (arm_ccfsm_state == 4)
23132 if (insn == arm_target_insn)
23134 arm_target_insn = NULL;
23135 arm_ccfsm_state = 0;
23137 return;
23140 /* If in state 3, it is possible to repeat the trick, if this insn is an
23141 unconditional branch to a label, and immediately following this branch
23142 is the previous target label which is only used once, and the label this
23143 branch jumps to is not too far off. */
23144 if (arm_ccfsm_state == 3)
23146 if (simplejump_p (insn))
23148 start_insn = next_nonnote_insn (start_insn);
23149 if (BARRIER_P (start_insn))
23151 /* XXX Isn't this always a barrier? */
23152 start_insn = next_nonnote_insn (start_insn);
23154 if (LABEL_P (start_insn)
23155 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23156 && LABEL_NUSES (start_insn) == 1)
23157 reverse = TRUE;
23158 else
23159 return;
23161 else if (ANY_RETURN_P (body))
23163 start_insn = next_nonnote_insn (start_insn);
23164 if (BARRIER_P (start_insn))
23165 start_insn = next_nonnote_insn (start_insn);
23166 if (LABEL_P (start_insn)
23167 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23168 && LABEL_NUSES (start_insn) == 1)
23170 reverse = TRUE;
23171 seeking_return = 1;
23172 return_code = GET_CODE (body);
23174 else
23175 return;
23177 else
23178 return;
23181 gcc_assert (!arm_ccfsm_state || reverse);
23182 if (!JUMP_P (insn))
23183 return;
23185 /* This jump might be paralleled with a clobber of the condition codes;
23186 the jump should always come first. */
23187 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23188 body = XVECEXP (body, 0, 0);
23190 if (reverse
23191 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23192 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23194 int insns_skipped;
23195 int fail = FALSE, succeed = FALSE;
23196 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23197 int then_not_else = TRUE;
23198 rtx_insn *this_insn = start_insn;
23199 rtx label = 0;
23201 /* Register the insn jumped to. */
23202 if (reverse)
23204 if (!seeking_return)
23205 label = XEXP (SET_SRC (body), 0);
23207 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23208 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23209 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23211 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23212 then_not_else = FALSE;
23214 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23216 seeking_return = 1;
23217 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23219 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23221 seeking_return = 1;
23222 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23223 then_not_else = FALSE;
23225 else
23226 gcc_unreachable ();
23228 /* See how many insns this branch skips, and what kind of insns. If all
23229 insns are okay, and the label or unconditional branch to the same
23230 label is not too far away, succeed. */
23231 for (insns_skipped = 0;
23232 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23234 rtx scanbody;
23236 this_insn = next_nonnote_insn (this_insn);
23237 if (!this_insn)
23238 break;
23240 switch (GET_CODE (this_insn))
23242 case CODE_LABEL:
23243 /* Succeed if it is the target label, otherwise fail since
23244 control falls in from somewhere else. */
23245 if (this_insn == label)
23247 arm_ccfsm_state = 1;
23248 succeed = TRUE;
23250 else
23251 fail = TRUE;
23252 break;
23254 case BARRIER:
23255 /* Succeed if the following insn is the target label.
23256 Otherwise fail.
23257 If return insns are used then the last insn in a function
23258 will be a barrier. */
23259 this_insn = next_nonnote_insn (this_insn);
23260 if (this_insn && this_insn == label)
23262 arm_ccfsm_state = 1;
23263 succeed = TRUE;
23265 else
23266 fail = TRUE;
23267 break;
23269 case CALL_INSN:
23270 /* The AAPCS says that conditional calls should not be
23271 used since they make interworking inefficient (the
23272 linker can't transform BL<cond> into BLX). That's
23273 only a problem if the machine has BLX. */
23274 if (arm_arch5)
23276 fail = TRUE;
23277 break;
23280 /* Succeed if the following insn is the target label, or
23281 if the following two insns are a barrier and the
23282 target label. */
23283 this_insn = next_nonnote_insn (this_insn);
23284 if (this_insn && BARRIER_P (this_insn))
23285 this_insn = next_nonnote_insn (this_insn);
23287 if (this_insn && this_insn == label
23288 && insns_skipped < max_insns_skipped)
23290 arm_ccfsm_state = 1;
23291 succeed = TRUE;
23293 else
23294 fail = TRUE;
23295 break;
23297 case JUMP_INSN:
23298 /* If this is an unconditional branch to the same label, succeed.
23299 If it is to another label, do nothing. If it is conditional,
23300 fail. */
23301 /* XXX Probably, the tests for SET and the PC are
23302 unnecessary. */
23304 scanbody = PATTERN (this_insn);
23305 if (GET_CODE (scanbody) == SET
23306 && GET_CODE (SET_DEST (scanbody)) == PC)
23308 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23309 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23311 arm_ccfsm_state = 2;
23312 succeed = TRUE;
23314 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23315 fail = TRUE;
23317 /* Fail if a conditional return is undesirable (e.g. on a
23318 StrongARM), but still allow this if optimizing for size. */
23319 else if (GET_CODE (scanbody) == return_code
23320 && !use_return_insn (TRUE, NULL)
23321 && !optimize_size)
23322 fail = TRUE;
23323 else if (GET_CODE (scanbody) == return_code)
23325 arm_ccfsm_state = 2;
23326 succeed = TRUE;
23328 else if (GET_CODE (scanbody) == PARALLEL)
23330 switch (get_attr_conds (this_insn))
23332 case CONDS_NOCOND:
23333 break;
23334 default:
23335 fail = TRUE;
23336 break;
23339 else
23340 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23342 break;
23344 case INSN:
23345 /* Instructions using or affecting the condition codes make it
23346 fail. */
23347 scanbody = PATTERN (this_insn);
23348 if (!(GET_CODE (scanbody) == SET
23349 || GET_CODE (scanbody) == PARALLEL)
23350 || get_attr_conds (this_insn) != CONDS_NOCOND)
23351 fail = TRUE;
23352 break;
23354 default:
23355 break;
23358 if (succeed)
23360 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23361 arm_target_label = CODE_LABEL_NUMBER (label);
23362 else
23364 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23366 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23368 this_insn = next_nonnote_insn (this_insn);
23369 gcc_assert (!this_insn
23370 || (!BARRIER_P (this_insn)
23371 && !LABEL_P (this_insn)));
23373 if (!this_insn)
23375 /* Oh, dear! We ran off the end; give up. */
23376 extract_constrain_insn_cached (insn);
23377 arm_ccfsm_state = 0;
23378 arm_target_insn = NULL;
23379 return;
23381 arm_target_insn = this_insn;
23384 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23385 what it was. */
23386 if (!reverse)
23387 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23389 if (reverse || then_not_else)
23390 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23393 /* Restore recog_data (getting the attributes of other insns can
23394 destroy this array, but final.c assumes that it remains intact
23395 across this call). */
23396 extract_constrain_insn_cached (insn);
23400 /* Output IT instructions. */
23401 void
23402 thumb2_asm_output_opcode (FILE * stream)
23404 char buff[5];
23405 int n;
23407 if (arm_condexec_mask)
23409 for (n = 0; n < arm_condexec_masklen; n++)
23410 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23411 buff[n] = 0;
23412 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23413 arm_condition_codes[arm_current_cc]);
23414 arm_condexec_mask = 0;
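/* Illustrative sketch (not part of the compiler) of the suffix construction
   done by the loop above: with arm_condexec_mask == 0x1 and
   arm_condexec_masklen == 2 the buffer becomes "te", so the emitted
   mnemonic is "ite" followed by the current condition.  */

static void
it_suffix_sketch (unsigned int mask, int masklen, char buff[5])
{
  int n;

  for (n = 0; n < masklen; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
}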
23418 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23419 UNITS_PER_WORD bytes wide. */
23420 static unsigned int
23421 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23423 if (TARGET_32BIT
23424 && regno > PC_REGNUM
23425 && regno != FRAME_POINTER_REGNUM
23426 && regno != ARG_POINTER_REGNUM
23427 && !IS_VFP_REGNUM (regno))
23428 return 1;
23430 return ARM_NUM_REGS (mode);
23433 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23434 static bool
23435 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23437 if (GET_MODE_CLASS (mode) == MODE_CC)
23438 return (regno == CC_REGNUM
23439 || (TARGET_HARD_FLOAT
23440 && regno == VFPCC_REGNUM));
23442 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23443 return false;
23445 if (TARGET_THUMB1)
23446 /* For the Thumb we only allow values bigger than SImode in
23447 registers 0 - 6, so that there is always a second low
23448 register available to hold the upper part of the value.
23449 We probably ought to ensure that the register is the
23450 start of an even numbered register pair. */
23451 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23453 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23455 if (mode == SFmode || mode == SImode)
23456 return VFP_REGNO_OK_FOR_SINGLE (regno);
23458 if (mode == DFmode)
23459 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23461 if (mode == HFmode)
23462 return VFP_REGNO_OK_FOR_SINGLE (regno);
23464 /* VFP registers can hold HImode values. */
23465 if (mode == HImode)
23466 return VFP_REGNO_OK_FOR_SINGLE (regno);
23468 if (TARGET_NEON)
23469 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23470 || (VALID_NEON_QREG_MODE (mode)
23471 && NEON_REGNO_OK_FOR_QUAD (regno))
23472 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23473 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23474 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23475 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23476 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23478 return false;
23481 if (TARGET_REALLY_IWMMXT)
23483 if (IS_IWMMXT_GR_REGNUM (regno))
23484 return mode == SImode;
23486 if (IS_IWMMXT_REGNUM (regno))
23487 return VALID_IWMMXT_REG_MODE (mode);
23490 /* We allow almost any value to be stored in the general registers.
23491 Restrict doubleword quantities to even register pairs in ARM state
23492 so that we can use ldrd. Do not allow very large Neon structure
23493 opaque modes in general registers; they would use too many. */
23494 if (regno <= LAST_ARM_REGNUM)
23496 if (ARM_NUM_REGS (mode) > 4)
23497 return false;
23499 if (TARGET_THUMB2)
23500 return true;
23502 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23505 if (regno == FRAME_POINTER_REGNUM
23506 || regno == ARG_POINTER_REGNUM)
23507 /* We only allow integers in the fake hard registers. */
23508 return GET_MODE_CLASS (mode) == MODE_INT;
23510 return false;
23513 /* Implement TARGET_MODES_TIEABLE_P. */
23515 static bool
23516 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23518 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23519 return true;
23521 /* We specifically want to allow elements of "structure" modes to
23522 be tieable to the structure. This more general condition allows
23523 other rarer situations too. */
23524 if (TARGET_NEON
23525 && (VALID_NEON_DREG_MODE (mode1)
23526 || VALID_NEON_QREG_MODE (mode1)
23527 || VALID_NEON_STRUCT_MODE (mode1))
23528 && (VALID_NEON_DREG_MODE (mode2)
23529 || VALID_NEON_QREG_MODE (mode2)
23530 || VALID_NEON_STRUCT_MODE (mode2)))
23531 return true;
23533 return false;
23536 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23537 not used in ARM mode. */
23539 enum reg_class
23540 arm_regno_class (int regno)
23542 if (regno == PC_REGNUM)
23543 return NO_REGS;
23545 if (TARGET_THUMB1)
23547 if (regno == STACK_POINTER_REGNUM)
23548 return STACK_REG;
23549 if (regno == CC_REGNUM)
23550 return CC_REG;
23551 if (regno < 8)
23552 return LO_REGS;
23553 return HI_REGS;
23556 if (TARGET_THUMB2 && regno < 8)
23557 return LO_REGS;
23559 if ( regno <= LAST_ARM_REGNUM
23560 || regno == FRAME_POINTER_REGNUM
23561 || regno == ARG_POINTER_REGNUM)
23562 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23564 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23565 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23567 if (IS_VFP_REGNUM (regno))
23569 if (regno <= D7_VFP_REGNUM)
23570 return VFP_D0_D7_REGS;
23571 else if (regno <= LAST_LO_VFP_REGNUM)
23572 return VFP_LO_REGS;
23573 else
23574 return VFP_HI_REGS;
23577 if (IS_IWMMXT_REGNUM (regno))
23578 return IWMMXT_REGS;
23580 if (IS_IWMMXT_GR_REGNUM (regno))
23581 return IWMMXT_GR_REGS;
23583 return NO_REGS;
23586 /* Handle a special case when computing the offset
23587 of an argument from the frame pointer. */
23588 int
23589 arm_debugger_arg_offset (int value, rtx addr)
23591 rtx_insn *insn;
23593 /* We are only interested if dbxout_parms() failed to compute the offset. */
23594 if (value != 0)
23595 return 0;
23597 /* We can only cope with the case where the address is held in a register. */
23598 if (!REG_P (addr))
23599 return 0;
23601 /* If we are using the frame pointer to point at the argument, then
23602 an offset of 0 is correct. */
23603 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23604 return 0;
23606 /* If we are using the stack pointer to point at the
23607 argument, then an offset of 0 is correct. */
23608 /* ??? Check this is consistent with thumb2 frame layout. */
23609 if ((TARGET_THUMB || !frame_pointer_needed)
23610 && REGNO (addr) == SP_REGNUM)
23611 return 0;
23613 /* Oh dear. The argument is pointed to by a register rather
23614 than being held in a register, or being stored at a known
23615 offset from the frame pointer. Since GDB only understands
23616 those two kinds of argument we must translate the address
23617 held in the register into an offset from the frame pointer.
23618 We do this by searching through the insns for the function
23619 looking to see where this register gets its value. If the
23620 register is initialized from the frame pointer plus an offset
23621 then we are in luck and we can continue, otherwise we give up.
23623 This code is exercised by producing debugging information
23624 for a function with arguments like this:
23626 double func (double a, double b, int c, double d) {return d;}
23628 Without this code the stab for parameter 'd' will be set to
23629 an offset of 0 from the frame pointer, rather than 8. */
23631 /* The if() statement says:
23633 If the insn is a normal instruction
23634 and if the insn is setting the value in a register
23635 and if the register being set is the register holding the address of the argument
23636 and if the address is computed by an addition
23637 that involves adding to a register
23638 which is the frame pointer
23639 a constant integer
23641 then... */
23643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23645 if ( NONJUMP_INSN_P (insn)
23646 && GET_CODE (PATTERN (insn)) == SET
23647 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23648 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23649 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23650 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23651 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23654 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23656 break;
23660 if (value == 0)
23662 debug_rtx (addr);
23663 warning (0, "unable to compute real location of stacked parameter");
23664 value = 8; /* XXX magic hack */
23667 return value;
23670 /* Implement TARGET_PROMOTED_TYPE. */
23672 static tree
23673 arm_promoted_type (const_tree t)
23675 if (SCALAR_FLOAT_TYPE_P (t)
23676 && TYPE_PRECISION (t) == 16
23677 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23678 return float_type_node;
23679 return NULL_TREE;
23682 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23683 This simply adds HFmode as a supported mode; even though we don't
23684 implement arithmetic on this type directly, it's supported by
23685 optabs conversions, much the way the double-word arithmetic is
23686 special-cased in the default hook. */
23688 static bool
23689 arm_scalar_mode_supported_p (scalar_mode mode)
23691 if (mode == HFmode)
23692 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23693 else if (ALL_FIXED_POINT_MODE_P (mode))
23694 return true;
23695 else
23696 return default_scalar_mode_supported_p (mode);
23699 /* Set the value of FLT_EVAL_METHOD.
23700 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23702 0: evaluate all operations and constants, whose semantic type has at
23703 most the range and precision of type float, to the range and
23704 precision of float; evaluate all other operations and constants to
23705 the range and precision of the semantic type;
23707 N, where _FloatN is a supported interchange floating type
23708 evaluate all operations and constants, whose semantic type has at
23709 most the range and precision of _FloatN type, to the range and
23710 precision of the _FloatN type; evaluate all other operations and
23711 constants to the range and precision of the semantic type;
23713 If we have the ARMv8.2-A extensions then we support _Float16 in native
23714 precision, so we should set this to 16. Otherwise, we support the type,
23715 but want to evaluate expressions in float precision, so set this to
23716 0. */
23718 static enum flt_eval_method
23719 arm_excess_precision (enum excess_precision_type type)
23721 switch (type)
23723 case EXCESS_PRECISION_TYPE_FAST:
23724 case EXCESS_PRECISION_TYPE_STANDARD:
23725 /* We can calculate either in 16-bit range and precision or
23726 32-bit range and precision. Make that decision based on whether
23727 we have native support for the ARMv8.2-A 16-bit floating-point
23728 instructions or not. */
23729 return (TARGET_VFP_FP16INST
23730 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23731 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23732 case EXCESS_PRECISION_TYPE_IMPLICIT:
23733 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23734 default:
23735 gcc_unreachable ();
23737 return FLT_EVAL_METHOD_UNPREDICTABLE;
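/* Illustrative example (not part of the compiler), assuming a target and
   options for which _Float16 is available (IEEE fp16 format selected): with
   FLT_EVAL_METHOD == 16 the addition below is carried out directly in half
   precision using the ARMv8.2-A FP16 instructions, whereas with
   FLT_EVAL_METHOD == 0 both operands are promoted to float, added in single
   precision and the result narrowed back on return.  */

#if defined (__ARM_FP16_FORMAT_IEEE)
static _Float16
f16_add_sketch (_Float16 a, _Float16 b)
{
  return a + b;	/* Evaluation precision follows FLT_EVAL_METHOD.  */
}
#endif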
23741 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23742 _Float16 if we are using anything other than ieee format for 16-bit
23743 floating point. Otherwise, punt to the default implementation. */
23744 static opt_scalar_float_mode
23745 arm_floatn_mode (int n, bool extended)
23747 if (!extended && n == 16)
23749 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23750 return HFmode;
23751 return opt_scalar_float_mode ();
23754 return default_floatn_mode (n, extended);
23758 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23759 not to early-clobber SRC registers in the process.
23761 We assume that the operands described by SRC and DEST represent a
23762 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23763 number of components into which the copy has been decomposed. */
23764 void
23765 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23767 unsigned int i;
23769 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23770 || REGNO (operands[0]) < REGNO (operands[1]))
23772 for (i = 0; i < count; i++)
23774 operands[2 * i] = dest[i];
23775 operands[2 * i + 1] = src[i];
23778 else
23780 for (i = 0; i < count; i++)
23782 operands[2 * i] = dest[count - i - 1];
23783 operands[2 * i + 1] = src[count - i - 1];
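/* Illustrative sketch (not part of the compiler), with an array of plain
   integers standing in for the register file: when a decomposed copy's
   destination overlaps its source, copying the components lowest-first can
   clobber source components that have not been read yet, so the reversed
   order used above is needed when the destination starts at or above the
   source.  */

static void
overlapping_copy_sketch (int *regfile, int dest, int src, int count)
{
  int i;

  if (dest < src)
    for (i = 0; i < count; i++)		/* Forward order is safe.  */
      regfile[dest + i] = regfile[src + i];
  else
    for (i = count - 1; i >= 0; i--)	/* Copy high components first.  */
      regfile[dest + i] = regfile[src + i];
}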
23788 /* Split operands into moves from op[1] + op[2] into op[0]. */
23790 void
23791 neon_split_vcombine (rtx operands[3])
23793 unsigned int dest = REGNO (operands[0]);
23794 unsigned int src1 = REGNO (operands[1]);
23795 unsigned int src2 = REGNO (operands[2]);
23796 machine_mode halfmode = GET_MODE (operands[1]);
23797 unsigned int halfregs = REG_NREGS (operands[1]);
23798 rtx destlo, desthi;
23800 if (src1 == dest && src2 == dest + halfregs)
23802 /* No-op move. Can't split to nothing; emit something. */
23803 emit_note (NOTE_INSN_DELETED);
23804 return;
23807 /* Preserve register attributes for variable tracking. */
23808 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23809 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23810 GET_MODE_SIZE (halfmode));
23812 /* Special case of reversed high/low parts. Use VSWP. */
23813 if (src2 == dest && src1 == dest + halfregs)
23815 rtx x = gen_rtx_SET (destlo, operands[1]);
23816 rtx y = gen_rtx_SET (desthi, operands[2]);
23817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23818 return;
23821 if (!reg_overlap_mentioned_p (operands[2], destlo))
23823 /* Try to avoid unnecessary moves if part of the result
23824 is in the right place already. */
23825 if (src1 != dest)
23826 emit_move_insn (destlo, operands[1]);
23827 if (src2 != dest + halfregs)
23828 emit_move_insn (desthi, operands[2]);
23830 else
23832 if (src2 != dest + halfregs)
23833 emit_move_insn (desthi, operands[2]);
23834 if (src1 != dest)
23835 emit_move_insn (destlo, operands[1]);
23839 /* Return the number (counting from 0) of
23840 the least significant set bit in MASK. */
23842 inline static int
23843 number_of_first_bit_set (unsigned mask)
23845 return ctz_hwi (mask);
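/* Illustrative sketch (not part of the compiler): ctz_hwi is GCC's internal
   count-trailing-zeros helper; a portable equivalent for a nonzero mask is
   shown below.  For example, a register mask of 0x60 (r5 and r6) yields 5.  */

static int
ctz_sketch (unsigned long mask)
{
  int n = 0;

  /* MASK must be nonzero, as in the uses of number_of_first_bit_set
     in this file.  */
  while ((mask & 1) == 0)
    {
      mask >>= 1;
      n++;
    }
  return n;
}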
23848 /* Like emit_multi_reg_push, but allowing for a different set of
23849 registers to be described as saved. MASK is the set of registers
23850 to be saved; REAL_REGS is the set of registers to be described as
23851 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23853 static rtx_insn *
23854 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23856 unsigned long regno;
23857 rtx par[10], tmp, reg;
23858 rtx_insn *insn;
23859 int i, j;
23861 /* Build the parallel of the registers actually being stored. */
23862 for (i = 0; mask; ++i, mask &= mask - 1)
23864 regno = ctz_hwi (mask);
23865 reg = gen_rtx_REG (SImode, regno);
23867 if (i == 0)
23868 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23869 else
23870 tmp = gen_rtx_USE (VOIDmode, reg);
23872 par[i] = tmp;
23875 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23876 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23877 tmp = gen_frame_mem (BLKmode, tmp);
23878 tmp = gen_rtx_SET (tmp, par[0]);
23879 par[0] = tmp;
23881 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23882 insn = emit_insn (tmp);
23884 /* Always build the stack adjustment note for unwind info. */
23885 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23886 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23887 par[0] = tmp;
23889 /* Build the parallel of the registers recorded as saved for unwind. */
23890 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23892 regno = ctz_hwi (real_regs);
23893 reg = gen_rtx_REG (SImode, regno);
23895 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23896 tmp = gen_frame_mem (SImode, tmp);
23897 tmp = gen_rtx_SET (tmp, reg);
23898 RTX_FRAME_RELATED_P (tmp) = 1;
23899 par[j + 1] = tmp;
23902 if (j == 0)
23903 tmp = par[0];
23904 else
23906 RTX_FRAME_RELATED_P (par[0]) = 1;
23907 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23910 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23912 return insn;
23915 /* Emit code to pop registers from the stack. F is the assembly file.
23916 MASK is the set of registers to pop. */
23917 static void
23918 thumb_pop (FILE *f, unsigned long mask)
23920 int regno;
23921 int lo_mask = mask & 0xFF;
23923 gcc_assert (mask);
23925 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23927 /* Special case. Do not generate a POP PC statement here, do it in
23928 thumb_exit() */
23929 thumb_exit (f, -1);
23930 return;
23933 fprintf (f, "\tpop\t{");
23935 /* Look at the low registers first. */
23936 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23938 if (lo_mask & 1)
23940 asm_fprintf (f, "%r", regno);
23942 if ((lo_mask & ~1) != 0)
23943 fprintf (f, ", ");
23947 if (mask & (1 << PC_REGNUM))
23949 /* Catch popping the PC. */
23950 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23951 || IS_CMSE_ENTRY (arm_current_func_type ()))
23953 /* The PC is never popped directly, instead
23954 it is popped into r3 and then BX is used. */
23955 fprintf (f, "}\n");
23957 thumb_exit (f, -1);
23959 return;
23961 else
23963 if (mask & 0xFF)
23964 fprintf (f, ", ");
23966 asm_fprintf (f, "%r", PC_REGNUM);
23970 fprintf (f, "}\n");
23973 /* Generate code to return from a thumb function.
23974 If 'reg_containing_return_addr' is -1, then the return address is
23975 actually on the stack, at the stack pointer.
23977 Note: do not forget to update length attribute of corresponding insn pattern
23978 when changing assembly output (eg. length attribute of epilogue_insns when
23979 updating Armv8-M Baseline Security Extensions register clearing
23980 sequences). */
23981 static void
23982 thumb_exit (FILE *f, int reg_containing_return_addr)
23984 unsigned regs_available_for_popping;
23985 unsigned regs_to_pop;
23986 int pops_needed;
23987 unsigned available;
23988 unsigned required;
23989 machine_mode mode;
23990 int size;
23991 int restore_a4 = FALSE;
23993 /* Compute the registers we need to pop. */
23994 regs_to_pop = 0;
23995 pops_needed = 0;
23997 if (reg_containing_return_addr == -1)
23999 regs_to_pop |= 1 << LR_REGNUM;
24000 ++pops_needed;
24003 if (TARGET_BACKTRACE)
24005 /* Restore the (ARM) frame pointer and stack pointer. */
24006 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24007 pops_needed += 2;
24010 /* If there is nothing to pop then just emit the BX instruction and
24011 return. */
24012 if (pops_needed == 0)
24014 if (crtl->calls_eh_return)
24015 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24017 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24019 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24020 reg_containing_return_addr);
24021 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24023 else
24024 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24025 return;
24027 /* Otherwise if we are not supporting interworking and we have not created
24028 a backtrace structure and the function was not entered in ARM mode then
24029 just pop the return address straight into the PC. */
24030 else if (!TARGET_INTERWORK
24031 && !TARGET_BACKTRACE
24032 && !is_called_in_ARM_mode (current_function_decl)
24033 && !crtl->calls_eh_return
24034 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24036 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24037 return;
24040 /* Find out how many of the (return) argument registers we can corrupt. */
24041 regs_available_for_popping = 0;
24043 /* If returning via __builtin_eh_return, the bottom three registers
24044 all contain information needed for the return. */
24045 if (crtl->calls_eh_return)
24046 size = 12;
24047 else
24049 /* See if we can deduce the registers used from the function's
24050 return value. This is more reliable than examining
24051 df_regs_ever_live_p () because that will be set if the register is
24052 ever used in the function, not just if the register is used
24053 to hold a return value. */
24055 if (crtl->return_rtx != 0)
24056 mode = GET_MODE (crtl->return_rtx);
24057 else
24058 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24060 size = GET_MODE_SIZE (mode);
24062 if (size == 0)
24064 /* In a void function we can use any argument register.
24065 In a function that returns a structure on the stack
24066 we can use the second and third argument registers. */
24067 if (mode == VOIDmode)
24068 regs_available_for_popping =
24069 (1 << ARG_REGISTER (1))
24070 | (1 << ARG_REGISTER (2))
24071 | (1 << ARG_REGISTER (3));
24072 else
24073 regs_available_for_popping =
24074 (1 << ARG_REGISTER (2))
24075 | (1 << ARG_REGISTER (3));
24077 else if (size <= 4)
24078 regs_available_for_popping =
24079 (1 << ARG_REGISTER (2))
24080 | (1 << ARG_REGISTER (3));
24081 else if (size <= 8)
24082 regs_available_for_popping =
24083 (1 << ARG_REGISTER (3));
24086 /* Match registers to be popped with registers into which we pop them. */
24087 for (available = regs_available_for_popping,
24088 required = regs_to_pop;
24089 required != 0 && available != 0;
24090 available &= ~(available & - available),
24091 required &= ~(required & - required))
24092 -- pops_needed;
24094 /* If we have any popping registers left over, remove them. */
24095 if (available > 0)
24096 regs_available_for_popping &= ~available;
24098 /* Otherwise if we need another popping register we can use
24099 the fourth argument register. */
24100 else if (pops_needed)
24102 /* If we have not found any free argument registers and
24103 reg a4 contains the return address, we must move it. */
24104 if (regs_available_for_popping == 0
24105 && reg_containing_return_addr == LAST_ARG_REGNUM)
24107 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24108 reg_containing_return_addr = LR_REGNUM;
24110 else if (size > 12)
24112 /* Register a4 is being used to hold part of the return value,
24113 but we have dire need of a free, low register. */
24114 restore_a4 = TRUE;
24116 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24119 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24121 /* The fourth argument register is available. */
24122 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24124 --pops_needed;
24128 /* Pop as many registers as we can. */
24129 thumb_pop (f, regs_available_for_popping);
24131 /* Process the registers we popped. */
24132 if (reg_containing_return_addr == -1)
24134 /* The return address was popped into the lowest numbered register. */
24135 regs_to_pop &= ~(1 << LR_REGNUM);
24137 reg_containing_return_addr =
24138 number_of_first_bit_set (regs_available_for_popping);
24140 /* Remove this register from the mask of available registers, so that
24141 the return address will not be corrupted by further pops. */
24142 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24145 /* If we popped other registers then handle them here. */
24146 if (regs_available_for_popping)
24148 int frame_pointer;
24150 /* Work out which register currently contains the frame pointer. */
24151 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24153 /* Move it into the correct place. */
24154 asm_fprintf (f, "\tmov\t%r, %r\n",
24155 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24157 /* (Temporarily) remove it from the mask of popped registers. */
24158 regs_available_for_popping &= ~(1 << frame_pointer);
24159 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24161 if (regs_available_for_popping)
24163 int stack_pointer;
24165 /* We popped the stack pointer as well,
24166 find the register that contains it. */
24167 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24169 /* Move it into the stack register. */
24170 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24172 /* At this point we have popped all necessary registers, so
24173 do not worry about restoring regs_available_for_popping
24174 to its correct value:
24176 assert (pops_needed == 0)
24177 assert (regs_available_for_popping == (1 << frame_pointer))
24178 assert (regs_to_pop == (1 << STACK_POINTER)) */
24180 else
24182 /* Since we have just moved the popped value into the frame
24183 pointer, the popping register is available for reuse, and
24184 we know that we still have the stack pointer left to pop. */
24185 regs_available_for_popping |= (1 << frame_pointer);
24189 /* If we still have registers left on the stack, but we no longer have
24190 any registers into which we can pop them, then we must move the return
24191 address into the link register and make available the register that
24192 contained it. */
24193 if (regs_available_for_popping == 0 && pops_needed > 0)
24195 regs_available_for_popping |= 1 << reg_containing_return_addr;
24197 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24198 reg_containing_return_addr);
24200 reg_containing_return_addr = LR_REGNUM;
24203 /* If we have registers left on the stack then pop some more.
24204 We know that at most we will want to pop FP and SP. */
24205 if (pops_needed > 0)
24207 int popped_into;
24208 int move_to;
24210 thumb_pop (f, regs_available_for_popping);
24212 /* We have popped either FP or SP.
24213 Move whichever one it is into the correct register. */
24214 popped_into = number_of_first_bit_set (regs_available_for_popping);
24215 move_to = number_of_first_bit_set (regs_to_pop);
24217 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24218 --pops_needed;
24221 /* If we still have not popped everything then we must have only
24222 had one register available to us and we are now popping the SP. */
24223 if (pops_needed > 0)
24225 int popped_into;
24227 thumb_pop (f, regs_available_for_popping);
24229 popped_into = number_of_first_bit_set (regs_available_for_popping);
24231 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24232 /*
24233 assert (regs_to_pop == (1 << STACK_POINTER))
24234 assert (pops_needed == 1)
24235 */
24238 /* If necessary restore the a4 register. */
24239 if (restore_a4)
24241 if (reg_containing_return_addr != LR_REGNUM)
24243 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24244 reg_containing_return_addr = LR_REGNUM;
24247 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24250 if (crtl->calls_eh_return)
24251 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24253 /* Return to caller. */
24254 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24256 /* This is for the cases where LR is not being used to contain the return
24257 address. It may therefore contain information that we might not want
24258 to leak, hence it must be cleared. The value in R0 will never be a
24259 secret at this point, so it is safe to use it, see the clearing code
24260 in 'cmse_nonsecure_entry_clear_before_return'. */
24261 if (reg_containing_return_addr != LR_REGNUM)
24262 asm_fprintf (f, "\tmov\tlr, r0\n");
24264 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24265 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24267 else
24268 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24271 /* Scan INSN just before assembler is output for it.
24272 For Thumb-1, we track the status of the condition codes; this
24273 information is used in the cbranchsi4_insn pattern. */
24274 void
24275 thumb1_final_prescan_insn (rtx_insn *insn)
24277 if (flag_print_asm_name)
24278 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24279 INSN_ADDRESSES (INSN_UID (insn)));
24280 /* Don't overwrite the previous setter when we get to a cbranch. */
24281 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24283 enum attr_conds conds;
24285 if (cfun->machine->thumb1_cc_insn)
24287 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24288 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24289 CC_STATUS_INIT;
24291 conds = get_attr_conds (insn);
24292 if (conds == CONDS_SET)
24294 rtx set = single_set (insn);
24295 cfun->machine->thumb1_cc_insn = insn;
24296 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24297 cfun->machine->thumb1_cc_op1 = const0_rtx;
24298 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24299 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24301 rtx src1 = XEXP (SET_SRC (set), 1);
24302 if (src1 == const0_rtx)
24303 cfun->machine->thumb1_cc_mode = CCmode;
24305 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24307 /* Record the src register operand instead of dest because
24308 cprop_hardreg pass propagates src. */
24309 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24312 else if (conds != CONDS_NOCOND)
24313 cfun->machine->thumb1_cc_insn = NULL_RTX;
24316 /* Check if unexpected far jump is used. */
24317 if (cfun->machine->lr_save_eliminated
24318 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24319 internal_error("Unexpected thumb1 far jump");
24322 int
24323 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24325 unsigned HOST_WIDE_INT mask = 0xff;
24326 int i;
24328 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24329 if (val == 0) /* XXX */
24330 return 0;
24332 for (i = 0; i < 25; i++)
24333 if ((val & (mask << i)) == val)
24334 return 1;
24336 return 0;
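/* Illustrative sketch (not part of the compiler): thumb_shiftable_const
   accepts exactly those constants whose set bits fit within one byte after
   some left shift of 0..24, e.g. 0x000ff000 (0xff << 12) is accepted while
   0x00100001 is rejected.  */

static int
byte_shifted_const_sketch (unsigned int val)
{
  int i;

  if (val == 0)
    return 0;
  for (i = 0; i <= 24; i++)
    if ((val & (0xffu << i)) == val)
      return 1;
  return 0;
}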
24339 /* Returns nonzero if the current function contains,
24340 or might contain a far jump. */
24341 static int
24342 thumb_far_jump_used_p (void)
24344 rtx_insn *insn;
24345 bool far_jump = false;
24346 unsigned int func_size = 0;
24348 /* If we have already decided that far jumps may be used,
24349 do not bother checking again, and always return true even if
24350 it turns out that they are not being used. Once we have made
24351 the decision that far jumps are present (and hence that the link
24352 register will be pushed onto the stack) we cannot go back on it. */
24353 if (cfun->machine->far_jump_used)
24354 return 1;
24356 /* If this function is not being called from the prologue/epilogue
24357 generation code then it must be being called from the
24358 INITIAL_ELIMINATION_OFFSET macro. */
24359 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24361 /* In this case we know that we are being asked about the elimination
24362 of the arg pointer register. If that register is not being used,
24363 then there are no arguments on the stack, and we do not have to
24364 worry that a far jump might force the prologue to push the link
24365 register, changing the stack offsets. In this case we can just
24366 return false, since the presence of far jumps in the function will
24367 not affect stack offsets.
24369 If the arg pointer is live (or if it was live, but has now been
24370 eliminated and so set to dead) then we do have to test to see if
24371 the function might contain a far jump. This test can lead to some
24372 false negatives, since before reload is completed, the length of
24373 branch instructions is not known, so gcc defaults to returning their
24374 longest length, which in turn sets the far jump attribute to true.
24376 A false negative will not result in bad code being generated, but it
24377 will result in a needless push and pop of the link register. We
24378 hope that this does not occur too often.
24380 If we need doubleword stack alignment this could affect the other
24381 elimination offsets so we can't risk getting it wrong. */
24382 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24383 cfun->machine->arg_pointer_live = 1;
24384 else if (!cfun->machine->arg_pointer_live)
24385 return 0;
24388 /* We should not change far_jump_used during or after reload, as there is
24389 no chance to change stack frame layout. */
24390 if (reload_in_progress || reload_completed)
24391 return 0;
24393 /* Check to see if the function contains a branch
24394 insn with the far jump attribute set. */
24395 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24397 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24399 far_jump = true;
24401 func_size += get_attr_length (insn);
24404 /* The far_jump attribute is always true for thumb1 before the
24405 shorten_branch pass, so checking it before shorten_branch is not
24406 very useful.
24408 The following heuristic tries to estimate more accurately whether a far
24409 jump may finally be used. It is very conservative, as there is no
24410 chance to roll back a decision not to use far jumps.
24412 Thumb1 long branch offsets range from -2048 to 2046. The worst case is
24413 that each 2-byte insn is associated with a 4-byte constant pool entry,
24414 so using a function size of 2048/3 as the threshold is conservative enough. */
24415 if (far_jump)
24417 if ((func_size * 3) >= 2048)
24419 /* Record the fact that we have decided that
24420 the function does use far jumps. */
24421 cfun->machine->far_jump_used = 1;
24422 return 1;
24426 return 0;
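/* Illustrative sketch (not part of the compiler) of the threshold arithmetic
   above: a Thumb-1 unconditional branch reaches roughly +/-2 KB, and in the
   assumed worst case every 2-byte instruction drags a 4-byte constant-pool
   entry with it, so a measured instruction size of S bytes can occupy up to
   3*S bytes.  The test (S * 3) >= 2048 therefore trips at S == 683.  */

static int
far_jump_possible_sketch (unsigned int func_size)
{
  return (func_size * 3) >= 2048;	/* 682 -> 0, 683 -> 1.  */
}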
24429 /* Return nonzero if FUNC must be entered in ARM mode. */
24430 static bool
24431 is_called_in_ARM_mode (tree func)
24433 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24435 /* Ignore the problem about functions whose address is taken. */
24436 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24437 return true;
24439 #ifdef ARM_PE
24440 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24441 #else
24442 return false;
24443 #endif
24446 /* Given the stack offsets and register mask in OFFSETS, decide how
24447 many additional registers to push instead of subtracting a constant
24448 from SP. For epilogues the principle is the same except we use pop.
24449 FOR_PROLOGUE indicates which we're generating. */
24450 static int
24451 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24453 HOST_WIDE_INT amount;
24454 unsigned long live_regs_mask = offsets->saved_regs_mask;
24455 /* Extract a mask of the ones we can give to the Thumb's push/pop
24456 instruction. */
24457 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24458 /* Then count how many other high registers will need to be pushed. */
24459 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24460 int n_free, reg_base, size;
24462 if (!for_prologue && frame_pointer_needed)
24463 amount = offsets->locals_base - offsets->saved_regs;
24464 else
24465 amount = offsets->outgoing_args - offsets->saved_regs;
24467 /* If the stack frame size is 512 exactly, we can save one load
24468 instruction, which should make this a win even when optimizing
24469 for speed. */
24470 if (!optimize_size && amount != 512)
24471 return 0;
24473 /* Can't do this if there are high registers to push. */
24474 if (high_regs_pushed != 0)
24475 return 0;
24477 /* Shouldn't do it in the prologue if no registers would normally
24478 be pushed at all. In the epilogue, also allow it if we'll have
24479 a pop insn for the PC. */
24480 if (l_mask == 0
24481 && (for_prologue
24482 || TARGET_BACKTRACE
24483 || (live_regs_mask & 1 << LR_REGNUM) == 0
24484 || TARGET_INTERWORK
24485 || crtl->args.pretend_args_size != 0))
24486 return 0;
24488 /* Don't do this if thumb_expand_prologue wants to emit instructions
24489 between the push and the stack frame allocation. */
24490 if (for_prologue
24491 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24492 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24493 return 0;
24495 reg_base = 0;
24496 n_free = 0;
24497 if (!for_prologue)
24499 size = arm_size_return_regs ();
24500 reg_base = ARM_NUM_INTS (size);
24501 live_regs_mask >>= reg_base;
24504 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24505 && (for_prologue || call_used_regs[reg_base + n_free]))
24507 live_regs_mask >>= 1;
24508 n_free++;
24511 if (n_free == 0)
24512 return 0;
24513 gcc_assert (amount / 4 * 4 == amount);
24515 if (amount >= 512 && (amount - n_free * 4) < 512)
24516 return (amount - 508) / 4;
24517 if (amount <= n_free * 4)
24518 return amount / 4;
24519 return 0;
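/* Illustrative sketch (not part of the compiler): a Thumb-1 "sub sp, #imm"
   reaches at most 508 bytes (7-bit immediate scaled by 4), so a 512-byte
   frame needs either an extra instruction or, as above, one extra pushed
   register to bring the explicit adjustment back down to 508.  The
   arithmetic below mirrors the tail of thumb1_extra_regs_pushed.  */

static int
extra_push_regs_sketch (int amount, int n_free)
{
  if (n_free == 0)
    return 0;
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;	/* E.g. amount 512, n_free 2 -> 1.  */
  if (amount <= n_free * 4)
    return amount / 4;		/* The whole frame fits in extra pushes.  */
  return 0;
}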
24522 /* The bits which aren't usefully expanded as rtl. */
24523 const char *
24524 thumb1_unexpanded_epilogue (void)
24526 arm_stack_offsets *offsets;
24527 int regno;
24528 unsigned long live_regs_mask = 0;
24529 int high_regs_pushed = 0;
24530 int extra_pop;
24531 int had_to_push_lr;
24532 int size;
24534 if (cfun->machine->return_used_this_function != 0)
24535 return "";
24537 if (IS_NAKED (arm_current_func_type ()))
24538 return "";
24540 offsets = arm_get_frame_offsets ();
24541 live_regs_mask = offsets->saved_regs_mask;
24542 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24544 /* See if we can deduce the registers used from the function's return value.
24545 This is more reliable than examining df_regs_ever_live_p () because that
24546 will be set if the register is ever used in the function, not just if
24547 the register is used to hold a return value. */
24548 size = arm_size_return_regs ();
24550 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24551 if (extra_pop > 0)
24553 unsigned long extra_mask = (1 << extra_pop) - 1;
24554 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24557 /* The prolog may have pushed some high registers to use as
24558 work registers. e.g. the testsuite file:
24559 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24560 compiles to produce:
24561 push {r4, r5, r6, r7, lr}
24562 mov r7, r9
24563 mov r6, r8
24564 push {r6, r7}
24565 as part of the prolog. We have to undo that pushing here. */
24567 if (high_regs_pushed)
24569 unsigned long mask = live_regs_mask & 0xff;
24570 int next_hi_reg;
24572 /* The available low registers depend on the size of the value we are
24573 returning. */
24574 if (size <= 12)
24575 mask |= 1 << 3;
24576 if (size <= 8)
24577 mask |= 1 << 2;
24579 if (mask == 0)
24580 /* Oh dear! We have no low registers into which we can pop
24581 high registers! */
24582 internal_error
24583 ("no low registers available for popping high registers");
24585 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24586 if (live_regs_mask & (1 << next_hi_reg))
24587 break;
24589 while (high_regs_pushed)
24591 /* Find lo register(s) into which the high register(s) can
24592 be popped. */
24593 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24595 if (mask & (1 << regno))
24596 high_regs_pushed--;
24597 if (high_regs_pushed == 0)
24598 break;
24601 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24603 /* Pop the values into the low register(s). */
24604 thumb_pop (asm_out_file, mask);
24606 /* Move the value(s) into the high registers. */
24607 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24609 if (mask & (1 << regno))
24611 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24612 regno);
24614 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24615 if (live_regs_mask & (1 << next_hi_reg))
24616 break;
24620 live_regs_mask &= ~0x0f00;
24623 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24624 live_regs_mask &= 0xff;
24626 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24628 /* Pop the return address into the PC. */
24629 if (had_to_push_lr)
24630 live_regs_mask |= 1 << PC_REGNUM;
24632 /* Either no argument registers were pushed or a backtrace
24633 structure was created which includes an adjusted stack
24634 pointer, so just pop everything. */
24635 if (live_regs_mask)
24636 thumb_pop (asm_out_file, live_regs_mask);
24638 /* We have either just popped the return address into the
24639 PC or it was kept in LR for the entire function.
24640 Note that thumb_pop has already called thumb_exit if the
24641 PC was in the list. */
24642 if (!had_to_push_lr)
24643 thumb_exit (asm_out_file, LR_REGNUM);
24645 else
24647 /* Pop everything but the return address. */
24648 if (live_regs_mask)
24649 thumb_pop (asm_out_file, live_regs_mask);
24651 if (had_to_push_lr)
24653 if (size > 12)
24655 /* We have no free low regs, so save one. */
24656 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24657 LAST_ARG_REGNUM);
24660 /* Get the return address into a temporary register. */
24661 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24663 if (size > 12)
24665 /* Move the return address to lr. */
24666 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24667 LAST_ARG_REGNUM);
24668 /* Restore the low register. */
24669 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24670 IP_REGNUM);
24671 regno = LR_REGNUM;
24673 else
24674 regno = LAST_ARG_REGNUM;
24676 else
24677 regno = LR_REGNUM;
24679 /* Remove the argument registers that were pushed onto the stack. */
24680 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24681 SP_REGNUM, SP_REGNUM,
24682 crtl->args.pretend_args_size);
24684 thumb_exit (asm_out_file, regno);
24687 return "";
24690 /* Functions to save and restore machine-specific function data. */
24691 static struct machine_function *
24692 arm_init_machine_status (void)
24694 struct machine_function *machine;
24695 machine = ggc_cleared_alloc<machine_function> ();
24697 #if ARM_FT_UNKNOWN != 0
24698 machine->func_type = ARM_FT_UNKNOWN;
24699 #endif
24700 return machine;
24703 /* Return an RTX indicating where the return address to the
24704 calling function can be found. */
24705 rtx
24706 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24708 if (count != 0)
24709 return NULL_RTX;
24711 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24714 /* Do anything needed before RTL is emitted for each function. */
24715 void
24716 arm_init_expanders (void)
24718 /* Arrange to initialize and mark the machine per-function status. */
24719 init_machine_status = arm_init_machine_status;
24721 /* This is to stop the combine pass optimizing away the alignment
24722 adjustment of va_arg. */
24723 /* ??? It is claimed that this should not be necessary. */
24724 if (cfun)
24725 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24728 /* Check that FUNC is called with a different mode. */
24730 bool
24731 arm_change_mode_p (tree func)
24733 if (TREE_CODE (func) != FUNCTION_DECL)
24734 return false;
24736 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24738 if (!callee_tree)
24739 callee_tree = target_option_default_node;
24741 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24742 int flags = callee_opts->x_target_flags;
24744 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24747 /* Like arm_compute_initial_elimination_offset. Simpler because there
24748 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24749 to point at the base of the local variables after static stack
24750 space for a function has been allocated. */
24752 HOST_WIDE_INT
24753 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24755 arm_stack_offsets *offsets;
24757 offsets = arm_get_frame_offsets ();
24759 switch (from)
24761 case ARG_POINTER_REGNUM:
24762 switch (to)
24764 case STACK_POINTER_REGNUM:
24765 return offsets->outgoing_args - offsets->saved_args;
24767 case FRAME_POINTER_REGNUM:
24768 return offsets->soft_frame - offsets->saved_args;
24770 case ARM_HARD_FRAME_POINTER_REGNUM:
24771 return offsets->saved_regs - offsets->saved_args;
24773 case THUMB_HARD_FRAME_POINTER_REGNUM:
24774 return offsets->locals_base - offsets->saved_args;
24776 default:
24777 gcc_unreachable ();
24779 break;
24781 case FRAME_POINTER_REGNUM:
24782 switch (to)
24784 case STACK_POINTER_REGNUM:
24785 return offsets->outgoing_args - offsets->soft_frame;
24787 case ARM_HARD_FRAME_POINTER_REGNUM:
24788 return offsets->saved_regs - offsets->soft_frame;
24790 case THUMB_HARD_FRAME_POINTER_REGNUM:
24791 return offsets->locals_base - offsets->soft_frame;
24793 default:
24794 gcc_unreachable ();
24796 break;
24798 default:
24799 gcc_unreachable ();
24803 /* Generate the function's prologue. */
24805 void
24806 thumb1_expand_prologue (void)
24808 rtx_insn *insn;
24810 HOST_WIDE_INT amount;
24811 HOST_WIDE_INT size;
24812 arm_stack_offsets *offsets;
24813 unsigned long func_type;
24814 int regno;
24815 unsigned long live_regs_mask;
24816 unsigned long l_mask;
24817 unsigned high_regs_pushed = 0;
24818 bool lr_needs_saving;
24820 func_type = arm_current_func_type ();
24822 /* Naked functions don't have prologues. */
24823 if (IS_NAKED (func_type))
24825 if (flag_stack_usage_info)
24826 current_function_static_stack_size = 0;
24827 return;
24830 if (IS_INTERRUPT (func_type))
24832 error ("interrupt Service Routines cannot be coded in Thumb mode");
24833 return;
24836 if (is_called_in_ARM_mode (current_function_decl))
24837 emit_insn (gen_prologue_thumb1_interwork ());
24839 offsets = arm_get_frame_offsets ();
24840 live_regs_mask = offsets->saved_regs_mask;
24841 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24843 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24844 l_mask = live_regs_mask & 0x40ff;
24845 /* Then count how many other high registers will need to be pushed. */
24846 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24848 if (crtl->args.pretend_args_size)
24850 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24852 if (cfun->machine->uses_anonymous_args)
24854 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24855 unsigned long mask;
24857 mask = 1ul << (LAST_ARG_REGNUM + 1);
24858 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24860 insn = thumb1_emit_multi_reg_push (mask, 0);
24862 else
24864 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24865 stack_pointer_rtx, x));
24867 RTX_FRAME_RELATED_P (insn) = 1;
24870 if (TARGET_BACKTRACE)
24872 HOST_WIDE_INT offset = 0;
24873 unsigned work_register;
24874 rtx work_reg, x, arm_hfp_rtx;
24876 /* We have been asked to create a stack backtrace structure.
24877 The code looks like this:
24879 0 .align 2
24880 0 func:
24881 0 sub SP, #16 Reserve space for 4 registers.
24882 2 push {R7} Push low registers.
24883 4 add R7, SP, #20 Get the stack pointer before the push.
24884 6 str R7, [SP, #8] Store the stack pointer
24885 (before reserving the space).
24886 8 mov R7, PC Get hold of the start of this code + 12.
24887 10 str R7, [SP, #16] Store it.
24888 12 mov R7, FP Get hold of the current frame pointer.
24889 14 str R7, [SP, #4] Store it.
24890 16 mov R7, LR Get hold of the current return address.
24891 18 str R7, [SP, #12] Store it.
24892 20 add R7, SP, #16 Point at the start of the
24893 backtrace structure.
24894 22 mov FP, R7 Put this value into the frame pointer. */
24896 work_register = thumb_find_work_register (live_regs_mask);
24897 work_reg = gen_rtx_REG (SImode, work_register);
24898 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24900 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24901 stack_pointer_rtx, GEN_INT (-16)));
24902 RTX_FRAME_RELATED_P (insn) = 1;
24904 if (l_mask)
24906 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24907 RTX_FRAME_RELATED_P (insn) = 1;
24908 lr_needs_saving = false;
24910 offset = bit_count (l_mask) * UNITS_PER_WORD;
24913 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24914 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24916 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24917 x = gen_frame_mem (SImode, x);
24918 emit_move_insn (x, work_reg);
24920 /* Make sure that the instruction fetching the PC is in the right place
24921 to calculate "start of backtrace creation code + 12". */
24922 /* ??? The stores using the common WORK_REG ought to be enough to
24923 prevent the scheduler from doing anything weird. Failing that
24924 we could always move all of the following into an UNSPEC_VOLATILE. */
24925 if (l_mask)
24927 x = gen_rtx_REG (SImode, PC_REGNUM);
24928 emit_move_insn (work_reg, x);
24930 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24931 x = gen_frame_mem (SImode, x);
24932 emit_move_insn (x, work_reg);
24934 emit_move_insn (work_reg, arm_hfp_rtx);
24936 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24937 x = gen_frame_mem (SImode, x);
24938 emit_move_insn (x, work_reg);
24940 else
24942 emit_move_insn (work_reg, arm_hfp_rtx);
24944 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24945 x = gen_frame_mem (SImode, x);
24946 emit_move_insn (x, work_reg);
24948 x = gen_rtx_REG (SImode, PC_REGNUM);
24949 emit_move_insn (work_reg, x);
24951 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24952 x = gen_frame_mem (SImode, x);
24953 emit_move_insn (x, work_reg);
24956 x = gen_rtx_REG (SImode, LR_REGNUM);
24957 emit_move_insn (work_reg, x);
24959 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24960 x = gen_frame_mem (SImode, x);
24961 emit_move_insn (x, work_reg);
24963 x = GEN_INT (offset + 12);
24964 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24966 emit_move_insn (arm_hfp_rtx, work_reg);
24968 /* Optimization: If we are not pushing any low registers but we are going
24969 to push some high registers then delay our first push. This will just
24970 be a push of LR and we can combine it with the push of the first high
24971 register. */
24972 else if ((l_mask & 0xff) != 0
24973 || (high_regs_pushed == 0 && lr_needs_saving))
24975 unsigned long mask = l_mask;
24976 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24977 insn = thumb1_emit_multi_reg_push (mask, mask);
24978 RTX_FRAME_RELATED_P (insn) = 1;
24979 lr_needs_saving = false;
24982 if (high_regs_pushed)
24984 unsigned pushable_regs;
24985 unsigned next_hi_reg;
24986 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24987 : crtl->args.info.nregs;
24988 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24990 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24991 if (live_regs_mask & (1 << next_hi_reg))
24992 break;
24994 /* Mask out the registers used for passing arguments, even if they
24995 could otherwise be pushed. Using them to stash the high registers
24996 would clobber the argument values. */
24997 pushable_regs = l_mask & (~arg_regs_mask);
24998 if (lr_needs_saving)
24999 pushable_regs &= ~(1 << LR_REGNUM);
25001 if (pushable_regs == 0)
25002 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
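/* Thumb-1 PUSH cannot encode high registers directly, so copy each high
   register that needs saving into one of the pushable low registers found
   above and push from there, one batch at a time, until all high registers
   have been saved. */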
25004 while (high_regs_pushed > 0)
25006 unsigned long real_regs_mask = 0;
25007 unsigned long push_mask = 0;
25009 for (regno = LR_REGNUM; regno >= 0; regno --)
25011 if (pushable_regs & (1 << regno))
25013 emit_move_insn (gen_rtx_REG (SImode, regno),
25014 gen_rtx_REG (SImode, next_hi_reg));
25016 high_regs_pushed --;
25017 real_regs_mask |= (1 << next_hi_reg);
25018 push_mask |= (1 << regno);
25020 if (high_regs_pushed)
25022 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25023 next_hi_reg --)
25024 if (live_regs_mask & (1 << next_hi_reg))
25025 break;
25027 else
25028 break;
25032 /* If we had to find a work register and we have not yet
25033 saved the LR then add it to the list of regs to push. */
25034 if (lr_needs_saving)
25036 push_mask |= 1 << LR_REGNUM;
25037 real_regs_mask |= 1 << LR_REGNUM;
25038 lr_needs_saving = false;
25041 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25042 RTX_FRAME_RELATED_P (insn) = 1;
25046 /* Load the pic register before setting the frame pointer,
25047 so we can use r7 as a temporary work register. */
25048 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25049 arm_load_pic_register (live_regs_mask);
25051 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25052 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25053 stack_pointer_rtx);
25055 size = offsets->outgoing_args - offsets->saved_args;
25056 if (flag_stack_usage_info)
25057 current_function_static_stack_size = size;
25059 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25060 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25061 || flag_stack_clash_protection)
25062 && size)
25063 sorry ("-fstack-check=specific for Thumb-1");
25065 amount = offsets->outgoing_args - offsets->saved_regs;
25066 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25067 if (amount)
25069 if (amount < 512)
25071 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25072 GEN_INT (- amount)));
25073 RTX_FRAME_RELATED_P (insn) = 1;
25075 else
25077 rtx reg, dwarf;
25079 /* The stack decrement is too big for an immediate value in a single
25080 insn. In theory we could issue multiple subtracts, but after
25081 three of them it becomes more space efficient to place the full
25082 value in the constant pool and load into a register. (Also the
25083 ARM debugger really likes to see only one stack decrement per
25084 function). So instead we look for a scratch register into which
25085 we can load the decrement, and then we subtract this from the
25086 stack pointer. Unfortunately on the thumb the only available
25087 scratch registers are the argument registers, and we cannot use
25088 these as they may hold arguments to the function. Instead we
25089 attempt to locate a call preserved register which is used by this
25090 function. If we can find one, then we know that it will have
25091 been pushed at the start of the prologue and so we can corrupt
25092 it now. */
25093 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25094 if (live_regs_mask & (1 << regno))
25095 break;
25097 gcc_assert (regno <= LAST_LO_REGNUM);
25099 reg = gen_rtx_REG (SImode, regno);
25101 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25103 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25104 stack_pointer_rtx, reg));
25106 dwarf = gen_rtx_SET (stack_pointer_rtx,
25107 plus_constant (Pmode, stack_pointer_rtx,
25108 -amount));
25109 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25110 RTX_FRAME_RELATED_P (insn) = 1;
25114 if (frame_pointer_needed)
25115 thumb_set_frame_pointer (offsets);
25117 /* If we are profiling, make sure no instructions are scheduled before
25118 the call to mcount. Similarly if the user has requested no
25119 scheduling in the prolog. Similarly if we want non-call exceptions
25120 using the EABI unwinder, to prevent faulting instructions from being
25121 swapped with a stack adjustment. */
25122 if (crtl->profile || !TARGET_SCHED_PROLOG
25123 || (arm_except_unwind_info (&global_options) == UI_TARGET
25124 && cfun->can_throw_non_call_exceptions))
25125 emit_insn (gen_blockage ());
25127 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25128 if (live_regs_mask & 0xff)
25129 cfun->machine->lr_save_eliminated = 0;
25132 /* Clear the caller-saved registers not used to pass return values, and any
25133 leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25135 void
25136 cmse_nonsecure_entry_clear_before_return (void)
25138 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25139 uint32_t padding_bits_to_clear = 0;
25140 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25141 auto_sbitmap to_clear_bitmap (maxregno + 1);
25142 tree result_type;
25143 rtx result_rtl;
25145 bitmap_clear (to_clear_bitmap);
25146 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25147 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25149 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25150 registers. */
25151 if (TARGET_HARD_FLOAT)
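/* d0-d7 (s0-s15) are the VFP registers used for passing floating-point
   arguments under the hard-float AAPCS and are not callee-saved, so mark
   all of them for clearing. */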
25153 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25155 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25157 /* Make sure we don't clear the two scratch registers used to clear the
25158 relevant FPSCR bits in output_return_instruction. */
25159 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25160 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25161 emit_use (gen_rtx_REG (SImode, 4));
25162 bitmap_clear_bit (to_clear_bitmap, 4);
25165 /* If the user has defined registers to be caller saved, these are no longer
25166 restored by the function before returning and must thus be cleared for
25167 security purposes. */
25168 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25170 /* We do not touch registers that can be used to pass arguments as per
25171 the AAPCS, since these should never be made callee-saved by user
25172 options. */
25173 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25174 continue;
25175 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25176 continue;
25177 if (call_used_regs[regno])
25178 bitmap_set_bit (to_clear_bitmap, regno);
25181 /* Make sure we do not clear the registers used to return the result in. */
25182 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25183 if (!VOID_TYPE_P (result_type))
25185 uint64_t to_clear_return_mask;
25186 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25188 /* No need to check that we return in registers, because we don't
25189 support returning on stack yet. */
25190 gcc_assert (REG_P (result_rtl));
25191 to_clear_return_mask
25192 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25193 padding_bits_to_clear_ptr);
25194 if (to_clear_return_mask)
25196 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25197 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25199 if (to_clear_return_mask & (1ULL << regno))
25200 bitmap_clear_bit (to_clear_bitmap, regno);
25205 if (padding_bits_to_clear != 0)
25207 rtx reg_rtx;
25208 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25209 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25211 /* A non-zero padding_bits_to_clear means we are returning a composite
25212 type, which only uses r0. Make sure that r1-r3 are cleared too; we
25213 will use r1 as a scratch register. */
25214 bitmap_clear (to_clear_arg_regs_bitmap);
25215 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25216 NUM_ARG_REGS - 1);
25217 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25219 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25221 /* Fill the lower half of the negated padding_bits_to_clear. */
25222 emit_move_insn (reg_rtx,
25223 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25225 /* Also fill the top half of the negated padding_bits_to_clear. */
25226 if (((~padding_bits_to_clear) >> 16) > 0)
25227 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25228 GEN_INT (16),
25229 GEN_INT (16)),
25230 GEN_INT ((~padding_bits_to_clear) >> 16)));
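/* AND the returned value in r0 with the negated padding mask built up in
   r1, so that any padding bits inside the returned composite are zeroed
   rather than leaking secure-state data. */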
25232 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25233 gen_rtx_REG (SImode, R0_REGNUM),
25234 reg_rtx));
25237 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25239 if (!bitmap_bit_p (to_clear_bitmap, regno))
25240 continue;
25242 if (IS_VFP_REGNUM (regno))
25244 /* If regno is an even vfp register and its successor is also to
25245 be cleared, use vmov. */
25246 if (TARGET_VFP_DOUBLE
25247 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25248 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25250 emit_move_insn (gen_rtx_REG (DFmode, regno),
25251 CONST1_RTX (DFmode));
25252 emit_use (gen_rtx_REG (DFmode, regno));
25253 regno++;
25255 else
25257 emit_move_insn (gen_rtx_REG (SFmode, regno),
25258 CONST1_RTX (SFmode));
25259 emit_use (gen_rtx_REG (SFmode, regno));
25262 else
25264 if (TARGET_THUMB1)
25266 if (regno == R0_REGNUM)
25267 emit_move_insn (gen_rtx_REG (SImode, regno),
25268 const0_rtx);
25269 else
25270 /* R0 has either been cleared above, or it holds a return
25271 value; either way it does not contain secret
25272 information. */
25273 emit_move_insn (gen_rtx_REG (SImode, regno),
25274 gen_rtx_REG (SImode, R0_REGNUM));
25275 emit_use (gen_rtx_REG (SImode, regno));
25277 else
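/* Clear the register by copying LR into it: LR holds the value that the
   return itself is about to expose to the caller, so it carries no
   additional secret information. */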
25279 emit_move_insn (gen_rtx_REG (SImode, regno),
25280 gen_rtx_REG (SImode, LR_REGNUM));
25281 emit_use (gen_rtx_REG (SImode, regno));
25287 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25288 POP instruction can be generated. LR should be replaced by PC. All
25289 the checks required are already done by USE_RETURN_INSN (). Hence,
25290 all we really need to check here is whether a single register or
25291 multiple registers are to be popped on return. */
25292 void
25293 thumb2_expand_return (bool simple_return)
25295 int i, num_regs;
25296 unsigned long saved_regs_mask;
25297 arm_stack_offsets *offsets;
25299 offsets = arm_get_frame_offsets ();
25300 saved_regs_mask = offsets->saved_regs_mask;
25302 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25303 if (saved_regs_mask & (1 << i))
25304 num_regs++;
25306 if (!simple_return && saved_regs_mask)
25308 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25309 functions, or adapt the code to handle it as the ACLE requires. This
25310 path should not be reachable for cmse_nonsecure_entry functions, but we
25311 prefer to assert that for now so that future code changes do not
25312 silently change this behavior. */
25313 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
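/* With a single saved register we can fold the pop into the return insn
   below; otherwise swap LR for PC in the mask and let a multi-register
   pop perform the return. */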
25314 if (num_regs == 1)
25316 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25317 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25318 rtx addr = gen_rtx_MEM (SImode,
25319 gen_rtx_POST_INC (SImode,
25320 stack_pointer_rtx));
25321 set_mem_alias_set (addr, get_frame_alias_set ());
25322 XVECEXP (par, 0, 0) = ret_rtx;
25323 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25324 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25325 emit_jump_insn (par);
25327 else
25329 saved_regs_mask &= ~ (1 << LR_REGNUM);
25330 saved_regs_mask |= (1 << PC_REGNUM);
25331 arm_emit_multi_reg_pop (saved_regs_mask);
25334 else
25336 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25337 cmse_nonsecure_entry_clear_before_return ();
25338 emit_jump_insn (simple_return_rtx);
25342 void
25343 thumb1_expand_epilogue (void)
25345 HOST_WIDE_INT amount;
25346 arm_stack_offsets *offsets;
25347 int regno;
25350 /* Naked functions don't have epilogues. */
25350 if (IS_NAKED (arm_current_func_type ()))
25351 return;
25353 offsets = arm_get_frame_offsets ();
25354 amount = offsets->outgoing_args - offsets->saved_regs;
25356 if (frame_pointer_needed)
25358 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25359 amount = offsets->locals_base - offsets->saved_regs;
25361 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25363 gcc_assert (amount >= 0);
25364 if (amount)
25366 emit_insn (gen_blockage ());
25368 if (amount < 512)
25369 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25370 GEN_INT (amount)));
25371 else
25373 /* r3 is always free in the epilogue. */
25374 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25376 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25377 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25381 /* Emit a USE (stack_pointer_rtx), so that
25382 the stack adjustment will not be deleted. */
25383 emit_insn (gen_force_register_use (stack_pointer_rtx));
25385 if (crtl->profile || !TARGET_SCHED_PROLOG)
25386 emit_insn (gen_blockage ());
25388 /* Emit a clobber for each insn that will be restored in the epilogue,
25389 so that flow2 will get register lifetimes correct. */
25390 for (regno = 0; regno < 13; regno++)
25391 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25392 emit_clobber (gen_rtx_REG (SImode, regno));
25394 if (! df_regs_ever_live_p (LR_REGNUM))
25395 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25397 /* Clear all caller-saved regs that are not used to return. */
25398 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25399 cmse_nonsecure_entry_clear_before_return ();
25402 /* Epilogue code for APCS frame. */
25403 static void
25404 arm_expand_epilogue_apcs_frame (bool really_return)
25406 unsigned long func_type;
25407 unsigned long saved_regs_mask;
25408 int num_regs = 0;
25409 int i;
25410 int floats_from_frame = 0;
25411 arm_stack_offsets *offsets;
25413 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25414 func_type = arm_current_func_type ();
25416 /* Get frame offsets for ARM. */
25417 offsets = arm_get_frame_offsets ();
25418 saved_regs_mask = offsets->saved_regs_mask;
25420 /* Find the offset of the floating-point save area in the frame. */
25421 floats_from_frame
25422 = (offsets->saved_args
25423 + arm_compute_static_chain_stack_bytes ()
25424 - offsets->frame);
25426 /* Compute how many core registers are saved and how far away the floats are. */
25427 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25428 if (saved_regs_mask & (1 << i))
25430 num_regs++;
25431 floats_from_frame += 4;
25434 if (TARGET_HARD_FLOAT)
25436 int start_reg;
25437 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25439 /* The offset is from IP_REGNUM. */
25440 int saved_size = arm_get_vfp_saved_size ();
25441 if (saved_size > 0)
25443 rtx_insn *insn;
25444 floats_from_frame += saved_size;
25445 insn = emit_insn (gen_addsi3 (ip_rtx,
25446 hard_frame_pointer_rtx,
25447 GEN_INT (-floats_from_frame)));
25448 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25449 ip_rtx, hard_frame_pointer_rtx);
25452 /* Generate VFP register multi-pop. */
25453 start_reg = FIRST_VFP_REGNUM;
25455 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25456 /* Look for a case where a reg does not need restoring. */
25457 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25458 && (!df_regs_ever_live_p (i + 1)
25459 || call_used_regs[i + 1]))
25461 if (start_reg != i)
25462 arm_emit_vfp_multi_reg_pop (start_reg,
25463 (i - start_reg) / 2,
25464 gen_rtx_REG (SImode,
25465 IP_REGNUM));
25466 start_reg = i + 2;
25469 /* Restore the remaining regs that we have discovered (or possibly
25470 even all of them, if the conditional in the for loop never
25471 fired). */
25472 if (start_reg != i)
25473 arm_emit_vfp_multi_reg_pop (start_reg,
25474 (i - start_reg) / 2,
25475 gen_rtx_REG (SImode, IP_REGNUM));
25478 if (TARGET_IWMMXT)
25480 /* The frame pointer is guaranteed to be non-double-word aligned, as
25481 it is set to double-word-aligned old_stack_pointer - 4. */
25482 rtx_insn *insn;
25483 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25485 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25486 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25488 rtx addr = gen_frame_mem (V2SImode,
25489 plus_constant (Pmode, hard_frame_pointer_rtx,
25490 - lrm_count * 4));
25491 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25492 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25493 gen_rtx_REG (V2SImode, i),
25494 NULL_RTX);
25495 lrm_count += 2;
25499 /* saved_regs_mask should contain IP, which holds the old stack pointer
25500 from the time the frame was created. Since SP and IP are adjacent
25501 registers, we can restore the value directly into SP. */
25502 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25503 saved_regs_mask &= ~(1 << IP_REGNUM);
25504 saved_regs_mask |= (1 << SP_REGNUM);
25506 /* There are two registers left in saved_regs_mask - LR and PC. We
25507 only need to restore LR (the return address), but to
25508 save time we can load it directly into PC, unless we need a
25509 special function exit sequence, or we are not really returning. */
25510 if (really_return
25511 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25512 && !crtl->calls_eh_return)
25513 /* Delete LR from the register mask, so that LR on
25514 the stack is loaded into the PC in the register mask. */
25515 saved_regs_mask &= ~(1 << LR_REGNUM);
25516 else
25517 saved_regs_mask &= ~(1 << PC_REGNUM);
25519 num_regs = bit_count (saved_regs_mask);
25520 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25522 rtx_insn *insn;
25523 emit_insn (gen_blockage ());
25524 /* Unwind the stack to just below the saved registers. */
25525 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25526 hard_frame_pointer_rtx,
25527 GEN_INT (- 4 * num_regs)));
25529 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25530 stack_pointer_rtx, hard_frame_pointer_rtx);
25533 arm_emit_multi_reg_pop (saved_regs_mask);
25535 if (IS_INTERRUPT (func_type))
25537 /* Interrupt handlers will have pushed the
25538 IP onto the stack, so restore it now. */
25539 rtx_insn *insn;
25540 rtx addr = gen_rtx_MEM (SImode,
25541 gen_rtx_POST_INC (SImode,
25542 stack_pointer_rtx));
25543 set_mem_alias_set (addr, get_frame_alias_set ());
25544 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25545 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25546 gen_rtx_REG (SImode, IP_REGNUM),
25547 NULL_RTX);
25550 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25551 return;
25553 if (crtl->calls_eh_return)
25554 emit_insn (gen_addsi3 (stack_pointer_rtx,
25555 stack_pointer_rtx,
25556 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25558 if (IS_STACKALIGN (func_type))
25559 /* Restore the original stack pointer. Before prologue, the stack was
25560 realigned and the original stack pointer saved in r0. For details,
25561 see comment in arm_expand_prologue. */
25562 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25564 emit_jump_insn (simple_return_rtx);
25567 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25568 function is not a sibcall. */
25569 void
25570 arm_expand_epilogue (bool really_return)
25572 unsigned long func_type;
25573 unsigned long saved_regs_mask;
25574 int num_regs = 0;
25575 int i;
25576 int amount;
25577 arm_stack_offsets *offsets;
25579 func_type = arm_current_func_type ();
25581 /* Naked functions don't have epilogues. Hence, generate the return pattern
25582 and let output_return_instruction take care of any instruction emission. */
25583 if (IS_NAKED (func_type)
25584 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25586 if (really_return)
25587 emit_jump_insn (simple_return_rtx);
25588 return;
25591 /* If we are throwing an exception, then we really must be doing a
25592 return, so we can't tail-call. */
25593 gcc_assert (!crtl->calls_eh_return || really_return);
25595 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25597 arm_expand_epilogue_apcs_frame (really_return);
25598 return;
25601 /* Get frame offsets for ARM. */
25602 offsets = arm_get_frame_offsets ();
25603 saved_regs_mask = offsets->saved_regs_mask;
25604 num_regs = bit_count (saved_regs_mask);
25606 if (frame_pointer_needed)
25608 rtx_insn *insn;
25609 /* Restore stack pointer if necessary. */
25610 if (TARGET_ARM)
25612 /* In ARM mode, frame pointer points to first saved register.
25613 Restore stack pointer to last saved register. */
25614 amount = offsets->frame - offsets->saved_regs;
25616 /* Force out any pending memory operations that reference stacked data
25617 before stack de-allocation occurs. */
25618 emit_insn (gen_blockage ());
25619 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25620 hard_frame_pointer_rtx,
25621 GEN_INT (amount)));
25622 arm_add_cfa_adjust_cfa_note (insn, amount,
25623 stack_pointer_rtx,
25624 hard_frame_pointer_rtx);
25626 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25627 deleted. */
25628 emit_insn (gen_force_register_use (stack_pointer_rtx));
25630 else
25632 /* In Thumb-2 mode, the frame pointer points to the last saved
25633 register. */
25634 amount = offsets->locals_base - offsets->saved_regs;
25635 if (amount)
25637 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25638 hard_frame_pointer_rtx,
25639 GEN_INT (amount)));
25640 arm_add_cfa_adjust_cfa_note (insn, amount,
25641 hard_frame_pointer_rtx,
25642 hard_frame_pointer_rtx);
25645 /* Force out any pending memory operations that reference stacked data
25646 before stack de-allocation occurs. */
25647 emit_insn (gen_blockage ());
25648 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25649 hard_frame_pointer_rtx));
25650 arm_add_cfa_adjust_cfa_note (insn, 0,
25651 stack_pointer_rtx,
25652 hard_frame_pointer_rtx);
25653 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25654 deleted. */
25655 emit_insn (gen_force_register_use (stack_pointer_rtx));
25658 else
25660 /* Pop off outgoing args and local frame to adjust stack pointer to
25661 last saved register. */
25662 amount = offsets->outgoing_args - offsets->saved_regs;
25663 if (amount)
25665 rtx_insn *tmp;
25666 /* Force out any pending memory operations that reference stacked data
25667 before stack de-allocation occurs. */
25668 emit_insn (gen_blockage ());
25669 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25670 stack_pointer_rtx,
25671 GEN_INT (amount)));
25672 arm_add_cfa_adjust_cfa_note (tmp, amount,
25673 stack_pointer_rtx, stack_pointer_rtx);
25674 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25675 not deleted. */
25676 emit_insn (gen_force_register_use (stack_pointer_rtx));
25680 if (TARGET_HARD_FLOAT)
25682 /* Generate VFP register multi-pop. */
25683 int end_reg = LAST_VFP_REGNUM + 1;
25685 /* Scan the registers in reverse order. We need to match
25686 any groupings made in the prologue and generate matching
25687 vldm operations. The need to match groups is because,
25688 unlike pop, vldm can only do consecutive regs. */
25689 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25690 /* Look for a case where a reg does not need restoring. */
25691 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25692 && (!df_regs_ever_live_p (i + 1)
25693 || call_used_regs[i + 1]))
25695 /* Restore the regs discovered so far (from reg+2 to
25696 end_reg). */
25697 if (end_reg > i + 2)
25698 arm_emit_vfp_multi_reg_pop (i + 2,
25699 (end_reg - (i + 2)) / 2,
25700 stack_pointer_rtx);
25701 end_reg = i;
25704 /* Restore the remaining regs that we have discovered (or possibly
25705 even all of them, if the conditional in the for loop never
25706 fired). */
25707 if (end_reg > i + 2)
25708 arm_emit_vfp_multi_reg_pop (i + 2,
25709 (end_reg - (i + 2)) / 2,
25710 stack_pointer_rtx);
25713 if (TARGET_IWMMXT)
25714 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25715 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25717 rtx_insn *insn;
25718 rtx addr = gen_rtx_MEM (V2SImode,
25719 gen_rtx_POST_INC (SImode,
25720 stack_pointer_rtx));
25721 set_mem_alias_set (addr, get_frame_alias_set ());
25722 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25723 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25724 gen_rtx_REG (V2SImode, i),
25725 NULL_RTX);
25726 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25727 stack_pointer_rtx, stack_pointer_rtx);
25730 if (saved_regs_mask)
25732 rtx insn;
25733 bool return_in_pc = false;
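/* Decide whether the saved LR can be popped directly into PC, making the
   pop double as the return. This needs LR in the mask, a real return (not
   a sibcall), no pretend-args or EH-return adjustment after the pop, and
   no interworked, CMSE-entry or stack-realigning function. */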
25735 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25736 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25737 && !IS_CMSE_ENTRY (func_type)
25738 && !IS_STACKALIGN (func_type)
25739 && really_return
25740 && crtl->args.pretend_args_size == 0
25741 && saved_regs_mask & (1 << LR_REGNUM)
25742 && !crtl->calls_eh_return)
25744 saved_regs_mask &= ~(1 << LR_REGNUM);
25745 saved_regs_mask |= (1 << PC_REGNUM);
25746 return_in_pc = true;
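/* Only one register to restore: use a single post-increment load, folding
   it into the return insn when it targets PC. */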
25749 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25751 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25752 if (saved_regs_mask & (1 << i))
25754 rtx addr = gen_rtx_MEM (SImode,
25755 gen_rtx_POST_INC (SImode,
25756 stack_pointer_rtx));
25757 set_mem_alias_set (addr, get_frame_alias_set ());
25759 if (i == PC_REGNUM)
25761 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25762 XVECEXP (insn, 0, 0) = ret_rtx;
25763 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25764 addr);
25765 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25766 insn = emit_jump_insn (insn);
25768 else
25770 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25771 addr));
25772 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25773 gen_rtx_REG (SImode, i),
25774 NULL_RTX);
25775 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25776 stack_pointer_rtx,
25777 stack_pointer_rtx);
25781 else
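/* More than one register to restore: use LDRD-based pops when the tuning
   prefers them and we are not optimizing for size, otherwise an ordinary
   multi-register pop. */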
25783 if (TARGET_LDRD
25784 && current_tune->prefer_ldrd_strd
25785 && !optimize_function_for_size_p (cfun))
25787 if (TARGET_THUMB2)
25788 thumb2_emit_ldrd_pop (saved_regs_mask);
25789 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25790 arm_emit_ldrd_pop (saved_regs_mask);
25791 else
25792 arm_emit_multi_reg_pop (saved_regs_mask);
25794 else
25795 arm_emit_multi_reg_pop (saved_regs_mask);
25798 if (return_in_pc)
25799 return;
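/* Undo the stack space that the prologue reserved for pushed pretend
   (anonymous) arguments and for the static chain. */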
25802 amount
25803 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25804 if (amount)
25806 int i, j;
25807 rtx dwarf = NULL_RTX;
25808 rtx_insn *tmp =
25809 emit_insn (gen_addsi3 (stack_pointer_rtx,
25810 stack_pointer_rtx,
25811 GEN_INT (amount)));
25813 RTX_FRAME_RELATED_P (tmp) = 1;
25815 if (cfun->machine->uses_anonymous_args)
25817 /* Restore pretend args. See arm_expand_prologue for how pretend_args
25818 are saved on the stack. */
25819 int num_regs = crtl->args.pretend_args_size / 4;
25820 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25821 for (j = 0, i = 0; j < num_regs; i++)
25822 if (saved_regs_mask & (1 << i))
25824 rtx reg = gen_rtx_REG (SImode, i);
25825 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25826 j++;
25828 REG_NOTES (tmp) = dwarf;
25830 arm_add_cfa_adjust_cfa_note (tmp, amount,
25831 stack_pointer_rtx, stack_pointer_rtx);
25834 /* Clear all caller-saved regs that are not used to return. */
25835 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25837 /* CMSE_ENTRY always returns. */
25838 gcc_assert (really_return);
25839 cmse_nonsecure_entry_clear_before_return ();
25842 if (!really_return)
25843 return;
25845 if (crtl->calls_eh_return)
25846 emit_insn (gen_addsi3 (stack_pointer_rtx,
25847 stack_pointer_rtx,
25848 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25850 if (IS_STACKALIGN (func_type))
25851 /* Restore the original stack pointer. Before prologue, the stack was
25852 realigned and the original stack pointer saved in r0. For details,
25853 see comment in arm_expand_prologue. */
25854 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25856 emit_jump_insn (simple_return_rtx);
25859 /* Implementation of insn prologue_thumb1_interwork. This is the first
25860 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25862 const char *
25863 thumb1_output_interwork (void)
25865 const char * name;
25866 FILE *f = asm_out_file;
25868 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25869 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25870 == SYMBOL_REF);
25871 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25873 /* Generate code sequence to switch us into Thumb mode. */
25874 /* The .code 32 directive has already been emitted by
25875 ASM_DECLARE_FUNCTION_NAME. */
25876 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25877 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25879 /* Generate a label, so that the debugger will notice the
25880 change in instruction sets. This label is also used by
25881 the assembler to bypass the ARM code when this function
25882 is called from a Thumb encoded function elsewhere in the
25883 same file. Hence the definition of STUB_NAME here must
25884 agree with the definition in gas/config/tc-arm.c. */
25886 #define STUB_NAME ".real_start_of"
25888 fprintf (f, "\t.code\t16\n");
25889 #ifdef ARM_PE
25890 if (arm_dllexport_name_p (name))
25891 name = arm_strip_name_encoding (name);
25892 #endif
25893 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25894 fprintf (f, "\t.thumb_func\n");
25895 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25897 return "";
25900 /* Handle the case of a double word load into a low register from
25901 a computed memory address. The computed address may involve a
25902 register which is overwritten by the load. */
25903 const char *
25904 thumb_load_double_from_address (rtx *operands)
25906 rtx addr;
25907 rtx base;
25908 rtx offset;
25909 rtx arg1;
25910 rtx arg2;
25912 gcc_assert (REG_P (operands[0]));
25913 gcc_assert (MEM_P (operands[1]));
25915 /* Get the memory address. */
25916 addr = XEXP (operands[1], 0);
25918 /* Work out how the memory address is computed. */
25919 switch (GET_CODE (addr))
25921 case REG:
25922 operands[2] = adjust_address (operands[1], SImode, 4);
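/* If the destination register overlaps the address register, load the
   high word first so that the address is still intact for the second
   load; otherwise load the low word first. */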
25924 if (REGNO (operands[0]) == REGNO (addr))
25926 output_asm_insn ("ldr\t%H0, %2", operands);
25927 output_asm_insn ("ldr\t%0, %1", operands);
25929 else
25931 output_asm_insn ("ldr\t%0, %1", operands);
25932 output_asm_insn ("ldr\t%H0, %2", operands);
25934 break;
25936 case CONST:
25937 /* Compute <address> + 4 for the high order load. */
25938 operands[2] = adjust_address (operands[1], SImode, 4);
25940 output_asm_insn ("ldr\t%0, %1", operands);
25941 output_asm_insn ("ldr\t%H0, %2", operands);
25942 break;
25944 case PLUS:
25945 arg1 = XEXP (addr, 0);
25946 arg2 = XEXP (addr, 1);
25948 if (CONSTANT_P (arg1))
25949 base = arg2, offset = arg1;
25950 else
25951 base = arg1, offset = arg2;
25953 gcc_assert (REG_P (base));
25955 /* Catch the case of <address> = <reg> + <reg> */
25956 if (REG_P (offset))
25958 int reg_offset = REGNO (offset);
25959 int reg_base = REGNO (base);
25960 int reg_dest = REGNO (operands[0]);
25962 /* Add the base and offset registers together into the
25963 higher destination register. */
25964 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25965 reg_dest + 1, reg_base, reg_offset);
25967 /* Load the lower destination register from the address in
25968 the higher destination register. */
25969 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25970 reg_dest, reg_dest + 1);
25972 /* Load the higher destination register from its own address
25973 plus 4. */
25974 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25975 reg_dest + 1, reg_dest + 1);
25977 else
25979 /* Compute <address> + 4 for the high order load. */
25980 operands[2] = adjust_address (operands[1], SImode, 4);
25982 /* If the computed address is held in the low order register
25983 then load the high order register first, otherwise always
25984 load the low order register first. */
25985 if (REGNO (operands[0]) == REGNO (base))
25987 output_asm_insn ("ldr\t%H0, %2", operands);
25988 output_asm_insn ("ldr\t%0, %1", operands);
25990 else
25992 output_asm_insn ("ldr\t%0, %1", operands);
25993 output_asm_insn ("ldr\t%H0, %2", operands);
25996 break;
25998 case LABEL_REF:
25999 /* With no registers to worry about we can just load the value
26000 directly. */
26001 operands[2] = adjust_address (operands[1], SImode, 4);
26003 output_asm_insn ("ldr\t%H0, %2", operands);
26004 output_asm_insn ("ldr\t%0, %1", operands);
26005 break;
26007 default:
26008 gcc_unreachable ();
26011 return "";
26014 const char *
26015 thumb_output_move_mem_multiple (int n, rtx *operands)
26017 switch (n)
26019 case 2:
26020 if (REGNO (operands[4]) > REGNO (operands[5]))
26021 std::swap (operands[4], operands[5]);
26023 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26024 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26025 break;
26027 case 3:
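/* Sort the three scratch registers into ascending order, since
   ldmia/stmia require their register lists in increasing register
   number. */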
26028 if (REGNO (operands[4]) > REGNO (operands[5]))
26029 std::swap (operands[4], operands[5]);
26030 if (REGNO (operands[5]) > REGNO (operands[6]))
26031 std::swap (operands[5], operands[6]);
26032 if (REGNO (operands[4]) > REGNO (operands[5]))
26033 std::swap (operands[4], operands[5]);
26035 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26036 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26037 break;
26039 default:
26040 gcc_unreachable ();
26043 return "";
26046 /* Output a call-via instruction for thumb state. */
26047 const char *
26048 thumb_call_via_reg (rtx reg)
26050 int regno = REGNO (reg);
26051 rtx *labelp;
26053 gcc_assert (regno < LR_REGNUM);
26055 /* If we are in the normal text section we can use a single instance
26056 per compilation unit. If we are doing function sections, then we need
26057 an entry per section, since we can't rely on reachability. */
26058 if (in_section == text_section)
26060 thumb_call_reg_needed = 1;
26062 if (thumb_call_via_label[regno] == NULL)
26063 thumb_call_via_label[regno] = gen_label_rtx ();
26064 labelp = thumb_call_via_label + regno;
26066 else
26068 if (cfun->machine->call_via[regno] == NULL)
26069 cfun->machine->call_via[regno] = gen_label_rtx ();
26070 labelp = cfun->machine->call_via + regno;
26073 output_asm_insn ("bl\t%a0", labelp);
26074 return "";
26077 /* Routines for generating rtl. */
26078 void
26079 thumb_expand_movmemqi (rtx *operands)
26081 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26082 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26083 HOST_WIDE_INT len = INTVAL (operands[2]);
26084 HOST_WIDE_INT offset = 0;
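/* Copy the bulk of the block with the ldmia/stmia based 12- and 8-byte
   moves (which advance the pointers), then finish off any remainder with
   word, halfword and byte copies at increasing offsets. */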
26086 while (len >= 12)
26088 emit_insn (gen_movmem12b (out, in, out, in));
26089 len -= 12;
26092 if (len >= 8)
26094 emit_insn (gen_movmem8b (out, in, out, in));
26095 len -= 8;
26098 if (len >= 4)
26100 rtx reg = gen_reg_rtx (SImode);
26101 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26102 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26103 len -= 4;
26104 offset += 4;
26107 if (len >= 2)
26109 rtx reg = gen_reg_rtx (HImode);
26110 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26111 plus_constant (Pmode, in,
26112 offset))));
26113 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26114 offset)),
26115 reg));
26116 len -= 2;
26117 offset += 2;
26120 if (len)
26122 rtx reg = gen_reg_rtx (QImode);
26123 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26124 plus_constant (Pmode, in,
26125 offset))));
26126 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26127 offset)),
26128 reg));
26132 void
26133 thumb_reload_out_hi (rtx *operands)
26135 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26138 /* Return the length of a function name prefix
26139 that starts with the character 'c'. */
26140 static int
26141 arm_get_strip_length (int c)
26143 switch (c)
26145 ARM_NAME_ENCODING_LENGTHS
26146 default: return 0;
26150 /* Return a pointer to a function's name with any
26151 and all prefix encodings stripped from it. */
26152 const char *
26153 arm_strip_name_encoding (const char *name)
26155 int skip;
26157 while ((skip = arm_get_strip_length (* name)))
26158 name += skip;
26160 return name;
26163 /* If there is a '*' anywhere in the name's prefix, then
26164 emit the stripped name verbatim, otherwise prepend an
26165 underscore if leading underscores are being used. */
26166 void
26167 arm_asm_output_labelref (FILE *stream, const char *name)
26169 int skip;
26170 int verbatim = 0;
26172 while ((skip = arm_get_strip_length (* name)))
26174 verbatim |= (*name == '*');
26175 name += skip;
26178 if (verbatim)
26179 fputs (name, stream);
26180 else
26181 asm_fprintf (stream, "%U%s", name);
26184 /* This function is used to emit an EABI tag and its associated value.
26185 We emit the numerical value of the tag in case the assembler does not
26186 support textual tags (e.g. gas prior to 2.20). If requested we include
26187 the tag name in a comment so that anyone reading the assembler output
26188 will know which tag is being set.
26190 This function is not static because arm-c.c needs it too. */
26192 void
26193 arm_emit_eabi_attribute (const char *name, int num, int val)
26195 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26196 if (flag_verbose_asm || flag_debug_asm)
26197 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26198 asm_fprintf (asm_out_file, "\n");
26201 /* This function is used to print CPU tuning information as comment
26202 in assembler file. Pointers are not printed for now. */
26204 void
26205 arm_print_tune_info (void)
26207 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26208 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26209 current_tune->constant_limit);
26210 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26211 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26212 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26213 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26214 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26215 "prefetch.l1_cache_size:\t%d\n",
26216 current_tune->prefetch.l1_cache_size);
26217 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26218 "prefetch.l1_cache_line_size:\t%d\n",
26219 current_tune->prefetch.l1_cache_line_size);
26220 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26221 "prefer_constant_pool:\t%d\n",
26222 (int) current_tune->prefer_constant_pool);
26223 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26224 "branch_cost:\t(s:speed, p:predictable)\n");
26225 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26226 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26227 current_tune->branch_cost (false, false));
26228 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26229 current_tune->branch_cost (false, true));
26230 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26231 current_tune->branch_cost (true, false));
26232 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26233 current_tune->branch_cost (true, true));
26234 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26235 "prefer_ldrd_strd:\t%d\n",
26236 (int) current_tune->prefer_ldrd_strd);
26237 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26238 "logical_op_non_short_circuit:\t[%d,%d]\n",
26239 (int) current_tune->logical_op_non_short_circuit_thumb,
26240 (int) current_tune->logical_op_non_short_circuit_arm);
26241 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26242 "prefer_neon_for_64bits:\t%d\n",
26243 (int) current_tune->prefer_neon_for_64bits);
26244 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26245 "disparage_flag_setting_t16_encodings:\t%d\n",
26246 (int) current_tune->disparage_flag_setting_t16_encodings);
26247 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26248 "string_ops_prefer_neon:\t%d\n",
26249 (int) current_tune->string_ops_prefer_neon);
26250 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26251 "max_insns_inline_memset:\t%d\n",
26252 current_tune->max_insns_inline_memset);
26253 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26254 current_tune->fusible_ops);
26255 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26256 (int) current_tune->sched_autopref);
26259 /* Print .arch and .arch_extension directives corresponding to the
26260 current architecture configuration. */
26261 static void
26262 arm_print_asm_arch_directives ()
26264 const arch_option *arch
26265 = arm_parse_arch_option_name (all_architectures, "-march",
26266 arm_active_target.arch_name);
26267 auto_sbitmap opt_bits (isa_num_bits);
26269 gcc_assert (arch);
26271 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26272 if (!arch->common.extensions)
26273 return;
26275 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26276 opt->name != NULL;
26277 opt++)
26279 if (!opt->remove)
26281 arm_initialize_isa (opt_bits, opt->isa_bits);
26283 /* If every feature bit of this option is set in the target
26284 ISA specification, print out the option name. However,
26285 don't print anything if all the bits are part of the
26286 FPU specification. */
26287 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26288 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26289 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26294 static void
26295 arm_file_start (void)
26297 int val;
26299 if (TARGET_BPABI)
26301 /* We don't have a specified CPU. Use the architecture to
26302 generate the tags.
26304 Note: it might be better to do this unconditionally, then the
26305 assembler would not need to know about all new CPU names as
26306 they are added. */
26307 if (!arm_active_target.core_name)
26309 /* armv7ve doesn't support any extensions. */
26310 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26312 /* Keep backward compatibility for assemblers
26313 which don't support armv7ve. */
26314 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26315 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26316 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26317 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26318 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26320 else
26321 arm_print_asm_arch_directives ();
26323 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26324 asm_fprintf (asm_out_file, "\t.arch %s\n",
26325 arm_active_target.core_name + 8);
26326 else
26328 const char* truncated_name
26329 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26330 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26333 if (print_tune_info)
26334 arm_print_tune_info ();
26336 if (! TARGET_SOFT_FLOAT)
26338 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26339 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26341 if (TARGET_HARD_FLOAT_ABI)
26342 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26345 /* Some of these attributes only apply when the corresponding features
26346 are used. However we don't have any easy way of figuring this out.
26347 Conservatively record the setting that would have been used. */
26349 if (flag_rounding_math)
26350 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26352 if (!flag_unsafe_math_optimizations)
26354 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26355 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26357 if (flag_signaling_nans)
26358 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26360 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26361 flag_finite_math_only ? 1 : 3);
26363 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26364 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26365 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26366 flag_short_enums ? 1 : 2);
26368 /* Tag_ABI_optimization_goals. */
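/* Map the optimization level onto the ABI's optimization-goal values:
   4 when optimizing for size, 2 for -O2 and above, 1 for -O1, and 6
   (favour debugging) when not optimizing. */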
26369 if (optimize_size)
26370 val = 4;
26371 else if (optimize >= 2)
26372 val = 2;
26373 else if (optimize)
26374 val = 1;
26375 else
26376 val = 6;
26377 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26379 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26380 unaligned_access);
26382 if (arm_fp16_format)
26383 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26384 (int) arm_fp16_format);
26386 if (arm_lang_output_object_attributes_hook)
26387 arm_lang_output_object_attributes_hook();
26390 default_file_start ();
26393 static void
26394 arm_file_end (void)
26396 int regno;
26398 if (NEED_INDICATE_EXEC_STACK)
26399 /* Add .note.GNU-stack. */
26400 file_end_indicate_exec_stack ();
26402 if (! thumb_call_reg_needed)
26403 return;
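/* Emit the per-register call-via veneers that thumb_call_via_reg
   referenced from the shared text section: a Thumb "bx rN" behind an
   internal label for each register that needed one. */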
26405 switch_to_section (text_section);
26406 asm_fprintf (asm_out_file, "\t.code 16\n");
26407 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26409 for (regno = 0; regno < LR_REGNUM; regno++)
26411 rtx label = thumb_call_via_label[regno];
26413 if (label != 0)
26415 targetm.asm_out.internal_label (asm_out_file, "L",
26416 CODE_LABEL_NUMBER (label));
26417 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26422 #ifndef ARM_PE
26423 /* Symbols in the text segment can be accessed without indirecting via the
26424 constant pool; it may take an extra binary operation, but this is still
26425 faster than indirecting via memory. Don't do this when not optimizing,
26426 since we won't be calculating all of the offsets necessary to do this
26427 simplification. */
26429 static void
26430 arm_encode_section_info (tree decl, rtx rtl, int first)
26432 if (optimize > 0 && TREE_CONSTANT (decl))
26433 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26435 default_encode_section_info (decl, rtl, first);
26437 #endif /* !ARM_PE */
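/* Output an internal label, first dropping out of any pending
   conditional-execution state if this label is the branch target that the
   conditionalizing state machine was tracking. */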
26439 static void
26440 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26442 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26443 && !strcmp (prefix, "L"))
26445 arm_ccfsm_state = 0;
26446 arm_target_insn = NULL;
26448 default_internal_label (stream, prefix, labelno);
26451 /* Output code to add DELTA to the first argument, and then jump
26452 to FUNCTION. Used for C++ multiple inheritance. */
26454 static void
26455 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26456 HOST_WIDE_INT, tree function)
26458 static int thunk_label = 0;
26459 char label[256];
26460 char labelpc[256];
26461 int mi_delta = delta;
26462 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26463 int shift = 0;
26464 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26465 ? 1 : 0);
26466 if (mi_delta < 0)
26467 mi_delta = - mi_delta;
26469 final_start_function (emit_barrier (), file, 1);
26471 if (TARGET_THUMB1)
26473 int labelno = thunk_label++;
26474 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26475 /* Thunks are entered in arm mode when available. */
26476 if (TARGET_THUMB1_ONLY)
26478 /* push r3 so we can use it as a temporary. */
26479 /* TODO: Omit this save if r3 is not used. */
26480 fputs ("\tpush {r3}\n", file);
26481 fputs ("\tldr\tr3, ", file);
26483 else
26485 fputs ("\tldr\tr12, ", file);
26487 assemble_name (file, label);
26488 fputc ('\n', file);
26489 if (flag_pic)
26491 /* If we are generating PIC, the ldr instruction below loads
26492 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26493 the address of the add + 8, so we have:
26495 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26496 = target + 1.
26498 Note that we have "+ 1" because some versions of GNU ld
26499 don't set the low bit of the result for R_ARM_REL32
26500 relocations against thumb function symbols.
26501 On ARMv6M this is +4, not +8. */
26502 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26503 assemble_name (file, labelpc);
26504 fputs (":\n", file);
26505 if (TARGET_THUMB1_ONLY)
26507 /* This is 2 insns after the start of the thunk, so we know it
26508 is 4-byte aligned. */
26509 fputs ("\tadd\tr3, pc, r3\n", file);
26510 fputs ("\tmov r12, r3\n", file);
26512 else
26513 fputs ("\tadd\tr12, pc, r12\n", file);
26515 else if (TARGET_THUMB1_ONLY)
26516 fputs ("\tmov r12, r3\n", file);
26518 if (TARGET_THUMB1_ONLY)
26520 if (mi_delta > 255)
26522 fputs ("\tldr\tr3, ", file);
26523 assemble_name (file, label);
26524 fputs ("+4\n", file);
26525 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26526 mi_op, this_regno, this_regno);
26528 else if (mi_delta != 0)
26530 /* Thumb1 unified syntax requires s suffix in instruction name when
26531 one of the operands is immediate. */
26532 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26533 mi_op, this_regno, this_regno,
26534 mi_delta);
26537 else
26539 /* TODO: Use movw/movt for large constants when available. */
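/* Add the delta one shifted 8-bit chunk at a time; ARM data-processing
   immediates are an 8-bit value rotated by an even amount, so each
   add/sub below handles one such chunk. */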
26540 while (mi_delta != 0)
26542 if ((mi_delta & (3 << shift)) == 0)
26543 shift += 2;
26544 else
26546 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26547 mi_op, this_regno, this_regno,
26548 mi_delta & (0xff << shift));
26549 mi_delta &= ~(0xff << shift);
26550 shift += 8;
26554 if (TARGET_THUMB1)
26556 if (TARGET_THUMB1_ONLY)
26557 fputs ("\tpop\t{r3}\n", file);
26559 fprintf (file, "\tbx\tr12\n");
26560 ASM_OUTPUT_ALIGN (file, 2);
26561 assemble_name (file, label);
26562 fputs (":\n", file);
26563 if (flag_pic)
26565 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26566 rtx tem = XEXP (DECL_RTL (function), 0);
26567 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26568 pipeline offset is four rather than eight. Adjust the offset
26569 accordingly. */
26570 tem = plus_constant (GET_MODE (tem), tem,
26571 TARGET_THUMB1_ONLY ? -3 : -7);
26572 tem = gen_rtx_MINUS (GET_MODE (tem),
26573 tem,
26574 gen_rtx_SYMBOL_REF (Pmode,
26575 ggc_strdup (labelpc)));
26576 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26578 else
26579 /* Output ".word .LTHUNKn". */
26580 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26582 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26583 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26585 else
26587 fputs ("\tb\t", file);
26588 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26589 if (NEED_PLT_RELOC)
26590 fputs ("(PLT)", file);
26591 fputc ('\n', file);
26594 final_end_function ();
26597 /* MI thunk handling for TARGET_32BIT. */
26599 static void
26600 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26601 HOST_WIDE_INT vcall_offset, tree function)
26603 /* On ARM, this_regno is R0 or R1 depending on
26604 whether the function returns an aggregate or not.
26606 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26607 function)
26608 ? R1_REGNUM : R0_REGNUM);
26610 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26611 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26612 reload_completed = 1;
26613 emit_note (NOTE_INSN_PROLOGUE_END);
26615 /* Add DELTA to THIS_RTX. */
26616 if (delta != 0)
26617 arm_split_constant (PLUS, Pmode, NULL_RTX,
26618 delta, this_rtx, this_rtx, false);
26620 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26621 if (vcall_offset != 0)
26623 /* Load *THIS_RTX. */
26624 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26625 /* Compute *THIS_RTX + VCALL_OFFSET. */
26626 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26627 false);
26628 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26629 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26630 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26633 /* Generate a tail call to the target function. */
26634 if (!TREE_USED (function))
26636 assemble_external (function);
26637 TREE_USED (function) = 1;
26639 rtx funexp = XEXP (DECL_RTL (function), 0);
26640 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26641 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26642 SIBLING_CALL_P (insn) = 1;
26644 insn = get_insns ();
26645 shorten_branches (insn);
26646 final_start_function (insn, file, 1);
26647 final (insn, file, 1);
26648 final_end_function ();
26650 /* Stop pretending this is a post-reload pass. */
26651 reload_completed = 0;
26654 /* Output code to add DELTA to the first argument, and then jump
26655 to FUNCTION. Used for C++ multiple inheritance. */
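/* As a rough illustration, for DELTA == 4 and no vcall offset on a 32-bit
   target, the emitted thunk boils down to:

        add     r0, r0, #4
        b       <function>

   with r1 used instead of r0 when the function returns an aggregate in
   memory.  */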
26657 static void
26658 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26659 HOST_WIDE_INT vcall_offset, tree function)
26661 if (TARGET_32BIT)
26662 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26663 else
26664 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26668 arm_emit_vector_const (FILE *file, rtx x)
26670 int i;
26671 const char * pattern;
26673 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26675 switch (GET_MODE (x))
26677 case E_V2SImode: pattern = "%08x"; break;
26678 case E_V4HImode: pattern = "%04x"; break;
26679 case E_V8QImode: pattern = "%02x"; break;
26680 default: gcc_unreachable ();
26683 fprintf (file, "0x");
26684 for (i = CONST_VECTOR_NUNITS (x); i--;)
26686 rtx element;
26688 element = CONST_VECTOR_ELT (x, i);
26689 fprintf (file, pattern, INTVAL (element));
26692 return 1;
26695 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26696 HFmode constant pool entries are actually loaded with ldr. */
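/* For instance, the HFmode constant 1.0 (bit pattern 0x3c00) is emitted on a
   little-endian target as a 2-byte 0x3c00 followed by two bytes of zero
   padding, so that a word-sized ldr picks the value up in the low half.  */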
26697 void
26698 arm_emit_fp16_const (rtx c)
26700 long bits;
26702 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26703 if (WORDS_BIG_ENDIAN)
26704 assemble_zeros (2);
26705 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26706 if (!WORDS_BIG_ENDIAN)
26707 assemble_zeros (2);
26710 const char *
26711 arm_output_load_gr (rtx *operands)
26713 rtx reg;
26714 rtx offset;
26715 rtx wcgr;
26716 rtx sum;
26718 if (!MEM_P (operands [1])
26719 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26720 || !REG_P (reg = XEXP (sum, 0))
26721 || !CONST_INT_P (offset = XEXP (sum, 1))
26722 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26723 return "wldrw%?\t%0, %1";
26725 /* Fix up an out-of-range load of a GR register. */
26726 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26727 wcgr = operands[0];
26728 operands[0] = reg;
26729 output_asm_insn ("ldr%?\t%0, %1", operands);
26731 operands[0] = wcgr;
26732 operands[1] = reg;
26733 output_asm_insn ("tmcr%?\t%0, %1", operands);
26734 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26736 return "";
26739 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26741 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26742 named arg and all anonymous args onto the stack.
26743 XXX I know the prologue shouldn't be pushing registers, but it is faster
26744 that way. */
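/* Roughly speaking, with the AAPCS a function such as "int f (int a, ...)"
   has consumed one core register (aapcs_ncrn == 1) after the named argument,
   so the code below sets *pretend_size to (4 - 1) * 4 == 12 and the prologue
   pushes r1-r3 next to any stacked arguments.  */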
26746 static void
26747 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26748 machine_mode mode,
26749 tree type,
26750 int *pretend_size,
26751 int second_time ATTRIBUTE_UNUSED)
26753 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26754 int nregs;
26756 cfun->machine->uses_anonymous_args = 1;
26757 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26759 nregs = pcum->aapcs_ncrn;
26760 if (nregs & 1)
26762 int res = arm_needs_doubleword_align (mode, type);
26763 if (res < 0 && warn_psabi)
26764 inform (input_location, "parameter passing for argument of "
26765 "type %qT changed in GCC 7.1", type);
26766 else if (res > 0)
26767 nregs++;
26770 else
26771 nregs = pcum->nregs;
26773 if (nregs < NUM_ARG_REGS)
26774 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26777 /* We can't rely on the caller doing the proper promotion when
26778 using APCS or ATPCS. */
26780 static bool
26781 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26783 return !TARGET_AAPCS_BASED;
26786 static machine_mode
26787 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26788 machine_mode mode,
26789 int *punsignedp ATTRIBUTE_UNUSED,
26790 const_tree fntype ATTRIBUTE_UNUSED,
26791 int for_return ATTRIBUTE_UNUSED)
26793 if (GET_MODE_CLASS (mode) == MODE_INT
26794 && GET_MODE_SIZE (mode) < 4)
26795 return SImode;
26797 return mode;
26801 static bool
26802 arm_default_short_enums (void)
26804 return ARM_DEFAULT_SHORT_ENUMS;
26808 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26810 static bool
26811 arm_align_anon_bitfield (void)
26813 return TARGET_AAPCS_BASED;
26817 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26819 static tree
26820 arm_cxx_guard_type (void)
26822 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26826 /* The EABI says test the least significant bit of a guard variable. */
26828 static bool
26829 arm_cxx_guard_mask_bit (void)
26831 return TARGET_AAPCS_BASED;
26835 /* The EABI specifies that all array cookies are 8 bytes long. */
26837 static tree
26838 arm_get_cookie_size (tree type)
26840 tree size;
26842 if (!TARGET_AAPCS_BASED)
26843 return default_cxx_get_cookie_size (type);
26845 size = build_int_cst (sizetype, 8);
26846 return size;
26850 /* The EABI says that array cookies should also contain the element size. */
26852 static bool
26853 arm_cookie_has_size (void)
26855 return TARGET_AAPCS_BASED;
26859 /* The EABI says constructors and destructors should return a pointer to
26860 the object constructed/destroyed. */
26862 static bool
26863 arm_cxx_cdtor_returns_this (void)
26865 return TARGET_AAPCS_BASED;
26868 /* The EABI says that an inline function may never be the key
26869 method. */
26871 static bool
26872 arm_cxx_key_method_may_be_inline (void)
26874 return !TARGET_AAPCS_BASED;
26877 static void
26878 arm_cxx_determine_class_data_visibility (tree decl)
26880 if (!TARGET_AAPCS_BASED
26881 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26882 return;
26884 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26885 is exported. However, on systems without dynamic vague linkage,
26886 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26887 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26888 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26889 else
26890 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26891 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26894 static bool
26895 arm_cxx_class_data_always_comdat (void)
26897 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26898 vague linkage if the class has no key function. */
26899 return !TARGET_AAPCS_BASED;
26903 /* The EABI says __aeabi_atexit should be used to register static
26904 destructors. */
26906 static bool
26907 arm_cxx_use_aeabi_atexit (void)
26909 return TARGET_AAPCS_BASED;
26913 void
26914 arm_set_return_address (rtx source, rtx scratch)
26916 arm_stack_offsets *offsets;
26917 HOST_WIDE_INT delta;
26918 rtx addr, mem;
26919 unsigned long saved_regs;
26921 offsets = arm_get_frame_offsets ();
26922 saved_regs = offsets->saved_regs_mask;
26924 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26925 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26926 else
26928 if (frame_pointer_needed)
26929 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26930 else
26932 /* LR will be the first saved register. */
26933 delta = offsets->outgoing_args - (offsets->frame + 4);
26936 if (delta >= 4096)
26938 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26939 GEN_INT (delta & ~4095)));
26940 addr = scratch;
26941 delta &= 4095;
26943 else
26944 addr = stack_pointer_rtx;
26946 addr = plus_constant (Pmode, addr, delta);
26949 /* The store needs to be marked to prevent DSE from deleting
26950 it as dead if it is based on fp. */
26951 mem = gen_frame_mem (Pmode, addr);
26952 MEM_VOLATILE_P (mem) = true;
26953 emit_move_insn (mem, source);
26958 void
26959 thumb_set_return_address (rtx source, rtx scratch)
26961 arm_stack_offsets *offsets;
26962 HOST_WIDE_INT delta;
26963 HOST_WIDE_INT limit;
26964 int reg;
26965 rtx addr, mem;
26966 unsigned long mask;
26968 emit_use (source);
26970 offsets = arm_get_frame_offsets ();
26971 mask = offsets->saved_regs_mask;
26972 if (mask & (1 << LR_REGNUM))
26974 limit = 1024;
26975 /* Find the saved regs. */
26976 if (frame_pointer_needed)
26978 delta = offsets->soft_frame - offsets->saved_args;
26979 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26980 if (TARGET_THUMB1)
26981 limit = 128;
26983 else
26985 delta = offsets->outgoing_args - offsets->saved_args;
26986 reg = SP_REGNUM;
26988 /* Allow for the stack frame. */
26989 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26990 delta -= 16;
26991 /* The link register is always the first saved register. */
26992 delta -= 4;
26994 /* Construct the address. */
26995 addr = gen_rtx_REG (SImode, reg);
26996 if (delta > limit)
26998 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26999 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27000 addr = scratch;
27002 else
27003 addr = plus_constant (Pmode, addr, delta);
27005 /* The store needs to be marked to prevent DSE from deleting
27006 it as dead if it is based on fp. */
27007 mem = gen_frame_mem (Pmode, addr);
27008 MEM_VOLATILE_P (mem) = true;
27009 emit_move_insn (mem, source);
27011 else
27012 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27015 /* Implements target hook vector_mode_supported_p. */
27016 bool
27017 arm_vector_mode_supported_p (machine_mode mode)
27019 /* Neon also supports V2SImode, etc. listed in the clause below. */
27020 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27021 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27022 || mode == V2DImode || mode == V8HFmode))
27023 return true;
27025 if ((TARGET_NEON || TARGET_IWMMXT)
27026 && ((mode == V2SImode)
27027 || (mode == V4HImode)
27028 || (mode == V8QImode)))
27029 return true;
27031 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27032 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27033 || mode == V2HAmode))
27034 return true;
27036 return false;
27039 /* Implements target hook array_mode_supported_p. */
27041 static bool
27042 arm_array_mode_supported_p (machine_mode mode,
27043 unsigned HOST_WIDE_INT nelems)
27045 if (TARGET_NEON
27046 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27047 && (nelems >= 2 && nelems <= 4))
27048 return true;
27050 return false;
27053 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27054 registers when autovectorizing for Neon, at least until multiple vector
27055 widths are supported properly by the middle-end. */
27057 static machine_mode
27058 arm_preferred_simd_mode (scalar_mode mode)
27060 if (TARGET_NEON)
27061 switch (mode)
27063 case E_SFmode:
27064 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27065 case E_SImode:
27066 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27067 case E_HImode:
27068 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27069 case E_QImode:
27070 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27071 case E_DImode:
27072 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27073 return V2DImode;
27074 break;
27076 default:;
27079 if (TARGET_REALLY_IWMMXT)
27080 switch (mode)
27082 case E_SImode:
27083 return V2SImode;
27084 case E_HImode:
27085 return V4HImode;
27086 case E_QImode:
27087 return V8QImode;
27089 default:;
27092 return word_mode;
27095 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27097 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27098 using r0-r4 for function arguments, r7 for the stack frame, and not have
27099 enough left over to do doubleword arithmetic. For Thumb-2 all the
27100 potentially problematic instructions accept high registers so this is not
27101 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27102 that require many low registers. */
27103 static bool
27104 arm_class_likely_spilled_p (reg_class_t rclass)
27106 if ((TARGET_THUMB1 && rclass == LO_REGS)
27107 || rclass == CC_REG)
27108 return true;
27110 return false;
27113 /* Implements target hook small_register_classes_for_mode_p. */
27114 bool
27115 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27117 return TARGET_THUMB1;
27120 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27121 ARM insns and therefore guarantee that the shift count is truncated modulo 256.
27122 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27123 guarantee no particular behavior for out-of-range counts. */
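/* For example, "mov r0, r1, lsl r2" only uses the least significant byte of
   r2, so a count of 257 shifts by 1; hence the 255 mask for SImode below.  */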
27125 static unsigned HOST_WIDE_INT
27126 arm_shift_truncation_mask (machine_mode mode)
27128 return mode == SImode ? 255 : 0;
27132 /* Map internal gcc register numbers to DWARF2 register numbers. */
27134 unsigned int
27135 arm_dbx_register_number (unsigned int regno)
27137 if (regno < 16)
27138 return regno;
27140 if (IS_VFP_REGNUM (regno))
27142 /* See comment in arm_dwarf_register_span. */
27143 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27144 return 64 + regno - FIRST_VFP_REGNUM;
27145 else
27146 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27149 if (IS_IWMMXT_GR_REGNUM (regno))
27150 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27152 if (IS_IWMMXT_REGNUM (regno))
27153 return 112 + regno - FIRST_IWMMXT_REGNUM;
27155 return DWARF_FRAME_REGISTERS;
27158 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27159 GCC models them as 64 32-bit registers, so we need to describe this to
27160 the DWARF generation code. Other registers can use the default. */
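/* For example, a DFmode value living in s14/s15 (d7) is returned below as a
   PARALLEL of the two SImode registers, which arm_dbx_register_number then
   maps to the legacy DWARF numbers 64 + 14 and 64 + 15.  */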
27161 static rtx
27162 arm_dwarf_register_span (rtx rtl)
27164 machine_mode mode;
27165 unsigned regno;
27166 rtx parts[16];
27167 int nregs;
27168 int i;
27170 regno = REGNO (rtl);
27171 if (!IS_VFP_REGNUM (regno))
27172 return NULL_RTX;
27174 /* XXX FIXME: The EABI defines two VFP register ranges:
27175 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27176 256-287: D0-D31
27177 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27178 corresponding D register. Until GDB supports this, we shall use the
27179 legacy encodings. We also use these encodings for D0-D15 for
27180 compatibility with older debuggers. */
27181 mode = GET_MODE (rtl);
27182 if (GET_MODE_SIZE (mode) < 8)
27183 return NULL_RTX;
27185 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27187 nregs = GET_MODE_SIZE (mode) / 4;
27188 for (i = 0; i < nregs; i += 2)
27189 if (TARGET_BIG_END)
27191 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27192 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27194 else
27196 parts[i] = gen_rtx_REG (SImode, regno + i);
27197 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27200 else
27202 nregs = GET_MODE_SIZE (mode) / 8;
27203 for (i = 0; i < nregs; i++)
27204 parts[i] = gen_rtx_REG (DImode, regno + i);
27207 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27210 #if ARM_UNWIND_INFO
27211 /* Emit unwind directives for a store-multiple instruction or stack pointer
27212 push during alignment.
27213 These should only ever be generated by the function prologue code, so
27214 expect them to have a particular form.
27215 The store-multiple instruction sometimes pushes pc as the last register,
27216 although it should not be tracked into unwind information, or for -Os
27217 sometimes pushes some dummy registers before the first register that needs
27218 to be tracked in unwind information; such dummy registers are there just
27219 to avoid separate stack adjustment, and will not be restored in the
27220 epilogue. */
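/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}", a "vpush {d8, d9}" as ".vsave {d8, d9}", and a
   trailing push of pc done purely for the stack adjustment becomes an
   extra ".pad #4".  */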
27222 static void
27223 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27225 int i;
27226 HOST_WIDE_INT offset;
27227 HOST_WIDE_INT nregs;
27228 int reg_size;
27229 unsigned reg;
27230 unsigned lastreg;
27231 unsigned padfirst = 0, padlast = 0;
27232 rtx e;
27234 e = XVECEXP (p, 0, 0);
27235 gcc_assert (GET_CODE (e) == SET);
27237 /* First insn will adjust the stack pointer. */
27238 gcc_assert (GET_CODE (e) == SET
27239 && REG_P (SET_DEST (e))
27240 && REGNO (SET_DEST (e)) == SP_REGNUM
27241 && GET_CODE (SET_SRC (e)) == PLUS);
27243 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27244 nregs = XVECLEN (p, 0) - 1;
27245 gcc_assert (nregs);
27247 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27248 if (reg < 16)
27250 /* For -Os dummy registers can be pushed at the beginning to
27251 avoid separate stack pointer adjustment. */
27252 e = XVECEXP (p, 0, 1);
27253 e = XEXP (SET_DEST (e), 0);
27254 if (GET_CODE (e) == PLUS)
27255 padfirst = INTVAL (XEXP (e, 1));
27256 gcc_assert (padfirst == 0 || optimize_size);
27257 /* The function prologue may also push pc, but not annotate it as it is
27258 never restored. We turn this into a stack pointer adjustment. */
27259 e = XVECEXP (p, 0, nregs);
27260 e = XEXP (SET_DEST (e), 0);
27261 if (GET_CODE (e) == PLUS)
27262 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27263 else
27264 padlast = offset - 4;
27265 gcc_assert (padlast == 0 || padlast == 4);
27266 if (padlast == 4)
27267 fprintf (asm_out_file, "\t.pad #4\n");
27268 reg_size = 4;
27269 fprintf (asm_out_file, "\t.save {");
27271 else if (IS_VFP_REGNUM (reg))
27273 reg_size = 8;
27274 fprintf (asm_out_file, "\t.vsave {");
27276 else
27277 /* Unknown register type. */
27278 gcc_unreachable ();
27280 /* If the stack increment doesn't match the size of the saved registers,
27281 something has gone horribly wrong. */
27282 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27284 offset = padfirst;
27285 lastreg = 0;
27286 /* The remaining insns will describe the stores. */
27287 for (i = 1; i <= nregs; i++)
27289 /* Expect (set (mem <addr>) (reg)).
27290 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27291 e = XVECEXP (p, 0, i);
27292 gcc_assert (GET_CODE (e) == SET
27293 && MEM_P (SET_DEST (e))
27294 && REG_P (SET_SRC (e)));
27296 reg = REGNO (SET_SRC (e));
27297 gcc_assert (reg >= lastreg);
27299 if (i != 1)
27300 fprintf (asm_out_file, ", ");
27301 /* We can't use %r for vfp because we need to use the
27302 double precision register names. */
27303 if (IS_VFP_REGNUM (reg))
27304 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27305 else
27306 asm_fprintf (asm_out_file, "%r", reg);
27308 if (flag_checking)
27310 /* Check that the addresses are consecutive. */
27311 e = XEXP (SET_DEST (e), 0);
27312 if (GET_CODE (e) == PLUS)
27313 gcc_assert (REG_P (XEXP (e, 0))
27314 && REGNO (XEXP (e, 0)) == SP_REGNUM
27315 && CONST_INT_P (XEXP (e, 1))
27316 && offset == INTVAL (XEXP (e, 1)));
27317 else
27318 gcc_assert (i == 1
27319 && REG_P (e)
27320 && REGNO (e) == SP_REGNUM);
27321 offset += reg_size;
27324 fprintf (asm_out_file, "}\n");
27325 if (padfirst)
27326 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27329 /* Emit unwind directives for a SET. */
27331 static void
27332 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27334 rtx e0;
27335 rtx e1;
27336 unsigned reg;
27338 e0 = XEXP (p, 0);
27339 e1 = XEXP (p, 1);
27340 switch (GET_CODE (e0))
27342 case MEM:
27343 /* Pushing a single register. */
27344 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27345 || !REG_P (XEXP (XEXP (e0, 0), 0))
27346 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27347 abort ();
27349 asm_fprintf (asm_out_file, "\t.save ");
27350 if (IS_VFP_REGNUM (REGNO (e1)))
27351 asm_fprintf(asm_out_file, "{d%d}\n",
27352 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27353 else
27354 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27355 break;
27357 case REG:
27358 if (REGNO (e0) == SP_REGNUM)
27360 /* A stack increment. */
27361 if (GET_CODE (e1) != PLUS
27362 || !REG_P (XEXP (e1, 0))
27363 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27364 || !CONST_INT_P (XEXP (e1, 1)))
27365 abort ();
27367 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27368 -INTVAL (XEXP (e1, 1)));
27370 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27372 HOST_WIDE_INT offset;
27374 if (GET_CODE (e1) == PLUS)
27376 if (!REG_P (XEXP (e1, 0))
27377 || !CONST_INT_P (XEXP (e1, 1)))
27378 abort ();
27379 reg = REGNO (XEXP (e1, 0));
27380 offset = INTVAL (XEXP (e1, 1));
27381 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27382 HARD_FRAME_POINTER_REGNUM, reg,
27383 offset);
27385 else if (REG_P (e1))
27387 reg = REGNO (e1);
27388 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27389 HARD_FRAME_POINTER_REGNUM, reg);
27391 else
27392 abort ();
27394 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27396 /* Move from sp to reg. */
27397 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27399 else if (GET_CODE (e1) == PLUS
27400 && REG_P (XEXP (e1, 0))
27401 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27402 && CONST_INT_P (XEXP (e1, 1)))
27404 /* Set reg to offset from sp. */
27405 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27406 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27408 else
27409 abort ();
27410 break;
27412 default:
27413 abort ();
27418 /* Emit unwind directives for the given insn. */
27420 static void
27421 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27423 rtx note, pat;
27424 bool handled_one = false;
27426 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27427 return;
27429 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27430 && (TREE_NOTHROW (current_function_decl)
27431 || crtl->all_throwers_are_sibcalls))
27432 return;
27434 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27435 return;
27437 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27439 switch (REG_NOTE_KIND (note))
27441 case REG_FRAME_RELATED_EXPR:
27442 pat = XEXP (note, 0);
27443 goto found;
27445 case REG_CFA_REGISTER:
27446 pat = XEXP (note, 0);
27447 if (pat == NULL)
27449 pat = PATTERN (insn);
27450 if (GET_CODE (pat) == PARALLEL)
27451 pat = XVECEXP (pat, 0, 0);
27454 /* Only emitted for IS_STACKALIGN re-alignment. */
27456 rtx dest, src;
27457 unsigned reg;
27459 src = SET_SRC (pat);
27460 dest = SET_DEST (pat);
27462 gcc_assert (src == stack_pointer_rtx);
27463 reg = REGNO (dest);
27464 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27465 reg + 0x90, reg);
27467 handled_one = true;
27468 break;
27470 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27471 to get correct dwarf information for shrink-wrapping. We should not
27472 emit unwind information for it because these notes are used either for
27473 pretend arguments or to adjust sp and restore registers from the
27474 stack. */
27475 case REG_CFA_DEF_CFA:
27476 case REG_CFA_ADJUST_CFA:
27477 case REG_CFA_RESTORE:
27478 return;
27480 case REG_CFA_EXPRESSION:
27481 case REG_CFA_OFFSET:
27482 /* ??? Only handling here what we actually emit. */
27483 gcc_unreachable ();
27485 default:
27486 break;
27489 if (handled_one)
27490 return;
27491 pat = PATTERN (insn);
27492 found:
27494 switch (GET_CODE (pat))
27496 case SET:
27497 arm_unwind_emit_set (asm_out_file, pat);
27498 break;
27500 case SEQUENCE:
27501 /* Store multiple. */
27502 arm_unwind_emit_sequence (asm_out_file, pat);
27503 break;
27505 default:
27506 abort();
27511 /* Output a reference from a function exception table to the type_info
27512 object X. The EABI specifies that the symbol should be relocated by
27513 an R_ARM_TARGET2 relocation. */
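/* So a typeinfo reference such as _ZTIi comes out as "\t.word\t_ZTIi(TARGET2)",
   while a constant 0 entry stays a plain "\t.word\t0".  */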
27515 static bool
27516 arm_output_ttype (rtx x)
27518 fputs ("\t.word\t", asm_out_file);
27519 output_addr_const (asm_out_file, x);
27520 /* Use special relocations for symbol references. */
27521 if (!CONST_INT_P (x))
27522 fputs ("(TARGET2)", asm_out_file);
27523 fputc ('\n', asm_out_file);
27525 return TRUE;
27528 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27530 static void
27531 arm_asm_emit_except_personality (rtx personality)
27533 fputs ("\t.personality\t", asm_out_file);
27534 output_addr_const (asm_out_file, personality);
27535 fputc ('\n', asm_out_file);
27537 #endif /* ARM_UNWIND_INFO */
27539 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27541 static void
27542 arm_asm_init_sections (void)
27544 #if ARM_UNWIND_INFO
27545 exception_section = get_unnamed_section (0, output_section_asm_op,
27546 "\t.handlerdata");
27547 #endif /* ARM_UNWIND_INFO */
27549 #ifdef OBJECT_FORMAT_ELF
27550 if (target_pure_code)
27551 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27552 #endif
27555 /* Output unwind directives for the start/end of a function. */
27557 void
27558 arm_output_fn_unwind (FILE * f, bool prologue)
27560 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27561 return;
27563 if (prologue)
27564 fputs ("\t.fnstart\n", f);
27565 else
27567 /* If this function will never be unwound, then mark it as such.
27568 The same condition is used in arm_unwind_emit to suppress
27569 the frame annotations. */
27570 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27571 && (TREE_NOTHROW (current_function_decl)
27572 || crtl->all_throwers_are_sibcalls))
27573 fputs("\t.cantunwind\n", f);
27575 fputs ("\t.fnend\n", f);
27579 static bool
27580 arm_emit_tls_decoration (FILE *fp, rtx x)
27582 enum tls_reloc reloc;
27583 rtx val;
27585 val = XVECEXP (x, 0, 0);
27586 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27588 output_addr_const (fp, val);
27590 switch (reloc)
27592 case TLS_GD32:
27593 fputs ("(tlsgd)", fp);
27594 break;
27595 case TLS_LDM32:
27596 fputs ("(tlsldm)", fp);
27597 break;
27598 case TLS_LDO32:
27599 fputs ("(tlsldo)", fp);
27600 break;
27601 case TLS_IE32:
27602 fputs ("(gottpoff)", fp);
27603 break;
27604 case TLS_LE32:
27605 fputs ("(tpoff)", fp);
27606 break;
27607 case TLS_DESCSEQ:
27608 fputs ("(tlsdesc)", fp);
27609 break;
27610 default:
27611 gcc_unreachable ();
27614 switch (reloc)
27616 case TLS_GD32:
27617 case TLS_LDM32:
27618 case TLS_IE32:
27619 case TLS_DESCSEQ:
27620 fputs (" + (. - ", fp);
27621 output_addr_const (fp, XVECEXP (x, 0, 2));
27622 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27623 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27624 output_addr_const (fp, XVECEXP (x, 0, 3));
27625 fputc (')', fp);
27626 break;
27627 default:
27628 break;
27631 return TRUE;
27634 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27636 static void
27637 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27639 gcc_assert (size == 4);
27640 fputs ("\t.word\t", file);
27641 output_addr_const (file, x);
27642 fputs ("(tlsldo)", file);
27645 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27647 static bool
27648 arm_output_addr_const_extra (FILE *fp, rtx x)
27650 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27651 return arm_emit_tls_decoration (fp, x);
27652 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27654 char label[256];
27655 int labelno = INTVAL (XVECEXP (x, 0, 0));
27657 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27658 assemble_name_raw (fp, label);
27660 return TRUE;
27662 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27664 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27665 if (GOT_PCREL)
27666 fputs ("+.", fp);
27667 fputs ("-(", fp);
27668 output_addr_const (fp, XVECEXP (x, 0, 0));
27669 fputc (')', fp);
27670 return TRUE;
27672 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27674 output_addr_const (fp, XVECEXP (x, 0, 0));
27675 if (GOT_PCREL)
27676 fputs ("+.", fp);
27677 fputs ("-(", fp);
27678 output_addr_const (fp, XVECEXP (x, 0, 1));
27679 fputc (')', fp);
27680 return TRUE;
27682 else if (GET_CODE (x) == CONST_VECTOR)
27683 return arm_emit_vector_const (fp, x);
27685 return FALSE;
27688 /* Output assembly for a shift instruction.
27689 SET_FLAGS determines how the instruction modifies the condition codes.
27690 0 - Do not set condition codes.
27691 1 - Set condition codes.
27692 2 - Use smallest instruction. */
27693 const char *
27694 arm_output_shift(rtx * operands, int set_flags)
27696 char pattern[100];
27697 static const char flag_chars[3] = {'?', '.', '!'};
27698 const char *shift;
27699 HOST_WIDE_INT val;
27700 char c;
27702 c = flag_chars[set_flags];
27703 shift = shift_op(operands[3], &val);
27704 if (shift)
27706 if (val != -1)
27707 operands[2] = GEN_INT(val);
27708 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27710 else
27711 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27713 output_asm_insn (pattern, operands);
27714 return "";
27717 /* Output assembly for a WMMX immediate shift instruction. */
27718 const char *
27719 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27721 int shift = INTVAL (operands[2]);
27722 char templ[50];
27723 machine_mode opmode = GET_MODE (operands[0]);
27725 gcc_assert (shift >= 0);
27727 /* Handle the case where the shift value is > 63 (for the D qualifier),
27728 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27729 if (((opmode == V4HImode) && (shift > 15))
27730 || ((opmode == V2SImode) && (shift > 31))
27731 || ((opmode == DImode) && (shift > 63)))
27733 if (wror_or_wsra)
27735 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27736 output_asm_insn (templ, operands);
27737 if (opmode == DImode)
27739 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27740 output_asm_insn (templ, operands);
27743 else
27745 /* The destination register will contain all zeros. */
27746 sprintf (templ, "wzero\t%%0");
27747 output_asm_insn (templ, operands);
27749 return "";
27752 if ((opmode == DImode) && (shift > 32))
27754 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27755 output_asm_insn (templ, operands);
27756 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27757 output_asm_insn (templ, operands);
27759 else
27761 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27762 output_asm_insn (templ, operands);
27764 return "";
27767 /* Output assembly for a WMMX tinsr instruction. */
27768 const char *
27769 arm_output_iwmmxt_tinsr (rtx *operands)
27771 int mask = INTVAL (operands[3]);
27772 int i;
27773 char templ[50];
27774 int units = mode_nunits[GET_MODE (operands[0])];
27775 gcc_assert ((mask & (mask - 1)) == 0);
27776 for (i = 0; i < units; ++i)
27778 if ((mask & 0x01) == 1)
27780 break;
27782 mask >>= 1;
27784 gcc_assert (i < units);
27786 switch (GET_MODE (operands[0]))
27788 case E_V8QImode:
27789 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27790 break;
27791 case E_V4HImode:
27792 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27793 break;
27794 case E_V2SImode:
27795 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27796 break;
27797 default:
27798 gcc_unreachable ();
27799 break;
27801 output_asm_insn (templ, operands);
27803 return "";
27806 /* Output a Thumb-1 casesi dispatch sequence. */
27807 const char *
27808 thumb1_output_casesi (rtx *operands)
27810 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27812 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27814 switch (GET_MODE(diff_vec))
27816 case E_QImode:
27817 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27818 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27819 case E_HImode:
27820 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27821 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27822 case E_SImode:
27823 return "bl\t%___gnu_thumb1_case_si";
27824 default:
27825 gcc_unreachable ();
27829 /* Output a Thumb-2 casesi instruction. */
27830 const char *
27831 thumb2_output_casesi (rtx *operands)
27833 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27835 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27837 output_asm_insn ("cmp\t%0, %1", operands);
27838 output_asm_insn ("bhi\t%l3", operands);
27839 switch (GET_MODE(diff_vec))
27841 case E_QImode:
27842 return "tbb\t[%|pc, %0]";
27843 case E_HImode:
27844 return "tbh\t[%|pc, %0, lsl #1]";
27845 case E_SImode:
27846 if (flag_pic)
27848 output_asm_insn ("adr\t%4, %l2", operands);
27849 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27850 output_asm_insn ("add\t%4, %4, %5", operands);
27851 return "bx\t%4";
27853 else
27855 output_asm_insn ("adr\t%4, %l2", operands);
27856 return "ldr\t%|pc, [%4, %0, lsl #2]";
27858 default:
27859 gcc_unreachable ();
27863 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27864 per-core tuning structs. */
27865 static int
27866 arm_issue_rate (void)
27868 return current_tune->issue_rate;
27871 /* Return how many instructions the scheduler should look ahead to choose
27872 the best one. */
27873 static int
27874 arm_first_cycle_multipass_dfa_lookahead (void)
27876 int issue_rate = arm_issue_rate ();
27878 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27881 /* Enable modeling of L2 auto-prefetcher. */
27882 static int
27883 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27885 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27888 const char *
27889 arm_mangle_type (const_tree type)
27891 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27892 has to be mangled as if it is in the "std" namespace. */
27893 if (TARGET_AAPCS_BASED
27894 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27895 return "St9__va_list";
27897 /* Half-precision float. */
27898 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27899 return "Dh";
27901 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27902 builtin type. */
27903 if (TYPE_NAME (type) != NULL)
27904 return arm_mangle_builtin_type (type);
27906 /* Use the default mangling. */
27907 return NULL;
27910 /* Order of allocation of core registers for Thumb: this allocation is
27911 written over the corresponding initial entries of the array
27912 initialized with REG_ALLOC_ORDER. We allocate all low registers
27913 first. Saving and restoring a low register is usually cheaper than
27914 using a call-clobbered high register. */
27916 static const int thumb_core_reg_alloc_order[] =
27918 3, 2, 1, 0, 4, 5, 6, 7,
27919 12, 14, 8, 9, 10, 11
27922 /* Adjust register allocation order when compiling for Thumb. */
27924 void
27925 arm_order_regs_for_local_alloc (void)
27927 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27928 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27929 if (TARGET_THUMB)
27930 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27931 sizeof (thumb_core_reg_alloc_order));
27934 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27936 bool
27937 arm_frame_pointer_required (void)
27939 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27940 return true;
27942 /* If the function receives nonlocal gotos, it needs to save the frame
27943 pointer in the nonlocal_goto_save_area object. */
27944 if (cfun->has_nonlocal_label)
27945 return true;
27947 /* The frame pointer is required for non-leaf APCS frames. */
27948 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27949 return true;
27951 /* If we are probing the stack in the prologue, we will have a faulting
27952 instruction prior to the stack adjustment and this requires a frame
27953 pointer if we want to catch the exception using the EABI unwinder. */
27954 if (!IS_INTERRUPT (arm_current_func_type ())
27955 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27956 || flag_stack_clash_protection)
27957 && arm_except_unwind_info (&global_options) == UI_TARGET
27958 && cfun->can_throw_non_call_exceptions)
27960 HOST_WIDE_INT size = get_frame_size ();
27962 /* That's irrelevant if there is no stack adjustment. */
27963 if (size <= 0)
27964 return false;
27966 /* That's relevant only if there is a stack probe. */
27967 if (crtl->is_leaf && !cfun->calls_alloca)
27969 /* We don't have the final size of the frame so adjust. */
27970 size += 32 * UNITS_PER_WORD;
27971 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27972 return true;
27974 else
27975 return true;
27978 return false;
27981 /* Only Thumb-1 lacks support for conditional execution, so return true if
27982 the target is not Thumb-1. */
27983 static bool
27984 arm_have_conditional_execution (void)
27986 return !TARGET_THUMB1;
27989 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27990 static HOST_WIDE_INT
27991 arm_vector_alignment (const_tree type)
27993 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27995 if (TARGET_AAPCS_BASED)
27996 align = MIN (align, 64);
27998 return align;
28001 static unsigned int
28002 arm_autovectorize_vector_sizes (void)
28004 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28007 static bool
28008 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28010 /* Vectors which aren't in packed structures will not be less aligned than
28011 the natural alignment of their element type, so this is safe. */
28012 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28013 return !is_packed;
28015 return default_builtin_vector_alignment_reachable (type, is_packed);
28018 static bool
28019 arm_builtin_support_vector_misalignment (machine_mode mode,
28020 const_tree type, int misalignment,
28021 bool is_packed)
28023 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28025 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28027 if (is_packed)
28028 return align == 1;
28030 /* If the misalignment is unknown, we should be able to handle the access
28031 so long as it is not to a member of a packed data structure. */
28032 if (misalignment == -1)
28033 return true;
28035 /* Return true if the misalignment is a multiple of the natural alignment
28036 of the vector's element type. This is probably always going to be
28037 true in practice, since we've already established that this isn't a
28038 packed access. */
28039 return ((misalignment % align) == 0);
28042 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28043 is_packed);
28046 static void
28047 arm_conditional_register_usage (void)
28049 int regno;
28051 if (TARGET_THUMB1 && optimize_size)
28053 /* When optimizing for size on Thumb-1, it's better not
28054 to use the HI regs, because of the overhead of
28055 stacking them. */
28056 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28057 fixed_regs[regno] = call_used_regs[regno] = 1;
28060 /* The link register can be clobbered by any branch insn,
28061 but we have no way to track that at present, so mark
28062 it as unavailable. */
28063 if (TARGET_THUMB1)
28064 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28066 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28068 /* VFPv3 registers are disabled when earlier VFP
28069 versions are selected due to the definition of
28070 LAST_VFP_REGNUM. */
28071 for (regno = FIRST_VFP_REGNUM;
28072 regno <= LAST_VFP_REGNUM; ++ regno)
28074 fixed_regs[regno] = 0;
28075 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28076 || regno >= FIRST_VFP_REGNUM + 32;
28080 if (TARGET_REALLY_IWMMXT)
28082 regno = FIRST_IWMMXT_GR_REGNUM;
28083 /* The 2002/10/09 revision of the XScale ABI has wCG0
28084 and wCG1 as call-preserved registers. The 2002/11/21
28085 revision changed this so that all wCG registers are
28086 scratch registers. */
28087 for (regno = FIRST_IWMMXT_GR_REGNUM;
28088 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28089 fixed_regs[regno] = 0;
28090 /* The XScale ABI has wR0 - wR9 as scratch registers,
28091 the rest as call-preserved registers. */
28092 for (regno = FIRST_IWMMXT_REGNUM;
28093 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28095 fixed_regs[regno] = 0;
28096 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28100 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28102 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28103 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28105 else if (TARGET_APCS_STACK)
28107 fixed_regs[10] = 1;
28108 call_used_regs[10] = 1;
28110 /* -mcaller-super-interworking reserves r11 for calls to
28111 _interwork_r11_call_via_rN(). Making the register global
28112 is an easy way of ensuring that it remains valid for all
28113 calls. */
28114 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28115 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28117 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28118 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28119 if (TARGET_CALLER_INTERWORKING)
28120 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28122 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28125 static reg_class_t
28126 arm_preferred_rename_class (reg_class_t rclass)
28128 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28129 using GENERIC_REGS. During the register rename pass, we prefer LO_REGS
28130 so that code size can be reduced. */
28131 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28132 return LO_REGS;
28133 else
28134 return NO_REGS;
28137 /* Compute the attribute "length" of insn "*push_multi".
28138 So this function MUST be kept in sync with that insn pattern. */
28140 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28142 int i, regno, hi_reg;
28143 int num_saves = XVECLEN (parallel_op, 0);
28145 /* ARM mode. */
28146 if (TARGET_ARM)
28147 return 4;
28148 /* Thumb1 mode. */
28149 if (TARGET_THUMB1)
28150 return 2;
28152 /* Thumb2 mode. */
28153 regno = REGNO (first_op);
28154 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28155 list is 8-bit. Normally this means all registers in the list must be
28156 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
28157 encodings. There is one exception for PUSH: LR in HI_REGS can be used with
28158 the 16-bit encoding. */
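/* E.g. "push {r0-r7, lr}" fits the 16-bit encoding and gets length 2 below,
   while "push {r4, r8}" drags in a high register other than LR and gets
   length 4.  */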
28159 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28160 for (i = 1; i < num_saves && !hi_reg; i++)
28162 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28163 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28166 if (!hi_reg)
28167 return 2;
28168 return 4;
28171 /* Compute the attribute "length" of insn. Currently, this function is used
28172 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28173 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28174 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28175 true if OPERANDS contains an insn which explicitly updates the base register. */
28178 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28180 /* ARM mode. */
28181 if (TARGET_ARM)
28182 return 4;
28183 /* Thumb1 mode. */
28184 if (TARGET_THUMB1)
28185 return 2;
28187 rtx parallel_op = operands[0];
28188 /* Initialize to the number of elements in the PARALLEL. */
28189 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28190 /* Initialize the value to the base register. */
28191 unsigned regno = REGNO (operands[1]);
28192 /* Skip return and write back pattern.
28193 We only need register pop pattern for later analysis. */
28194 unsigned first_indx = 0;
28195 first_indx += return_pc ? 1 : 0;
28196 first_indx += write_back_p ? 1 : 0;
28198 /* A pop operation can be done through LDM or POP. If the base register is SP
28199 and write back is used, then LDM is an alias of POP. */
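/* E.g. "pop {r4-r7, pc}" (SP base with write back) stays at length 2 even
   though PC is a high register, whereas an LDM with a high base register
   such as "ldmia r8!, {r0-r3}" is forced to length 4 below.  */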
28200 bool pop_p = (regno == SP_REGNUM && write_back_p);
28201 bool ldm_p = !pop_p;
28203 /* Check base register for LDM. */
28204 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28205 return 4;
28207 /* Check each register in the list. */
28208 for (; indx >= first_indx; indx--)
28210 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28211 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28212 comment in arm_attr_length_push_multi. */
28213 if (REGNO_REG_CLASS (regno) == HI_REGS
28214 && (regno != PC_REGNUM || ldm_p))
28215 return 4;
28218 return 2;
28221 /* Compute the number of instructions emitted by output_move_double. */
28223 arm_count_output_move_double_insns (rtx *operands)
28225 int count;
28226 rtx ops[2];
28227 /* output_move_double may modify the operands array, so call it
28228 here on a copy of the array. */
28229 ops[0] = operands[0];
28230 ops[1] = operands[1];
28231 output_move_double (ops, false, &count);
28232 return count;
28236 vfp3_const_double_for_fract_bits (rtx operand)
28238 REAL_VALUE_TYPE r0;
28240 if (!CONST_DOUBLE_P (operand))
28241 return 0;
28243 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28244 if (exact_real_inverse (DFmode, &r0)
28245 && !REAL_VALUE_NEGATIVE (r0))
28247 if (exact_real_truncate (DFmode, &r0))
28249 HOST_WIDE_INT value = real_to_integer (&r0);
28250 value = value & 0xffffffff;
28251 if ((value != 0) && ( (value & (value - 1)) == 0))
28253 int ret = exact_log2 (value);
28254 gcc_assert (IN_RANGE (ret, 0, 31));
28255 return ret;
28259 return 0;
28262 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28263 log2 is in [1, 32], return that log2. Otherwise return -1.
28264 This is used in the patterns for vcvt.s32.f32 floating-point to
28265 fixed-point conversions. */
28268 vfp3_const_double_for_bits (rtx x)
28270 const REAL_VALUE_TYPE *r;
28272 if (!CONST_DOUBLE_P (x))
28273 return -1;
28275 r = CONST_DOUBLE_REAL_VALUE (x);
28277 if (REAL_VALUE_NEGATIVE (*r)
28278 || REAL_VALUE_ISNAN (*r)
28279 || REAL_VALUE_ISINF (*r)
28280 || !real_isinteger (r, SFmode))
28281 return -1;
28283 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28285 /* The exact_log2 above will have returned -1 if this is
28286 not an exact log2. */
28287 if (!IN_RANGE (hwint, 1, 32))
28288 return -1;
28290 return hwint;
28294 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28296 static void
28297 arm_pre_atomic_barrier (enum memmodel model)
28299 if (need_atomic_barrier_p (model, true))
28300 emit_insn (gen_memory_barrier ());
28303 static void
28304 arm_post_atomic_barrier (enum memmodel model)
28306 if (need_atomic_barrier_p (model, false))
28307 emit_insn (gen_memory_barrier ());
28310 /* Emit the load-exclusive and store-exclusive instructions.
28311 Use acquire and release versions if necessary. */
28313 static void
28314 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28316 rtx (*gen) (rtx, rtx);
28318 if (acq)
28320 switch (mode)
28322 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28323 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28324 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28325 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28326 default:
28327 gcc_unreachable ();
28330 else
28332 switch (mode)
28334 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28335 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28336 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28337 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28338 default:
28339 gcc_unreachable ();
28343 emit_insn (gen (rval, mem));
28346 static void
28347 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28348 rtx mem, bool rel)
28350 rtx (*gen) (rtx, rtx, rtx);
28352 if (rel)
28354 switch (mode)
28356 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28357 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28358 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28359 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28360 default:
28361 gcc_unreachable ();
28364 else
28366 switch (mode)
28368 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28369 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28370 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28371 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28372 default:
28373 gcc_unreachable ();
28377 emit_insn (gen (bval, rval, mem));
28380 /* Mark the previous jump instruction as unlikely. */
28382 static void
28383 emit_unlikely_jump (rtx insn)
28385 rtx_insn *jump = emit_jump_insn (insn);
28386 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28389 /* Expand a compare and swap pattern. */
28391 void
28392 arm_expand_compare_and_swap (rtx operands[])
28394 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28395 machine_mode mode;
28396 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28398 bval = operands[0];
28399 rval = operands[1];
28400 mem = operands[2];
28401 oldval = operands[3];
28402 newval = operands[4];
28403 is_weak = operands[5];
28404 mod_s = operands[6];
28405 mod_f = operands[7];
28406 mode = GET_MODE (mem);
28408 /* Normally the succ memory model must be stronger than fail, but in the
28409 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28410 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28412 if (TARGET_HAVE_LDACQ
28413 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28414 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28415 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28417 switch (mode)
28419 case E_QImode:
28420 case E_HImode:
28421 /* For narrow modes, we're going to perform the comparison in SImode,
28422 so do the zero-extension now. */
28423 rval = gen_reg_rtx (SImode);
28424 oldval = convert_modes (SImode, mode, oldval, true);
28425 /* FALLTHRU */
28427 case E_SImode:
28428 /* Force the value into a register if needed. We waited until after
28429 the zero-extension above to do this properly. */
28430 if (!arm_add_operand (oldval, SImode))
28431 oldval = force_reg (SImode, oldval);
28432 break;
28434 case E_DImode:
28435 if (!cmpdi_operand (oldval, mode))
28436 oldval = force_reg (mode, oldval);
28437 break;
28439 default:
28440 gcc_unreachable ();
28443 if (TARGET_THUMB1)
28445 switch (mode)
28447 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28448 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28449 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28450 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28451 default:
28452 gcc_unreachable ();
28455 else
28457 switch (mode)
28459 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28460 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28461 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28462 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28463 default:
28464 gcc_unreachable ();
28468 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28469 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28471 if (mode == QImode || mode == HImode)
28472 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28474 /* In all cases, we arrange for success to be signaled by Z set.
28475 This arrangement allows for the boolean result to be used directly
28476 in a subsequent branch, post optimization. For Thumb-1 targets, the
28477 boolean negation of the result is also stored in bval because the Thumb-1
28478 backend lacks dependency tracking for the CC flag, as flag-setting is not
28479 represented at the RTL level. */
28480 if (TARGET_THUMB1)
28481 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28482 else
28484 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28485 emit_insn (gen_rtx_SET (bval, x));
28489 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28490 another memory store between the load-exclusive and store-exclusive can
28491 reset the monitor from Exclusive to Open state. This means we must wait
28492 until after reload to split the pattern, lest we get a register spill in
28493 the middle of the atomic sequence. Success of the compare and swap is
28494 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28495 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
28496 atomic_compare_and_swapmode standard pattern in operand 0). */
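/* For a strong SImode compare-and-swap on a 32-bit target the split below
   produces, roughly (label names are illustrative):

   .Lretry:
        ldrex   rval, [mem]
        cmp     rval, oldval
        bne     .Ldone
        strex   neg_bval, newval, [mem]
        cmp     neg_bval, #0
        bne     .Lretry
   .Ldone:

   with ldaex/stlex used instead when the memory model requires
   acquire/release semantics and LDACQ is available.  */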
28498 void
28499 arm_split_compare_and_swap (rtx operands[])
28501 rtx rval, mem, oldval, newval, neg_bval;
28502 machine_mode mode;
28503 enum memmodel mod_s, mod_f;
28504 bool is_weak;
28505 rtx_code_label *label1, *label2;
28506 rtx x, cond;
28508 rval = operands[1];
28509 mem = operands[2];
28510 oldval = operands[3];
28511 newval = operands[4];
28512 is_weak = (operands[5] != const0_rtx);
28513 mod_s = memmodel_from_int (INTVAL (operands[6]));
28514 mod_f = memmodel_from_int (INTVAL (operands[7]));
28515 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28516 mode = GET_MODE (mem);
28518 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28520 bool use_acquire = TARGET_HAVE_LDACQ
28521 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28522 || is_mm_release (mod_s));
28524 bool use_release = TARGET_HAVE_LDACQ
28525 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28526 || is_mm_acquire (mod_s));
28528 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28529 a full barrier is emitted after the store-release. */
28530 if (is_armv8_sync)
28531 use_acquire = false;
28533 /* Checks whether a barrier is needed and emits one accordingly. */
28534 if (!(use_acquire || use_release))
28535 arm_pre_atomic_barrier (mod_s);
28537 label1 = NULL;
28538 if (!is_weak)
28540 label1 = gen_label_rtx ();
28541 emit_label (label1);
28543 label2 = gen_label_rtx ();
28545 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28547 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28548 as required to communicate with arm_expand_compare_and_swap. */
28549 if (TARGET_32BIT)
28551 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28552 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28553 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28554 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28555 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28557 else
28559 emit_move_insn (neg_bval, const1_rtx);
28560 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28561 if (thumb1_cmpneg_operand (oldval, SImode))
28562 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28563 label2, cond));
28564 else
28565 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28568 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28570 /* Weak or strong, we want EQ to be true for success, so that we
28571 match the flags that we got from the compare above. */
28572 if (TARGET_32BIT)
28574 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28575 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28576 emit_insn (gen_rtx_SET (cond, x));
28579 if (!is_weak)
28581 /* Z is set to boolean value of !neg_bval, as required to communicate
28582 with arm_expand_compare_and_swap. */
28583 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28584 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28587 if (!is_mm_relaxed (mod_f))
28588 emit_label (label2);
28590 /* Checks whether a barrier is needed and emits one accordingly. */
28591 if (is_armv8_sync
28592 || !(use_acquire || use_release))
28593 arm_post_atomic_barrier (mod_s);
28595 if (is_mm_relaxed (mod_f))
28596 emit_label (label2);
28599 /* Split an atomic operation pattern. Operation is given by CODE and is one
28600 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28601 operation). Operation is performed on the content at MEM and on VALUE
28602 following the memory model MODEL_RTX. The content at MEM before and after
28603 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28604 success of the operation is returned in COND. Using a scratch register or
28605 an operand register for these determines what result is returned for that
28606 pattern. */
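/* As a sketch, an SImode atomic fetch-and-add expands to something like:

   .Lretry:
        ldrex   old, [mem]
        add     new, old, value
        strex   cond, new, [mem]
        teq     cond, #0
        bne     .Lretry

   bracketed by barriers (or using ldaex/stlex) as dictated by MODEL.  */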
28608 void
28609 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28610 rtx value, rtx model_rtx, rtx cond)
28612 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28613 machine_mode mode = GET_MODE (mem);
28614 machine_mode wmode = (mode == DImode ? DImode : SImode);
28615 rtx_code_label *label;
28616 bool all_low_regs, bind_old_new;
28617 rtx x;
28619 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28621 bool use_acquire = TARGET_HAVE_LDACQ
28622 && !(is_mm_relaxed (model) || is_mm_consume (model)
28623 || is_mm_release (model));
28625 bool use_release = TARGET_HAVE_LDACQ
28626 && !(is_mm_relaxed (model) || is_mm_consume (model)
28627 || is_mm_acquire (model));
28629 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28630 a full barrier is emitted after the store-release. */
28631 if (is_armv8_sync)
28632 use_acquire = false;
28634 /* Checks whether a barrier is needed and emits one accordingly. */
28635 if (!(use_acquire || use_release))
28636 arm_pre_atomic_barrier (model);
28638 label = gen_label_rtx ();
28639 emit_label (label);
28641 if (new_out)
28642 new_out = gen_lowpart (wmode, new_out);
28643 if (old_out)
28644 old_out = gen_lowpart (wmode, old_out);
28645 else
28646 old_out = new_out;
28647 value = simplify_gen_subreg (wmode, value, mode, 0);
28649 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28651 /* Does the operation require destination and first operand to use the same
28652 register? This is decided by register constraints of relevant insn
28653 patterns in thumb1.md. */
28654 gcc_assert (!new_out || REG_P (new_out));
28655 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28656 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28657 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28658 bind_old_new =
28659 (TARGET_THUMB1
28660 && code != SET
28661 && code != MINUS
28662 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28664 /* We want to return the old value while putting the result of the operation
28665 in the same register as the old value so copy the old value over to the
28666 destination register and use that register for the operation. */
28667 if (old_out && bind_old_new)
28669 emit_move_insn (new_out, old_out);
28670 old_out = new_out;
28673 switch (code)
28675 case SET:
28676 new_out = value;
28677 break;
28679 case NOT:
28680 x = gen_rtx_AND (wmode, old_out, value);
28681 emit_insn (gen_rtx_SET (new_out, x));
28682 x = gen_rtx_NOT (wmode, new_out);
28683 emit_insn (gen_rtx_SET (new_out, x));
28684 break;
28686 case MINUS:
28687 if (CONST_INT_P (value))
28689 value = GEN_INT (-INTVAL (value));
28690 code = PLUS;
28692 /* FALLTHRU */
28694 case PLUS:
28695 if (mode == DImode)
28697 /* DImode plus/minus need to clobber flags. */
28698 /* The adddi3 and subdi3 patterns are incorrectly written so that
28699 they require matching operands, even when we could easily support
28700 three operands. Thankfully, this can be fixed up post-splitting,
28701 as the individual add+adc patterns do accept three operands and
28702 post-reload cprop can make these moves go away. */
28703 emit_move_insn (new_out, old_out);
28704 if (code == PLUS)
28705 x = gen_adddi3 (new_out, new_out, value);
28706 else
28707 x = gen_subdi3 (new_out, new_out, value);
28708 emit_insn (x);
28709 break;
28711 /* FALLTHRU */
28713 default:
28714 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28715 emit_insn (gen_rtx_SET (new_out, x));
28716 break;
28719 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28720 use_release);
28722 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28723 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28725 /* Checks whether a barrier is needed and emits one accordingly. */
28726 if (is_armv8_sync
28727 || !(use_acquire || use_release))
28728 arm_post_atomic_barrier (model);
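/* Illustrative sketch, not part of arm.c: the retry loop arm_split_atomic_op
   builds for an atomic fetch-and-op, with the same hypothetical ll()/sc()
   helpers standing in for LDREX/STREX.  Note how a NAND (rtx code NOT above)
   is composed from an AND followed by a NOT, matching the two emit_insn
   calls in the NOT case.  */
#if 0
#include <stdbool.h>

extern unsigned ll (unsigned *addr);
extern bool sc (unsigned *addr, unsigned value);

static unsigned
fetch_nand_model (unsigned *mem, unsigned value)
{
  unsigned old_out, new_out;
  do
    {
      old_out = ll (mem);             /* load-exclusive              */
      new_out = ~(old_out & value);   /* AND then NOT, as emitted    */
    }
  while (!sc (mem, new_out));         /* store-exclusive, retry loop */
  return old_out;                     /* the OLD_OUT result          */
}
#endif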
28731 #define MAX_VECT_LEN 16
28733 struct expand_vec_perm_d
28735 rtx target, op0, op1;
28736 auto_vec_perm_indices perm;
28737 machine_mode vmode;
28738 bool one_vector_p;
28739 bool testing_p;
28742 /* Generate a variable permutation. */
28744 static void
28745 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28747 machine_mode vmode = GET_MODE (target);
28748 bool one_vector_p = rtx_equal_p (op0, op1);
28750 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28751 gcc_checking_assert (GET_MODE (op0) == vmode);
28752 gcc_checking_assert (GET_MODE (op1) == vmode);
28753 gcc_checking_assert (GET_MODE (sel) == vmode);
28754 gcc_checking_assert (TARGET_NEON);
28756 if (one_vector_p)
28758 if (vmode == V8QImode)
28759 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28760 else
28761 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28763 else
28765 rtx pair;
28767 if (vmode == V8QImode)
28769 pair = gen_reg_rtx (V16QImode);
28770 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28771 pair = gen_lowpart (TImode, pair);
28772 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28774 else
28776 pair = gen_reg_rtx (OImode);
28777 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28778 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28783 void
28784 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28786 machine_mode vmode = GET_MODE (target);
28787 unsigned int nelt = GET_MODE_NUNITS (vmode);
28788 bool one_vector_p = rtx_equal_p (op0, op1);
28789 rtx mask;
28791 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28792 numbering of elements for big-endian, we must reverse the order. */
28793 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28795 /* The VTBL instruction does not use a modulo index, so we must take care
28796 of that ourselves. */
28797 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28798 mask = gen_const_vec_duplicate (vmode, mask);
28799 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28801 arm_expand_vec_perm_1 (target, op0, op1, sel);
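/* Illustrative sketch, not part of arm.c: a scalar model of the masked
   selection arm_expand_vec_perm sets up.  VEC_PERM_EXPR requires selector
   values to wrap modulo the number of input elements, but VTBL instead
   returns 0 for out-of-range indices, hence the AND emitted above.  */
#if 0
#include <stdint.h>
#include <stddef.h>

static void
vec_perm_model (uint8_t *dst, const uint8_t *op0, const uint8_t *op1,
                const uint8_t *sel, size_t nelt, int one_vector_p)
{
  size_t mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (size_t i = 0; i < nelt; i++)
    {
      size_t idx = sel[i] & mask;   /* the AND with the modulo mask */
      dst[i] = idx < nelt ? op0[idx] : op1[idx - nelt];
    }
}
#endif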
28804 /* Map lane ordering between architectural lane order, and GCC lane order,
28805 taking into account ABI. See comment above output_move_neon for details. */
28807 static int
28808 neon_endian_lane_map (machine_mode mode, int lane)
28810 if (BYTES_BIG_ENDIAN)
28812 int nelems = GET_MODE_NUNITS (mode);
28813 /* Reverse lane order. */
28814 lane = (nelems - 1 - lane);
28815 /* Reverse D register order, to match ABI. */
28816 if (GET_MODE_SIZE (mode) == 16)
28817 lane = lane ^ (nelems / 2);
28819 return lane;
28822 /* Some permutations index into pairs of vectors, this is a helper function
28823 to map indexes into those pairs of vectors. */
28825 static int
28826 neon_pair_endian_lane_map (machine_mode mode, int lane)
28828 int nelem = GET_MODE_NUNITS (mode);
28829 if (BYTES_BIG_ENDIAN)
28830 lane =
28831 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28832 return lane;
28835 /* Generate or test for an insn that supports a constant permutation. */
28837 /* Recognize patterns for the VUZP insns. */
28839 static bool
28840 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28842 unsigned int i, odd, mask, nelt = d->perm.length ();
28843 rtx out0, out1, in0, in1;
28844 rtx (*gen)(rtx, rtx, rtx, rtx);
28845 int first_elem;
28846 int swap_nelt;
28848 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28849 return false;
28851 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28852 big endian pattern on 64 bit vectors, so we correct for that. */
28853 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28854 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28856 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28858 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28859 odd = 0;
28860 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28861 odd = 1;
28862 else
28863 return false;
28864 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28866 for (i = 0; i < nelt; i++)
28868 unsigned elt =
28869 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28870 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28871 return false;
28874 /* Success! */
28875 if (d->testing_p)
28876 return true;
28878 switch (d->vmode)
28880 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28881 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28882 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28883 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28884 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28885 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28886 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28887 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28888 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28889 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28890 default:
28891 gcc_unreachable ();
28894 in0 = d->op0;
28895 in1 = d->op1;
28896 if (swap_nelt != 0)
28897 std::swap (in0, in1);
28899 out0 = d->target;
28900 out1 = gen_reg_rtx (d->vmode);
28901 if (odd)
28902 std::swap (out0, out1);
28904 emit_insn (gen (out0, in0, in1, out1));
28905 return true;
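/* Illustrative sketch, not part of arm.c: little-endian selectors that the
   VUZP recognizer above accepts for V8QI with two distinct operands.  A
   __builtin_shuffle with one of these masks is what reaches this code as a
   VEC_PERM_EXPR.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
uzp_even (v8qi a, v8qi b)   /* first element 0 -> odd == 0 */
{
  return __builtin_shuffle (a, b, (v8qi) { 0, 2, 4, 6, 8, 10, 12, 14 });
}

static v8qi
uzp_odd (v8qi a, v8qi b)    /* first element 1 -> odd == 1 */
{
  return __builtin_shuffle (a, b, (v8qi) { 1, 3, 5, 7, 9, 11, 13, 15 });
}
#endif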
28908 /* Recognize patterns for the VZIP insns. */
28910 static bool
28911 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28913 unsigned int i, high, mask, nelt = d->perm.length ();
28914 rtx out0, out1, in0, in1;
28915 rtx (*gen)(rtx, rtx, rtx, rtx);
28916 int first_elem;
28917 bool is_swapped;
28919 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28920 return false;
28922 is_swapped = BYTES_BIG_ENDIAN;
28924 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28926 high = nelt / 2;
28927 if (first_elem == neon_endian_lane_map (d->vmode, high))
28929 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28930 high = 0;
28931 else
28932 return false;
28933 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28935 for (i = 0; i < nelt / 2; i++)
28937 unsigned elt =
28938 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28939 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28940 != elt)
28941 return false;
28942 elt =
28943 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28944 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28945 != elt)
28946 return false;
28949 /* Success! */
28950 if (d->testing_p)
28951 return true;
28953 switch (d->vmode)
28955 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28956 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28957 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28958 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28959 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28960 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28961 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28962 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28963 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28964 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28965 default:
28966 gcc_unreachable ();
28969 in0 = d->op0;
28970 in1 = d->op1;
28971 if (is_swapped)
28972 std::swap (in0, in1);
28974 out0 = d->target;
28975 out1 = gen_reg_rtx (d->vmode);
28976 if (high)
28977 std::swap (out0, out1);
28979 emit_insn (gen (out0, in0, in1, out1));
28980 return true;
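/* Illustrative sketch, not part of arm.c: little-endian selectors matched by
   the VZIP recognizer above for V8QI, interleaving either the low halves
   (high == 0) or the high halves (high == nelt / 2) of the two inputs.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
zip_low (v8qi a, v8qi b)
{
  return __builtin_shuffle (a, b, (v8qi) { 0, 8, 1, 9, 2, 10, 3, 11 });
}

static v8qi
zip_high (v8qi a, v8qi b)
{
  return __builtin_shuffle (a, b, (v8qi) { 4, 12, 5, 13, 6, 14, 7, 15 });
}
#endif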
28983 /* Recognize patterns for the VREV insns. */
28985 static bool
28986 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28988 unsigned int i, j, diff, nelt = d->perm.length ();
28989 rtx (*gen)(rtx, rtx);
28991 if (!d->one_vector_p)
28992 return false;
28994 diff = d->perm[0];
28995 switch (diff)
28997 case 7:
28998 switch (d->vmode)
29000 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29001 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29002 default:
29003 return false;
29005 break;
29006 case 3:
29007 switch (d->vmode)
29009 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29010 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29011 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29012 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29013 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29014 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29015 default:
29016 return false;
29018 break;
29019 case 1:
29020 switch (d->vmode)
29022 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29023 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29024 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29025 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29026 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29027 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29028 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29029 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29030 default:
29031 return false;
29033 break;
29034 default:
29035 return false;
29038 for (i = 0; i < nelt ; i += diff + 1)
29039 for (j = 0; j <= diff; j += 1)
29041 /* This is guaranteed to be true as the value of diff
29042 is 7, 3, 1 and we should have enough elements in the
29043 queue to generate this. Getting a vector mask with a
29044 value of diff other than these values implies that
29045 something is wrong by the time we get here. */
29046 gcc_assert (i + j < nelt);
29047 if (d->perm[i + j] != i + diff - j)
29048 return false;
29051 /* Success! */
29052 if (d->testing_p)
29053 return true;
29055 emit_insn (gen (d->target, d->op0));
29056 return true;
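/* Illustrative sketch, not part of arm.c: single-operand selectors matched
   by the VREV recognizer above for V8QI.  The leading element supplies DIFF,
   so 7, 3 and 1 correspond to VREV64, VREV32 and VREV16 respectively.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi rev64 (v8qi a) { return __builtin_shuffle (a, (v8qi) { 7, 6, 5, 4, 3, 2, 1, 0 }); }
static v8qi rev32 (v8qi a) { return __builtin_shuffle (a, (v8qi) { 3, 2, 1, 0, 7, 6, 5, 4 }); }
static v8qi rev16 (v8qi a) { return __builtin_shuffle (a, (v8qi) { 1, 0, 3, 2, 5, 4, 7, 6 }); }
#endif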
29059 /* Recognize patterns for the VTRN insns. */
29061 static bool
29062 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29064 unsigned int i, odd, mask, nelt = d->perm.length ();
29065 rtx out0, out1, in0, in1;
29066 rtx (*gen)(rtx, rtx, rtx, rtx);
29068 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29069 return false;
29071 /* Note that these are little-endian tests. Adjust for big-endian later. */
29072 if (d->perm[0] == 0)
29073 odd = 0;
29074 else if (d->perm[0] == 1)
29075 odd = 1;
29076 else
29077 return false;
29078 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29080 for (i = 0; i < nelt; i += 2)
29082 if (d->perm[i] != i + odd)
29083 return false;
29084 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29085 return false;
29088 /* Success! */
29089 if (d->testing_p)
29090 return true;
29092 switch (d->vmode)
29094 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29095 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29096 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29097 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29098 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29099 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29100 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29101 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29102 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29103 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29104 default:
29105 gcc_unreachable ();
29108 in0 = d->op0;
29109 in1 = d->op1;
29110 if (BYTES_BIG_ENDIAN)
29112 std::swap (in0, in1);
29113 odd = !odd;
29116 out0 = d->target;
29117 out1 = gen_reg_rtx (d->vmode);
29118 if (odd)
29119 std::swap (out0, out1);
29121 emit_insn (gen (out0, in0, in1, out1));
29122 return true;
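/* Illustrative sketch, not part of arm.c: little-endian selectors matched by
   the VTRN recognizer above for V8QI.  Each output interleaves the even (or
   odd) lanes of the two inputs pairwise, i.e. a 2x2 transpose of adjacent
   elements.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
trn_even (v8qi a, v8qi b)   /* d->perm[0] == 0 */
{
  return __builtin_shuffle (a, b, (v8qi) { 0, 8, 2, 10, 4, 12, 6, 14 });
}

static v8qi
trn_odd (v8qi a, v8qi b)    /* d->perm[0] == 1 */
{
  return __builtin_shuffle (a, b, (v8qi) { 1, 9, 3, 11, 5, 13, 7, 15 });
}
#endif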
29125 /* Recognize patterns for the VEXT insns. */
29127 static bool
29128 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29130 unsigned int i, nelt = d->perm.length ();
29131 rtx (*gen) (rtx, rtx, rtx, rtx);
29132 rtx offset;
29134 unsigned int location;
29136 unsigned int next = d->perm[0] + 1;
29138 /* TODO: Handle GCC's numbering of elements for big-endian. */
29139 if (BYTES_BIG_ENDIAN)
29140 return false;
29142 /* Check if the extracted indexes are increasing by one. */
29143 for (i = 1; i < nelt; next++, i++)
29145 /* If we hit the most significant element of the 2nd vector in
29146 the previous iteration, no need to test further. */
29147 if (next == 2 * nelt)
29148 return false;
29150 /* If we are operating on only one vector: it could be a
29151 rotation. If there are only two elements of size < 64, let
29152 arm_evpc_neon_vrev catch it. */
29153 if (d->one_vector_p && (next == nelt))
29155 if ((nelt == 2) && (d->vmode != V2DImode))
29156 return false;
29157 else
29158 next = 0;
29161 if (d->perm[i] != next)
29162 return false;
29165 location = d->perm[0];
29167 switch (d->vmode)
29169 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29170 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29171 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29172 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29173 case E_V2SImode: gen = gen_neon_vextv2si; break;
29174 case E_V4SImode: gen = gen_neon_vextv4si; break;
29175 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29176 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29177 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29178 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29179 case E_V2DImode: gen = gen_neon_vextv2di; break;
29180 default:
29181 return false;
29184 /* Success! */
29185 if (d->testing_p)
29186 return true;
29188 offset = GEN_INT (location);
29189 emit_insn (gen (d->target, d->op0, d->op1, offset));
29190 return true;
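/* Illustrative sketch, not part of arm.c: a selector matched by the VEXT
   recognizer above.  The indices increase by one from d->perm[0], so the
   result is a window of consecutive elements drawn across both inputs, and
   the starting index (3 here) becomes the immediate of the emitted VEXT.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
ext_by_3 (v8qi a, v8qi b)
{
  return __builtin_shuffle (a, b, (v8qi) { 3, 4, 5, 6, 7, 8, 9, 10 });
}
#endif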
29193 /* The NEON VTBL instruction is a fully variable permutation that's even
29194 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29195 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29196 can do slightly better by expanding this as a constant where we don't
29197 have to apply a mask. */
29199 static bool
29200 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29202 rtx rperm[MAX_VECT_LEN], sel;
29203 machine_mode vmode = d->vmode;
29204 unsigned int i, nelt = d->perm.length ();
29206 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29207 numbering of elements for big-endian, we must reverse the order. */
29208 if (BYTES_BIG_ENDIAN)
29209 return false;
29211 if (d->testing_p)
29212 return true;
29214 /* Generic code will try constant permutation twice. Once with the
29215 original mode and again with the elements lowered to QImode.
29216 So wait and don't do the selector expansion ourselves. */
29217 if (vmode != V8QImode && vmode != V16QImode)
29218 return false;
29220 for (i = 0; i < nelt; ++i)
29221 rperm[i] = GEN_INT (d->perm[i]);
29222 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29223 sel = force_reg (vmode, sel);
29225 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29226 return true;
29229 static bool
29230 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29232 /* Check if the input mask matches vext before reordering the
29233 operands. */
29234 if (TARGET_NEON)
29235 if (arm_evpc_neon_vext (d))
29236 return true;
29238 /* The pattern matching functions above are written to look for a small
29239 number to begin the sequence (0, 1, N/2). If we begin with an index
29240 from the second operand, we can swap the operands. */
29241 unsigned int nelt = d->perm.length ();
29242 if (d->perm[0] >= nelt)
29244 for (unsigned int i = 0; i < nelt; ++i)
29245 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29247 std::swap (d->op0, d->op1);
29250 if (TARGET_NEON)
29252 if (arm_evpc_neon_vuzp (d))
29253 return true;
29254 if (arm_evpc_neon_vzip (d))
29255 return true;
29256 if (arm_evpc_neon_vrev (d))
29257 return true;
29258 if (arm_evpc_neon_vtrn (d))
29259 return true;
29260 return arm_evpc_neon_vtbl (d);
29262 return false;
29265 /* Expand a vec_perm_const pattern. */
29267 bool
29268 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29270 struct expand_vec_perm_d d;
29271 int i, nelt, which;
29273 d.target = target;
29274 d.op0 = op0;
29275 d.op1 = op1;
29277 d.vmode = GET_MODE (target);
29278 gcc_assert (VECTOR_MODE_P (d.vmode));
29279 d.testing_p = false;
29281 nelt = GET_MODE_NUNITS (d.vmode);
29282 d.perm.reserve (nelt);
29283 for (i = which = 0; i < nelt; ++i)
29285 rtx e = XVECEXP (sel, 0, i);
29286 int ei = INTVAL (e) & (2 * nelt - 1);
29287 which |= (ei < nelt ? 1 : 2);
29288 d.perm.quick_push (ei);
29291 switch (which)
29293 default:
29294 gcc_unreachable();
29296 case 3:
29297 d.one_vector_p = false;
29298 if (!rtx_equal_p (op0, op1))
29299 break;
29301 /* The elements of PERM do not suggest that only the first operand
29302 is used, but both operands are identical. Allow easier matching
29303 of the permutation by folding the permutation into the single
29304 input vector. */
29305 /* FALLTHRU */
29306 case 2:
29307 for (i = 0; i < nelt; ++i)
29308 d.perm[i] &= nelt - 1;
29309 d.op0 = op1;
29310 d.one_vector_p = true;
29311 break;
29313 case 1:
29314 d.op1 = op0;
29315 d.one_vector_p = true;
29316 break;
29319 return arm_expand_vec_perm_const_1 (&d);
29322 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29324 static bool
29325 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29327 struct expand_vec_perm_d d;
29328 unsigned int i, nelt, which;
29329 bool ret;
29331 d.vmode = vmode;
29332 d.testing_p = true;
29333 d.perm.safe_splice (sel);
29335 /* Categorize the set of elements in the selector. */
29336 nelt = GET_MODE_NUNITS (d.vmode);
29337 for (i = which = 0; i < nelt; ++i)
29339 unsigned int e = d.perm[i];
29340 gcc_assert (e < 2 * nelt);
29341 which |= (e < nelt ? 1 : 2);
29344 /* For all elements from second vector, fold the elements to first. */
29345 if (which == 2)
29346 for (i = 0; i < nelt; ++i)
29347 d.perm[i] -= nelt;
29349 /* Check whether the mask can be applied to the vector type. */
29350 d.one_vector_p = (which != 3);
29352 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29353 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29354 if (!d.one_vector_p)
29355 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29357 start_sequence ();
29358 ret = arm_expand_vec_perm_const_1 (&d);
29359 end_sequence ();
29361 return ret;
29364 bool
29365 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29367 /* If we are soft float and either have ldrd or the mode fits in a
29368 single word, then all auto increment forms are ok. */
29369 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29370 return true;
29372 switch (code)
29374 /* Post increment and Pre Decrement are supported for all
29375 instruction forms except for vector forms. */
29376 case ARM_POST_INC:
29377 case ARM_PRE_DEC:
29378 if (VECTOR_MODE_P (mode))
29380 if (code != ARM_PRE_DEC)
29381 return true;
29382 else
29383 return false;
29386 return true;
29388 case ARM_POST_DEC:
29389 case ARM_PRE_INC:
29390 /* Without LDRD and mode size greater than
29391 word size, there is no point in auto-incrementing
29392 because ldm and stm will not have these forms. */
29393 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29394 return false;
29396 /* Vector and floating point modes do not support
29397 these auto increment forms. */
29398 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29399 return false;
29401 return true;
29403 default:
29404 return false;
29408 return false;
29411 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29412 on ARM, since we know that shifts by negative amounts are no-ops.
29413 Additionally, the default expansion code is not available or suitable
29414 for post-reload insn splits (this can occur when the register allocator
29415 chooses not to do a shift in NEON).
29417 This function is used in both initial expand and post-reload splits, and
29418 handles all kinds of 64-bit shifts.
29420 Input requirements:
29421 - It is safe for the input and output to be the same register, but
29422 early-clobber rules apply for the shift amount and scratch registers.
29423 - Shift by register requires both scratch registers. In all other cases
29424 the scratch registers may be NULL.
29425 - Ashiftrt by a register also clobbers the CC register. */
29426 void
29427 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29428 rtx amount, rtx scratch1, rtx scratch2)
29430 rtx out_high = gen_highpart (SImode, out);
29431 rtx out_low = gen_lowpart (SImode, out);
29432 rtx in_high = gen_highpart (SImode, in);
29433 rtx in_low = gen_lowpart (SImode, in);
29435 /* Terminology:
29436 in = the register pair containing the input value.
29437 out = the destination register pair.
29438 up = the high- or low-part of each pair.
29439 down = the opposite part to "up".
29440 In a shift, we can consider bits to shift from "up"-stream to
29441 "down"-stream, so in a left-shift "up" is the low-part and "down"
29442 is the high-part of each register pair. */
29444 rtx out_up = code == ASHIFT ? out_low : out_high;
29445 rtx out_down = code == ASHIFT ? out_high : out_low;
29446 rtx in_up = code == ASHIFT ? in_low : in_high;
29447 rtx in_down = code == ASHIFT ? in_high : in_low;
29449 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29450 gcc_assert (out
29451 && (REG_P (out) || GET_CODE (out) == SUBREG)
29452 && GET_MODE (out) == DImode);
29453 gcc_assert (in
29454 && (REG_P (in) || GET_CODE (in) == SUBREG)
29455 && GET_MODE (in) == DImode);
29456 gcc_assert (amount
29457 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29458 && GET_MODE (amount) == SImode)
29459 || CONST_INT_P (amount)));
29460 gcc_assert (scratch1 == NULL
29461 || (GET_CODE (scratch1) == SCRATCH)
29462 || (GET_MODE (scratch1) == SImode
29463 && REG_P (scratch1)));
29464 gcc_assert (scratch2 == NULL
29465 || (GET_CODE (scratch2) == SCRATCH)
29466 || (GET_MODE (scratch2) == SImode
29467 && REG_P (scratch2)));
29468 gcc_assert (!REG_P (out) || !REG_P (amount)
29469 || !HARD_REGISTER_P (out)
29470 || (REGNO (out) != REGNO (amount)
29471 && REGNO (out) + 1 != REGNO (amount)));
29473 /* Macros to make following code more readable. */
29474 #define SUB_32(DEST,SRC) \
29475 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29476 #define RSB_32(DEST,SRC) \
29477 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29478 #define SUB_S_32(DEST,SRC) \
29479 gen_addsi3_compare0 ((DEST), (SRC), \
29480 GEN_INT (-32))
29481 #define SET(DEST,SRC) \
29482 gen_rtx_SET ((DEST), (SRC))
29483 #define SHIFT(CODE,SRC,AMOUNT) \
29484 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29485 #define LSHIFT(CODE,SRC,AMOUNT) \
29486 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29487 SImode, (SRC), (AMOUNT))
29488 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29489 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29490 SImode, (SRC), (AMOUNT))
29491 #define ORR(A,B) \
29492 gen_rtx_IOR (SImode, (A), (B))
29493 #define BRANCH(COND,LABEL) \
29494 gen_arm_cond_branch ((LABEL), \
29495 gen_rtx_ ## COND (CCmode, cc_reg, \
29496 const0_rtx), \
29497 cc_reg)
29499 /* Shifts by register and shifts by constant are handled separately. */
29500 if (CONST_INT_P (amount))
29502 /* We have a shift-by-constant. */
29504 /* First, handle out-of-range shift amounts.
29505 In both cases we try to match the result that an ARM instruction in a
29506 shift-by-register would give. This helps reduce execution
29507 differences between optimization levels, but it won't stop other
29508 parts of the compiler doing different things. This is "undefined
29509 behavior", in any case. */
29510 if (INTVAL (amount) <= 0)
29511 emit_insn (gen_movdi (out, in));
29512 else if (INTVAL (amount) >= 64)
29514 if (code == ASHIFTRT)
29516 rtx const31_rtx = GEN_INT (31);
29517 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29518 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29520 else
29521 emit_insn (gen_movdi (out, const0_rtx));
29524 /* Now handle valid shifts. */
29525 else if (INTVAL (amount) < 32)
29527 /* Shifts by a constant less than 32. */
29528 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29530 /* Clearing the out register in DImode first avoids lots
29531 of spilling and results in less stack usage.
29532 Later this redundant insn is completely removed.
29533 Do that only if "in" and "out" are different registers. */
29534 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29535 emit_insn (SET (out, const0_rtx));
29536 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29537 emit_insn (SET (out_down,
29538 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29539 out_down)));
29540 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29542 else
29544 /* Shifts by a constant greater than 31. */
29545 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29547 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29548 emit_insn (SET (out, const0_rtx));
29549 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29550 if (code == ASHIFTRT)
29551 emit_insn (gen_ashrsi3 (out_up, in_up,
29552 GEN_INT (31)));
29553 else
29554 emit_insn (SET (out_up, const0_rtx));
29557 else
29559 /* We have a shift-by-register. */
29560 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29562 /* This alternative requires the scratch registers. */
29563 gcc_assert (scratch1 && REG_P (scratch1));
29564 gcc_assert (scratch2 && REG_P (scratch2));
29566 /* We will need the values "amount-32" and "32-amount" later.
29567 Swapping them around now allows the later code to be more general. */
29568 switch (code)
29570 case ASHIFT:
29571 emit_insn (SUB_32 (scratch1, amount));
29572 emit_insn (RSB_32 (scratch2, amount));
29573 break;
29574 case ASHIFTRT:
29575 emit_insn (RSB_32 (scratch1, amount));
29576 /* Also set CC = amount > 32. */
29577 emit_insn (SUB_S_32 (scratch2, amount));
29578 break;
29579 case LSHIFTRT:
29580 emit_insn (RSB_32 (scratch1, amount));
29581 emit_insn (SUB_32 (scratch2, amount));
29582 break;
29583 default:
29584 gcc_unreachable ();
29587 /* Emit code like this:
29589 arithmetic-left:
29590 out_down = in_down << amount;
29591 out_down = (in_up << (amount - 32)) | out_down;
29592 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29593 out_up = in_up << amount;
29595 arithmetic-right:
29596 out_down = in_down >> amount;
29597 out_down = (in_up << (32 - amount)) | out_down;
29598 if (amount < 32)
29599 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29600 out_up = in_up << amount;
29602 logical-right:
29603 out_down = in_down >> amount;
29604 out_down = (in_up << (32 - amount)) | out_down;
29605 if (amount < 32)
29606 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29607 out_up = in_up << amount;
29609 The ARM and Thumb2 variants are the same but implemented slightly
29610 differently. If this were only called during expand we could just
29611 use the Thumb2 case and let combine do the right thing, but this
29612 can also be called from post-reload splitters. */
29614 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29616 if (!TARGET_THUMB2)
29618 /* Emit code for ARM mode. */
29619 emit_insn (SET (out_down,
29620 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29621 if (code == ASHIFTRT)
29623 rtx_code_label *done_label = gen_label_rtx ();
29624 emit_jump_insn (BRANCH (LT, done_label));
29625 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29626 out_down)));
29627 emit_label (done_label);
29629 else
29630 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29631 out_down)));
29633 else
29635 /* Emit code for Thumb2 mode.
29636 Thumb2 can't do shift and or in one insn. */
29637 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29638 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29640 if (code == ASHIFTRT)
29642 rtx_code_label *done_label = gen_label_rtx ();
29643 emit_jump_insn (BRANCH (LT, done_label));
29644 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29645 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29646 emit_label (done_label);
29648 else
29650 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29651 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29655 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29658 #undef SUB_32
29659 #undef RSB_32
29660 #undef SUB_S_32
29661 #undef SET
29662 #undef SHIFT
29663 #undef LSHIFT
29664 #undef REV_LSHIFT
29665 #undef ORR
29666 #undef BRANCH
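/* Illustrative sketch, not part of arm.c: a scalar model of the
   shift-by-constant decomposition above for a 64-bit value held in two
   32-bit core registers (LO, HI).  It assumes 0 < AMOUNT < 64; the code
   above additionally matches the ARM shift-by-register semantics for
   out-of-range amounts, and an arithmetic right shift sign-fills the upper
   word (HI >> 31) instead of zeroing it.  */
#if 0
#include <stdint.h>

static uint64_t
shl64_model (uint32_t lo, uint32_t hi, unsigned amount)
{
  uint32_t out_lo, out_hi;
  if (amount < 32)
    {
      out_lo = lo << amount;
      out_hi = (hi << amount) | (lo >> (32 - amount));
    }
  else
    {
      out_lo = 0;
      out_hi = lo << (amount - 32);
    }
  return ((uint64_t) out_hi << 32) | out_lo;
}

static uint64_t
lshr64_model (uint32_t lo, uint32_t hi, unsigned amount)
{
  uint32_t out_lo, out_hi;
  if (amount < 32)
    {
      out_hi = hi >> amount;
      out_lo = (lo >> amount) | (hi << (32 - amount));
    }
  else
    {
      out_hi = 0;
      out_lo = hi >> (amount - 32);
    }
  return ((uint64_t) out_hi << 32) | out_lo;
}
#endif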
29669 /* Returns true if the pattern is a valid symbolic address, which is either a
29670 symbol_ref or (symbol_ref + addend).
29672 According to the ARM ELF ABI, the initial addend of REL-type relocations
29673 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29674 literal field of the instruction as a 16-bit signed value in the range
29675 -32768 <= A < 32768. */
29677 bool
29678 arm_valid_symbolic_address_p (rtx addr)
29680 rtx xop0, xop1 = NULL_RTX;
29681 rtx tmp = addr;
29683 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29684 return true;
29686 /* (const (plus: symbol_ref const_int)) */
29687 if (GET_CODE (addr) == CONST)
29688 tmp = XEXP (addr, 0);
29690 if (GET_CODE (tmp) == PLUS)
29692 xop0 = XEXP (tmp, 0);
29693 xop1 = XEXP (tmp, 1);
29695 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29696 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29699 return false;
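/* Illustrative sketch, not part of arm.c: the kind of C-level address that
   reaches this predicate as (const (plus (symbol_ref "buf") (const_int 12))).
   When the address is materialised with a MOVW/MOVT pair using REL-type
   relocations, the constant part must fit the signed 16-bit addend range
   checked above.  */
#if 0
extern int buf[256];

int *
third_element (void)
{
  return &buf[3];   /* symbol_ref "buf" plus 12 with 32-bit int */
}
#endif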
29702 /* Return true if *COMPARISON is a valid comparison operation, and
29703 put the operands into a form that is valid for it. */
29704 bool
29705 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29707 enum rtx_code code = GET_CODE (*comparison);
29708 int code_int;
29709 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29710 ? GET_MODE (*op2) : GET_MODE (*op1);
29712 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29714 if (code == UNEQ || code == LTGT)
29715 return false;
29717 code_int = (int)code;
29718 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29719 PUT_CODE (*comparison, (enum rtx_code)code_int);
29721 switch (mode)
29723 case E_SImode:
29724 if (!arm_add_operand (*op1, mode))
29725 *op1 = force_reg (mode, *op1);
29726 if (!arm_add_operand (*op2, mode))
29727 *op2 = force_reg (mode, *op2);
29728 return true;
29730 case E_DImode:
29731 if (!cmpdi_operand (*op1, mode))
29732 *op1 = force_reg (mode, *op1);
29733 if (!cmpdi_operand (*op2, mode))
29734 *op2 = force_reg (mode, *op2);
29735 return true;
29737 case E_HFmode:
29738 if (!TARGET_VFP_FP16INST)
29739 break;
29740 /* FP16 comparisons are done in SF mode. */
29741 mode = SFmode;
29742 *op1 = convert_to_mode (mode, *op1, 1);
29743 *op2 = convert_to_mode (mode, *op2, 1);
29744 /* Fall through. */
29745 case E_SFmode:
29746 case E_DFmode:
29747 if (!vfp_compare_operand (*op1, mode))
29748 *op1 = force_reg (mode, *op1);
29749 if (!vfp_compare_operand (*op2, mode))
29750 *op2 = force_reg (mode, *op2);
29751 return true;
29752 default:
29753 break;
29756 return false;
29760 /* Maximum number of instructions to set block of memory. */
29761 static int
29762 arm_block_set_max_insns (void)
29764 if (optimize_function_for_size_p (cfun))
29765 return 4;
29766 else
29767 return current_tune->max_insns_inline_memset;
29770 /* Return TRUE if it's profitable to set block of memory for
29771 non-vectorized case. VAL is the value to set the memory
29772 with. LENGTH is the number of bytes to set. ALIGN is the
29773 alignment of the destination memory in bytes. UNALIGNED_P
29774 is TRUE if we can only set the memory with instructions
29775 meeting alignment requirements. USE_STRD_P is TRUE if we
29776 can use strd to set the memory. */
29777 static bool
29778 arm_block_set_non_vect_profit_p (rtx val,
29779 unsigned HOST_WIDE_INT length,
29780 unsigned HOST_WIDE_INT align,
29781 bool unaligned_p, bool use_strd_p)
29783 int num = 0;
29784 /* For leftovers of 0-7 bytes, we can set the memory block using
29785 strb/strh/str with the minimum number of instructions. */
29786 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29788 if (unaligned_p)
29790 num = arm_const_inline_cost (SET, val);
29791 num += length / align + length % align;
29793 else if (use_strd_p)
29795 num = arm_const_double_inline_cost (val);
29796 num += (length >> 3) + leftover[length & 7];
29798 else
29800 num = arm_const_inline_cost (SET, val);
29801 num += (length >> 2) + leftover[length & 3];
29804 /* We may be able to combine last pair STRH/STRB into a single STR
29805 by shifting one byte back. */
29806 if (unaligned_access && length > 3 && (length & 3) == 3)
29807 num--;
29809 return (num <= arm_block_set_max_insns ());
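/* Illustrative sketch, not part of arm.c: the store-count arithmetic used
   above for the aligned, non-strd case; the constant-load cost returned by
   arm_const_inline_cost still has to be added to this.  For example, a
   15-byte aligned block needs 3 word stores plus an STRH and an STRB, or 4
   stores once the trailing pair is merged into one unaligned STR.  */
#if 0
static int
word_store_count (unsigned length, int unaligned_access_p)
{
  /* Stores needed for 0-3 leftover bytes (the low half of the table above). */
  static const int leftover[4] = { 0, 1, 1, 2 };
  int num = (length >> 2) + leftover[length & 3];
  /* A trailing STRH+STRB pair can be merged into a single unaligned STR.  */
  if (unaligned_access_p && length > 3 && (length & 3) == 3)
    num--;
  return num;
}
#endif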
29812 /* Return TRUE if it's profitable to set block of memory for
29813 vectorized case. LENGTH is the number of bytes to set.
29814 ALIGN is the alignment of destination memory in bytes.
29815 MODE is the vector mode used to set the memory. */
29816 static bool
29817 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29818 unsigned HOST_WIDE_INT align,
29819 machine_mode mode)
29821 int num;
29822 bool unaligned_p = ((align & 3) != 0);
29823 unsigned int nelt = GET_MODE_NUNITS (mode);
29825 /* Instruction loading constant value. */
29826 num = 1;
29827 /* Instructions storing the memory. */
29828 num += (length + nelt - 1) / nelt;
29829 /* Instructions adjusting the address expression. We only need to
29830 adjust the address expression if the block is 4-byte aligned and the
29831 leftover bytes can only be stored by a misaligned store instruction. */
29832 if (!unaligned_p && (length & 3) != 0)
29833 num++;
29835 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29836 if (!unaligned_p && mode == V16QImode)
29837 num--;
29839 return (num <= arm_block_set_max_insns ());
29842 /* Set a block of memory using vectorization instructions for the
29843 unaligned case. We fill the first LENGTH bytes of the memory
29844 area starting from DSTBASE with byte constant VALUE. ALIGN is
29845 the alignment requirement of memory. Return TRUE if succeeded. */
29846 static bool
29847 arm_block_set_unaligned_vect (rtx dstbase,
29848 unsigned HOST_WIDE_INT length,
29849 unsigned HOST_WIDE_INT value,
29850 unsigned HOST_WIDE_INT align)
29852 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29853 rtx dst, mem;
29854 rtx val_vec, reg;
29855 rtx (*gen_func) (rtx, rtx);
29856 machine_mode mode;
29857 unsigned HOST_WIDE_INT v = value;
29858 unsigned int offset = 0;
29859 gcc_assert ((align & 0x3) != 0);
29860 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29861 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29862 if (length >= nelt_v16)
29864 mode = V16QImode;
29865 gen_func = gen_movmisalignv16qi;
29867 else
29869 mode = V8QImode;
29870 gen_func = gen_movmisalignv8qi;
29872 nelt_mode = GET_MODE_NUNITS (mode);
29873 gcc_assert (length >= nelt_mode);
29874 /* Skip if it isn't profitable. */
29875 if (!arm_block_set_vect_profit_p (length, align, mode))
29876 return false;
29878 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29879 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29881 v = sext_hwi (v, BITS_PER_WORD);
29883 reg = gen_reg_rtx (mode);
29884 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29885 /* Emit instruction loading the constant value. */
29886 emit_move_insn (reg, val_vec);
29888 /* Handle nelt_mode bytes in a vector. */
29889 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29891 emit_insn ((*gen_func) (mem, reg));
29892 if (i + 2 * nelt_mode <= length)
29894 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29895 offset += nelt_mode;
29896 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29900 /* If at least nelt_v8 bytes are left over, we must be in
29901 V16QI mode. */
29902 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29904 /* Handle (8, 16) bytes leftover. */
29905 if (i + nelt_v8 < length)
29907 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29908 offset += length - i;
29909 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29911 /* We are shifting bytes back, set the alignment accordingly. */
29912 if ((length & 1) != 0 && align >= 2)
29913 set_mem_align (mem, BITS_PER_UNIT);
29915 emit_insn (gen_movmisalignv16qi (mem, reg));
29917 /* Handle (0, 8] bytes leftover. */
29918 else if (i < length && i + nelt_v8 >= length)
29920 if (mode == V16QImode)
29921 reg = gen_lowpart (V8QImode, reg);
29923 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29924 + (nelt_mode - nelt_v8))));
29925 offset += (length - i) + (nelt_mode - nelt_v8);
29926 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29928 /* We are shifting bytes back, set the alignment accordingly. */
29929 if ((length & 1) != 0 && align >= 2)
29930 set_mem_align (mem, BITS_PER_UNIT);
29932 emit_insn (gen_movmisalignv8qi (mem, reg));
29935 return true;
29938 /* Set a block of memory using vectorization instructions for the
29939 aligned case. We fill the first LENGTH bytes of the memory area
29940 starting from DSTBASE with byte constant VALUE. ALIGN is the
29941 alignment requirement of memory. Return TRUE if succeeded. */
29942 static bool
29943 arm_block_set_aligned_vect (rtx dstbase,
29944 unsigned HOST_WIDE_INT length,
29945 unsigned HOST_WIDE_INT value,
29946 unsigned HOST_WIDE_INT align)
29948 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
29949 rtx dst, addr, mem;
29950 rtx val_vec, reg;
29951 machine_mode mode;
29952 unsigned HOST_WIDE_INT v = value;
29953 unsigned int offset = 0;
29955 gcc_assert ((align & 0x3) == 0);
29956 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29957 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29958 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29959 mode = V16QImode;
29960 else
29961 mode = V8QImode;
29963 nelt_mode = GET_MODE_NUNITS (mode);
29964 gcc_assert (length >= nelt_mode);
29965 /* Skip if it isn't profitable. */
29966 if (!arm_block_set_vect_profit_p (length, align, mode))
29967 return false;
29969 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29971 v = sext_hwi (v, BITS_PER_WORD);
29973 reg = gen_reg_rtx (mode);
29974 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29975 /* Emit instruction loading the constant value. */
29976 emit_move_insn (reg, val_vec);
29978 i = 0;
29979 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29980 if (mode == V16QImode)
29982 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29983 emit_insn (gen_movmisalignv16qi (mem, reg));
29984 i += nelt_mode;
29985 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29986 if (i + nelt_v8 < length && i + nelt_v16 > length)
29988 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29989 offset += length - nelt_mode;
29990 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29991 /* We are shifting bytes back, set the alignment accordingly. */
29992 if ((length & 0x3) == 0)
29993 set_mem_align (mem, BITS_PER_UNIT * 4);
29994 else if ((length & 0x1) == 0)
29995 set_mem_align (mem, BITS_PER_UNIT * 2);
29996 else
29997 set_mem_align (mem, BITS_PER_UNIT);
29999 emit_insn (gen_movmisalignv16qi (mem, reg));
30000 return true;
30002 /* Fall through for bytes leftover. */
30003 mode = V8QImode;
30004 nelt_mode = GET_MODE_NUNITS (mode);
30005 reg = gen_lowpart (V8QImode, reg);
30008 /* Handle 8 bytes in a vector. */
30009 for (; (i + nelt_mode <= length); i += nelt_mode)
30011 addr = plus_constant (Pmode, dst, i);
30012 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30013 emit_move_insn (mem, reg);
30016 /* Handle single word leftover by shifting 4 bytes back. We can
30017 use aligned access for this case. */
30018 if (i + UNITS_PER_WORD == length)
30020 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30021 offset += i - UNITS_PER_WORD;
30022 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30023 /* We are shifting 4 bytes back, set the alignment accordingly. */
30024 if (align > UNITS_PER_WORD)
30025 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30027 emit_move_insn (mem, reg);
30029 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30030 We have to use unaligned access for this case. */
30031 else if (i < length)
30033 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30034 offset += length - nelt_mode;
30035 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30036 /* We are shifting bytes back, set the alignment accordingly. */
30037 if ((length & 1) == 0)
30038 set_mem_align (mem, BITS_PER_UNIT * 2);
30039 else
30040 set_mem_align (mem, BITS_PER_UNIT);
30042 emit_insn (gen_movmisalignv8qi (mem, reg));
30045 return true;
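/* Illustrative sketch, not part of arm.c: the overlapping-tail trick used
   above (and in the unaligned variant) for leftover bytes.  Instead of a
   chain of narrower stores, the final vector store is moved back so that it
   ends exactly at DST + LENGTH, rewriting bytes already set; memcpy stands
   in for the emitted VST1, and LENGTH >= 8 is assumed, as the callers above
   guarantee.  */
#if 0
#include <string.h>
#include <stddef.h>

static void
vect_memset_model (unsigned char *dst, unsigned char value, size_t length)
{
  unsigned char vec[8];
  memset (vec, value, sizeof vec);            /* the splatted register     */
  size_t i;
  for (i = 0; i + sizeof vec <= length; i += sizeof vec)
    memcpy (dst + i, vec, sizeof vec);        /* full vector stores        */
  if (i < length)
    memcpy (dst + length - sizeof vec, vec, sizeof vec);  /* shifted tail  */
}
#endif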
30048 /* Set a block of memory using plain strh/strb instructions, only
30049 using instructions allowed by ALIGN on the processor. We fill the
30050 first LENGTH bytes of the memory area starting from DSTBASE
30051 with byte constant VALUE. ALIGN is the alignment requirement
30052 of memory. */
30053 static bool
30054 arm_block_set_unaligned_non_vect (rtx dstbase,
30055 unsigned HOST_WIDE_INT length,
30056 unsigned HOST_WIDE_INT value,
30057 unsigned HOST_WIDE_INT align)
30059 unsigned int i;
30060 rtx dst, addr, mem;
30061 rtx val_exp, val_reg, reg;
30062 machine_mode mode;
30063 HOST_WIDE_INT v = value;
30065 gcc_assert (align == 1 || align == 2);
30067 if (align == 2)
30068 v |= (value << BITS_PER_UNIT);
30070 v = sext_hwi (v, BITS_PER_WORD);
30071 val_exp = GEN_INT (v);
30072 /* Skip if it isn't profitable. */
30073 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30074 align, true, false))
30075 return false;
30077 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30078 mode = (align == 2 ? HImode : QImode);
30079 val_reg = force_reg (SImode, val_exp);
30080 reg = gen_lowpart (mode, val_reg);
30082 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30084 addr = plus_constant (Pmode, dst, i);
30085 mem = adjust_automodify_address (dstbase, mode, addr, i);
30086 emit_move_insn (mem, reg);
30089 /* Handle single byte leftover. */
30090 if (i + 1 == length)
30092 reg = gen_lowpart (QImode, val_reg);
30093 addr = plus_constant (Pmode, dst, i);
30094 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30095 emit_move_insn (mem, reg);
30096 i++;
30099 gcc_assert (i == length);
30100 return true;
30103 /* Set a block of memory using plain strd/str/strh/strb instructions,
30104 to permit unaligned copies on processors which support unaligned
30105 semantics for those instructions. We fill the first LENGTH bytes
30106 of the memory area starting from DSTBASE with byte constant VALUE.
30107 ALIGN is the alignment requirement of memory. */
30108 static bool
30109 arm_block_set_aligned_non_vect (rtx dstbase,
30110 unsigned HOST_WIDE_INT length,
30111 unsigned HOST_WIDE_INT value,
30112 unsigned HOST_WIDE_INT align)
30114 unsigned int i;
30115 rtx dst, addr, mem;
30116 rtx val_exp, val_reg, reg;
30117 unsigned HOST_WIDE_INT v;
30118 bool use_strd_p;
30120 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30121 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30123 v = (value | (value << 8) | (value << 16) | (value << 24));
30124 if (length < UNITS_PER_WORD)
30125 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30127 if (use_strd_p)
30128 v |= (v << BITS_PER_WORD);
30129 else
30130 v = sext_hwi (v, BITS_PER_WORD);
30132 val_exp = GEN_INT (v);
30133 /* Skip if it isn't profitable. */
30134 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30135 align, false, use_strd_p))
30137 if (!use_strd_p)
30138 return false;
30140 /* Try without strd. */
30141 v = (v >> BITS_PER_WORD);
30142 v = sext_hwi (v, BITS_PER_WORD);
30143 val_exp = GEN_INT (v);
30144 use_strd_p = false;
30145 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30146 align, false, use_strd_p))
30147 return false;
30150 i = 0;
30151 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30152 /* Handle double words using strd if possible. */
30153 if (use_strd_p)
30155 val_reg = force_reg (DImode, val_exp);
30156 reg = val_reg;
30157 for (; (i + 8 <= length); i += 8)
30159 addr = plus_constant (Pmode, dst, i);
30160 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30161 emit_move_insn (mem, reg);
30164 else
30165 val_reg = force_reg (SImode, val_exp);
30167 /* Handle words. */
30168 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30169 for (; (i + 4 <= length); i += 4)
30171 addr = plus_constant (Pmode, dst, i);
30172 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30173 if ((align & 3) == 0)
30174 emit_move_insn (mem, reg);
30175 else
30176 emit_insn (gen_unaligned_storesi (mem, reg));
30179 /* Merge last pair of STRH and STRB into a STR if possible. */
30180 if (unaligned_access && i > 0 && (i + 3) == length)
30182 addr = plus_constant (Pmode, dst, i - 1);
30183 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30184 /* We are shifting one byte back, set the alignment accordingly. */
30185 if ((align & 1) == 0)
30186 set_mem_align (mem, BITS_PER_UNIT);
30188 /* Most likely this is an unaligned access, and we can't tell at
30189 compilation time. */
30190 emit_insn (gen_unaligned_storesi (mem, reg));
30191 return true;
30194 /* Handle half word leftover. */
30195 if (i + 2 <= length)
30197 reg = gen_lowpart (HImode, val_reg);
30198 addr = plus_constant (Pmode, dst, i);
30199 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30200 if ((align & 1) == 0)
30201 emit_move_insn (mem, reg);
30202 else
30203 emit_insn (gen_unaligned_storehi (mem, reg));
30205 i += 2;
30208 /* Handle single byte leftover. */
30209 if (i + 1 == length)
30211 reg = gen_lowpart (QImode, val_reg);
30212 addr = plus_constant (Pmode, dst, i);
30213 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30214 emit_move_insn (mem, reg);
30217 return true;
30220 /* Set a block of memory using vectorization instructions for both
30221 aligned and unaligned cases. We fill the first LENGTH bytes of
30222 the memory area starting from DSTBASE with byte constant VALUE.
30223 ALIGN is the alignment requirement of memory. */
30224 static bool
30225 arm_block_set_vect (rtx dstbase,
30226 unsigned HOST_WIDE_INT length,
30227 unsigned HOST_WIDE_INT value,
30228 unsigned HOST_WIDE_INT align)
30230 /* Check whether we need to use unaligned store instruction. */
30231 if (((align & 3) != 0 || (length & 3) != 0)
30232 /* Check whether unaligned store instruction is available. */
30233 && (!unaligned_access || BYTES_BIG_ENDIAN))
30234 return false;
30236 if ((align & 3) == 0)
30237 return arm_block_set_aligned_vect (dstbase, length, value, align);
30238 else
30239 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30242 /* Expand a block-set (string store) operation. First we try to do it
30243 using vectorization instructions, then with ARM unaligned access and
30244 double-word stores if profitable. OPERANDS[0] is the destination,
30245 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30246 initialize the memory with, OPERANDS[3] is the known alignment of the
30247 destination. */
30248 bool
30249 arm_gen_setmem (rtx *operands)
30251 rtx dstbase = operands[0];
30252 unsigned HOST_WIDE_INT length;
30253 unsigned HOST_WIDE_INT value;
30254 unsigned HOST_WIDE_INT align;
30256 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30257 return false;
30259 length = UINTVAL (operands[1]);
30260 if (length > 64)
30261 return false;
30263 value = (UINTVAL (operands[2]) & 0xFF);
30264 align = UINTVAL (operands[3]);
30265 if (TARGET_NEON && length >= 8
30266 && current_tune->string_ops_prefer_neon
30267 && arm_block_set_vect (dstbase, length, value, align))
30268 return true;
30270 if (!unaligned_access && (align & 3) != 0)
30271 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30273 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
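/* Illustrative sketch, not part of arm.c: the kind of call that reaches
   arm_gen_setmem through the setmem standard pattern.  Only constant byte
   values and constant lengths of at most 64 bytes are considered for inline
   expansion here; anything else falls back to the memset library call.  */
#if 0
#include <string.h>

unsigned char small_buf[15];

void
clear_small_buf (void)
{
  memset (small_buf, 0x55, sizeof small_buf);   /* constant length <= 64 */
}
#endif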
30277 static bool
30278 arm_macro_fusion_p (void)
30280 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30283 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30284 for MOVW / MOVT macro fusion. */
30286 static bool
30287 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30289 /* We are trying to fuse
30290 movw imm / movt imm
30291 instructions as a group that gets scheduled together. */
30293 rtx set_dest = SET_DEST (curr_set);
30295 if (GET_MODE (set_dest) != SImode)
30296 return false;
30298 /* We are trying to match:
30299 prev (movw) == (set (reg r0) (const_int imm16))
30300 curr (movt) == (set (zero_extract (reg r0)
30301 (const_int 16)
30302 (const_int 16))
30303 (const_int imm16_1))
30305 prev (movw) == (set (reg r1)
30306 (high (symbol_ref ("SYM"))))
30307 curr (movt) == (set (reg r0)
30308 (lo_sum (reg r1)
30309 (symbol_ref ("SYM")))) */
30311 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30313 if (CONST_INT_P (SET_SRC (curr_set))
30314 && CONST_INT_P (SET_SRC (prev_set))
30315 && REG_P (XEXP (set_dest, 0))
30316 && REG_P (SET_DEST (prev_set))
30317 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30318 return true;
30321 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30322 && REG_P (SET_DEST (curr_set))
30323 && REG_P (SET_DEST (prev_set))
30324 && GET_CODE (SET_SRC (prev_set)) == HIGH
30325 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30326 return true;
30328 return false;
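/* Illustrative sketch, not part of arm.c: the MOVW/MOVT pair that this
   fusion keeps back-to-back in the scheduler.  A function returning a
   32-bit constant is lowered on MOVW/MOVT-capable targets to roughly

       movw  r0, #0x5678   @ low 16 bits
       movt  r0, #0x1234   @ high 16 bits

   and the same applies to the HIGH/LO_SUM form used for symbol addresses.  */
#if 0
unsigned
magic_constant (void)
{
  return 0x12345678u;
}
#endif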
30331 static bool
30332 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30334 rtx prev_set = single_set (prev);
30335 rtx curr_set = single_set (curr);
30337 if (!prev_set
30338 || !curr_set)
30339 return false;
30341 if (any_condjump_p (curr))
30342 return false;
30344 if (!arm_macro_fusion_p ())
30345 return false;
30347 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30348 && aarch_crypto_can_dual_issue (prev, curr))
30349 return true;
30351 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30352 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30353 return true;
30355 return false;
30358 /* Return true iff the instruction fusion described by OP is enabled. */
30359 bool
30360 arm_fusion_enabled_p (tune_params::fuse_ops op)
30362 return current_tune->fusible_ops & op;
30365 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30366 scheduled for speculative execution. Reject the long-running division
30367 and square-root instructions. */
30369 static bool
30370 arm_sched_can_speculate_insn (rtx_insn *insn)
30372 switch (get_attr_type (insn))
30374 case TYPE_SDIV:
30375 case TYPE_UDIV:
30376 case TYPE_FDIVS:
30377 case TYPE_FDIVD:
30378 case TYPE_FSQRTS:
30379 case TYPE_FSQRTD:
30380 case TYPE_NEON_FP_SQRT_S:
30381 case TYPE_NEON_FP_SQRT_D:
30382 case TYPE_NEON_FP_SQRT_S_Q:
30383 case TYPE_NEON_FP_SQRT_D_Q:
30384 case TYPE_NEON_FP_DIV_S:
30385 case TYPE_NEON_FP_DIV_D:
30386 case TYPE_NEON_FP_DIV_S_Q:
30387 case TYPE_NEON_FP_DIV_D_Q:
30388 return false;
30389 default:
30390 return true;
30394 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30396 static unsigned HOST_WIDE_INT
30397 arm_asan_shadow_offset (void)
30399 return HOST_WIDE_INT_1U << 29;
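/* Illustrative sketch, not part of arm.c: the offset above plugs into the
   generic AddressSanitizer shadow mapping, shadow = (addr >> 3) + offset,
   so on ARM the shadow byte for ADDR lives at (ADDR >> 3) + 0x20000000.  */
#if 0
#include <stdint.h>

static uintptr_t
asan_shadow_addr (uintptr_t addr)
{
  return (addr >> 3) + ((uintptr_t) 1 << 29);
}
#endif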
30403 /* This is a temporary fix for PR60655. Ideally we need
30404 to handle most of these cases in the generic part but
30405 currently we reject minus (..) (sym_ref). We try to
30406 ameliorate the case with minus (sym_ref1) (sym_ref2)
30407 where they are in the same section. */
30409 static bool
30410 arm_const_not_ok_for_debug_p (rtx p)
30412 tree decl_op0 = NULL;
30413 tree decl_op1 = NULL;
30415 if (GET_CODE (p) == UNSPEC)
30416 return true;
30417 if (GET_CODE (p) == MINUS)
30419 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30421 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30422 if (decl_op1
30423 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30424 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30426 if ((VAR_P (decl_op1)
30427 || TREE_CODE (decl_op1) == CONST_DECL)
30428 && (VAR_P (decl_op0)
30429 || TREE_CODE (decl_op0) == CONST_DECL))
30430 return (get_variable_section (decl_op1, false)
30431 != get_variable_section (decl_op0, false));
30433 if (TREE_CODE (decl_op1) == LABEL_DECL
30434 && TREE_CODE (decl_op0) == LABEL_DECL)
30435 return (DECL_CONTEXT (decl_op1)
30436 != DECL_CONTEXT (decl_op0));
30439 return true;
30443 return false;
30446 /* Return TRUE if X is a reference to a value in a constant pool. */
30447 extern bool
30448 arm_is_constant_pool_ref (rtx x)
30450 return (MEM_P (x)
30451 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30452 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30455 /* Remember the last target of arm_set_current_function. */
30456 static GTY(()) tree arm_previous_fndecl;
30458 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30460 void
30461 save_restore_target_globals (tree new_tree)
30463 /* If we have a previous state, use it. */
30464 if (TREE_TARGET_GLOBALS (new_tree))
30465 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30466 else if (new_tree == target_option_default_node)
30467 restore_target_globals (&default_target_globals);
30468 else
30470 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30471 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30474 arm_option_params_internal ();
30477 /* Invalidate arm_previous_fndecl. */
30479 void
30480 arm_reset_previous_fndecl (void)
30482 arm_previous_fndecl = NULL_TREE;
30485 /* Establish appropriate back-end context for processing the function
30486 FNDECL. The argument might be NULL to indicate processing at top
30487 level, outside of any function scope. */
30489 static void
30490 arm_set_current_function (tree fndecl)
30492 if (!fndecl || fndecl == arm_previous_fndecl)
30493 return;
30495 tree old_tree = (arm_previous_fndecl
30496 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30497 : NULL_TREE);
30499 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30501 /* If current function has no attributes but previous one did,
30502 use the default node. */
30503 if (! new_tree && old_tree)
30504 new_tree = target_option_default_node;
30506 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30507 the default have been handled by save_restore_target_globals from
30508 arm_pragma_target_parse. */
30509 if (old_tree == new_tree)
30510 return;
30512 arm_previous_fndecl = fndecl;
30514 /* First set the target options. */
30515 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30517 save_restore_target_globals (new_tree);
30520 /* Implement TARGET_OPTION_PRINT. */
30522 static void
30523 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30525 int flags = ptr->x_target_flags;
30526 const char *fpu_name;
30528 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30529 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30531 fprintf (file, "%*sselected isa %s\n", indent, "",
30532 TARGET_THUMB2_P (flags) ? "thumb2" :
30533 TARGET_THUMB_P (flags) ? "thumb1" :
30534 "arm");
30536 if (ptr->x_arm_arch_string)
30537 fprintf (file, "%*sselected architecture %s\n", indent, "",
30538 ptr->x_arm_arch_string);
30540 if (ptr->x_arm_cpu_string)
30541 fprintf (file, "%*sselected CPU %s\n", indent, "",
30542 ptr->x_arm_cpu_string);
30544 if (ptr->x_arm_tune_string)
30545 fprintf (file, "%*sselected tune %s\n", indent, "",
30546 ptr->x_arm_tune_string);
30548 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30551 /* Hook to determine if one function can safely inline another. */
30553 static bool
30554 arm_can_inline_p (tree caller, tree callee)
30556 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30557 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30558 bool can_inline = true;
30560 struct cl_target_option *caller_opts
30561 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30562 : target_option_default_node);
30564 struct cl_target_option *callee_opts
30565 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30566 : target_option_default_node);
30568 if (callee_opts == caller_opts)
30569 return true;
30571 /* Callee's ISA features should be a subset of the caller's. */
30572 struct arm_build_target caller_target;
30573 struct arm_build_target callee_target;
30574 caller_target.isa = sbitmap_alloc (isa_num_bits);
30575 callee_target.isa = sbitmap_alloc (isa_num_bits);
30577 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30578 false);
30579 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30580 false);
30581 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30582 can_inline = false;
30584 sbitmap_free (caller_target.isa);
30585 sbitmap_free (callee_target.isa);
30587 /* OK to inline between different modes.
30588 Functions with mode-specific instructions, e.g. using asm,
30589 must be explicitly protected with noinline. */
30590 return can_inline;
30593 /* Hook to fix up a function's alignment when it is affected by a target attribute. */
30595 static void
30596 arm_relayout_function (tree fndecl)
30598 if (DECL_USER_ALIGN (fndecl))
30599 return;
30601 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30603 if (!callee_tree)
30604 callee_tree = target_option_default_node;
30606 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30607 SET_DECL_ALIGN
30608 (fndecl,
30609 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30612 /* Inner function to process the attribute((target(...))); it takes an argument and
30613 sets the current options from the argument. If we have a list, recursively
30614 go over the list. */
30616 static bool
30617 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30619 if (TREE_CODE (args) == TREE_LIST)
30621 bool ret = true;
30623 for (; args; args = TREE_CHAIN (args))
30624 if (TREE_VALUE (args)
30625 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30626 ret = false;
30627 return ret;
30630 else if (TREE_CODE (args) != STRING_CST)
30632 error ("attribute %<target%> argument not a string");
30633 return false;
30636 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30637 char *q;
30639 while ((q = strtok (argstr, ",")) != NULL)
30641 while (ISSPACE (*q)) ++q;
30643 argstr = NULL;
30644 if (!strncmp (q, "thumb", 5))
30645 opts->x_target_flags |= MASK_THUMB;
30647 else if (!strncmp (q, "arm", 3))
30648 opts->x_target_flags &= ~MASK_THUMB;
30650 else if (!strncmp (q, "fpu=", 4))
30652 int fpu_index;
30653 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30654 &fpu_index, CL_TARGET))
30656 error ("invalid fpu for attribute(target(\"%s\"))", q);
30657 return false;
30659 if (fpu_index == TARGET_FPU_auto)
30661 /* This doesn't really make sense until we support
30662 general dynamic selection of the architecture and all
30663 sub-features. */
30664 sorry ("auto fpu selection not currently permitted here");
30665 return false;
30667 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30669 else
30671 error ("attribute(target(\"%s\")) is unknown", q);
30672 return false;
30676 return true;
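/* For illustration, the strings accepted above ("thumb", "arm" and
   "fpu=<name>") come from user code such as:

       __attribute__ ((target ("thumb,fpu=vfpv4")))
       int f (int x) { return x + 1; }

   where "vfpv4" is only an example; any FPU name accepted by -mfpu= is
   valid here.  */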
30679 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30681 tree
30682 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30683 struct gcc_options *opts_set)
30685 struct cl_target_option cl_opts;
30687 if (!arm_valid_target_attribute_rec (args, opts))
30688 return NULL_TREE;
30690 cl_target_option_save (&cl_opts, opts);
30691 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30692 arm_option_check_internal (opts);
30693 /* Do any overrides, such as global options arch=xxx. */
30694 arm_option_override_internal (opts, opts_set);
30696 return build_target_option_node (opts);
30699 static void
30700 add_attribute (const char * mode, tree *attributes)
30702 size_t len = strlen (mode);
30703 tree value = build_string (len, mode);
30705 TREE_TYPE (value) = build_array_type (char_type_node,
30706 build_index_type (size_int (len)));
30708 *attributes = tree_cons (get_identifier ("target"),
30709 build_tree_list (NULL_TREE, value),
30710 *attributes);
30713 /* For testing. Alternately insert thumb and arm mode attributes on functions. */
30715 static void
30716 arm_insert_attributes (tree fndecl, tree * attributes)
30718 const char *mode;
30720 if (! TARGET_FLIP_THUMB)
30721 return;
30723 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30724 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30725 return;
30727 /* Nested definitions must inherit mode. */
30728 if (current_function_decl)
30730 mode = TARGET_THUMB ? "thumb" : "arm";
30731 add_attribute (mode, attributes);
30732 return;
30735 /* If there is already a setting don't change it. */
30736 if (lookup_attribute ("target", *attributes) != NULL)
30737 return;
30739 mode = thumb_flipper ? "thumb" : "arm";
30740 add_attribute (mode, attributes);
30742 thumb_flipper = !thumb_flipper;
30745 /* Hook to validate attribute((target("string"))). */
30747 static bool
30748 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30749 tree args, int ARG_UNUSED (flags))
30751 bool ret = true;
30752 struct gcc_options func_options;
30753 tree cur_tree, new_optimize;
30754 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30756 /* Get the optimization options of the current function. */
30757 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30759 /* If the function changed the optimization levels as well as setting target
30760 options, start with the optimizations specified. */
30761 if (!func_optimize)
30762 func_optimize = optimization_default_node;
30764 /* Init func_options. */
30765 memset (&func_options, 0, sizeof (func_options));
30766 init_options_struct (&func_options, NULL);
30767 lang_hooks.init_options_struct (&func_options);
30769 /* Initialize func_options to the defaults. */
30770 cl_optimization_restore (&func_options,
30771 TREE_OPTIMIZATION (func_optimize));
30773 cl_target_option_restore (&func_options,
30774 TREE_TARGET_OPTION (target_option_default_node));
30776 /* Set func_options flags with new target mode. */
30777 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30778 &global_options_set);
30780 if (cur_tree == NULL_TREE)
30781 ret = false;
30783 new_optimize = build_optimization_node (&func_options);
30785 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30787 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30789 finalize_options_struct (&func_options);
30791 return ret;
30794 /* Match an ISA feature bitmap to a named FPU. We always use the
30795 first entry that exactly matches the feature set, so that we
30796 effectively canonicalize the FPU name for the assembler. */
30797 static const char*
30798 arm_identify_fpu_from_isa (sbitmap isa)
30800 auto_sbitmap fpubits (isa_num_bits);
30801 auto_sbitmap cand_fpubits (isa_num_bits);
30803 bitmap_and (fpubits, isa, isa_all_fpubits);
30805 /* If there are no ISA feature bits relating to the FPU, we must be
30806 doing soft-float. */
30807 if (bitmap_empty_p (fpubits))
30808 return "softvfp";
30810 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30812 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30813 if (bitmap_equal_p (fpubits, cand_fpubits))
30814 return all_fpus[i].name;
30816 /* We must find an entry, or things have gone wrong. */
30817 gcc_unreachable ();
30820 void
30821 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30824 fprintf (stream, "\t.syntax unified\n");
30826 if (TARGET_THUMB)
30828 if (is_called_in_ARM_mode (decl)
30829 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30830 && cfun->is_thunk))
30831 fprintf (stream, "\t.code 32\n");
30832 else if (TARGET_THUMB1)
30833 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30834 else
30835 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30837 else
30838 fprintf (stream, "\t.arm\n");
30840 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30841 (TARGET_SOFT_FLOAT
30842 ? "softvfp"
30843 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30845 if (TARGET_POKE_FUNCTION_NAME)
30846 arm_poke_function_name (stream, (const char *) name);
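/* For illustration, for a Thumb-2 function on a hard-float VFPv3-D16
   configuration the code above emits a preamble along the lines of
   (sketch only; the exact .fpu name comes from arm_identify_fpu_from_isa):

       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv3-d16  */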
30849 /* If MEM's address has the form [base+offset], extract the two parts
30850 into BASE and OFFSET; otherwise return false
30851 after clearing BASE and OFFSET. */
30853 static bool
30854 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30856 rtx addr;
30858 gcc_assert (MEM_P (mem));
30860 addr = XEXP (mem, 0);
30862 /* Strip off const from addresses like (const (addr)). */
30863 if (GET_CODE (addr) == CONST)
30864 addr = XEXP (addr, 0);
30866 if (GET_CODE (addr) == REG)
30868 *base = addr;
30869 *offset = const0_rtx;
30870 return true;
30873 if (GET_CODE (addr) == PLUS
30874 && GET_CODE (XEXP (addr, 0)) == REG
30875 && CONST_INT_P (XEXP (addr, 1)))
30877 *base = XEXP (addr, 0);
30878 *offset = XEXP (addr, 1);
30879 return true;
30882 *base = NULL_RTX;
30883 *offset = NULL_RTX;
30885 return false;
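/* For illustration, the two address shapes accepted above are, in RTL
   (register names purely illustrative):

       (mem (reg r1))                         BASE = r1, OFFSET = 0
       (mem (plus (reg r1) (const_int 8)))    BASE = r1, OFFSET = 8

   Anything else clears BASE and OFFSET and returns false.  */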
30888 /* If INSN is a load or store whose address has the form [base+offset],
30889 extract the two parts into BASE and OFFSET. IS_LOAD is set
30890 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
30891 otherwise return FALSE. */
30893 static bool
30894 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30896 rtx x, dest, src;
30898 gcc_assert (INSN_P (insn));
30899 x = PATTERN (insn);
30900 if (GET_CODE (x) != SET)
30901 return false;
30903 src = SET_SRC (x);
30904 dest = SET_DEST (x);
30905 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30907 *is_load = false;
30908 extract_base_offset_in_addr (dest, base, offset);
30910 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30912 *is_load = true;
30913 extract_base_offset_in_addr (src, base, offset);
30915 else
30916 return false;
30918 return (*base != NULL_RTX && *offset != NULL_RTX);
30921 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30923 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30924 and PRI are only calculated for these instructions. For other instructions,
30925 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30926 instruction fusion can be supported by returning different priorities.
30928 It's important that irrelevant instructions get the largest FUSION_PRI. */
30930 static void
30931 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30932 int *fusion_pri, int *pri)
30934 int tmp, off_val;
30935 bool is_load;
30936 rtx base, offset;
30938 gcc_assert (INSN_P (insn));
30940 tmp = max_pri - 1;
30941 if (!fusion_load_store (insn, &base, &offset, &is_load))
30943 *pri = tmp;
30944 *fusion_pri = tmp;
30945 return;
30948 /* Load goes first. */
30949 if (is_load)
30950 *fusion_pri = tmp - 1;
30951 else
30952 *fusion_pri = tmp - 2;
30954 tmp /= 2;
30956 /* INSN with smaller base register goes first. */
30957 tmp -= ((REGNO (base) & 0xff) << 20);
30959 /* INSN with smaller offset goes first. */
30960 off_val = (int)(INTVAL (offset));
30961 if (off_val >= 0)
30962 tmp -= (off_val & 0xfffff);
30963 else
30964 tmp += ((- off_val) & 0xfffff);
30966 *pri = tmp;
30967 return;
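/* For illustration: two loads through the same base register, say
   ldr r0, [r3, #4] and ldr r1, [r3, #8], receive the same FUSION_PRI
   (so the scheduler groups them), while the smaller offset yields the
   larger PRI and is therefore placed first within the group.  The
   register numbers are purely illustrative.  */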
30971 /* Construct and return a PARALLEL RTX vector with elements numbering the
30972 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30973 the vector - from the perspective of the architecture. This does not
30974 line up with GCC's perspective on lane numbers, so we end up with
30975 different masks depending on our target endian-ness. The diagram
30976 below may help. We must draw the distinction when building masks
30977 which select one half of the vector. An instruction selecting
30978 architectural low-lanes for a big-endian target must be described using
30979 a mask selecting GCC high-lanes.
30981 Big-Endian Little-Endian
30983 GCC 0 1 2 3 3 2 1 0
30984 | x | x | x | x | | x | x | x | x |
30985 Architecture 3 2 1 0 3 2 1 0
30987 Low Mask: { 2, 3 } { 0, 1 }
30988 High Mask: { 0, 1 } { 2, 3 } */
30991 rtx
30992 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30994 int nunits = GET_MODE_NUNITS (mode);
30995 rtvec v = rtvec_alloc (nunits / 2);
30996 int high_base = nunits / 2;
30997 int low_base = 0;
30998 int base;
30999 rtx t1;
31000 int i;
31002 if (BYTES_BIG_ENDIAN)
31003 base = high ? low_base : high_base;
31004 else
31005 base = high ? high_base : low_base;
31007 for (i = 0; i < nunits / 2; i++)
31008 RTVEC_ELT (v, i) = GEN_INT (base + i);
31010 t1 = gen_rtx_PARALLEL (mode, v);
31011 return t1;
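/* Worked example for the function above: for V4SImode (four lanes) the
   returned PARALLEL holds two lane indices.  On little-endian, HIGH
   selects { 2, 3 } and !HIGH selects { 0, 1 }; on big-endian the two are
   swapped, matching the Low/High mask table in the comment above.  */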
31014 /* Check OP for validity as a PARALLEL RTX vector with elements
31015 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31016 from the perspective of the architecture. See the diagram above
31017 arm_simd_vect_par_cnst_half for more details. */
31019 bool
31020 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31021 bool high)
31023 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31024 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31025 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31026 int i = 0;
31028 if (!VECTOR_MODE_P (mode))
31029 return false;
31031 if (count_op != count_ideal)
31032 return false;
31034 for (i = 0; i < count_ideal; i++)
31036 rtx elt_op = XVECEXP (op, 0, i);
31037 rtx elt_ideal = XVECEXP (ideal, 0, i);
31039 if (!CONST_INT_P (elt_op)
31040 || INTVAL (elt_ideal) != INTVAL (elt_op))
31041 return false;
31043 return true;
31046 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31047 in Thumb1. */
31048 static bool
31049 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31050 const_tree)
31052 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31053 if (vcall_offset && TARGET_THUMB1)
31054 return false;
31056 /* Otherwise ok. */
31057 return true;
31060 /* Generate RTL for a conditional branch with rtx comparison CODE in
31061 mode CC_MODE. The destination of the unlikely conditional branch
31062 is LABEL_REF. */
31064 void
31065 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31066 rtx label_ref)
31068 rtx x;
31069 x = gen_rtx_fmt_ee (code, VOIDmode,
31070 gen_rtx_REG (cc_mode, CC_REGNUM),
31071 const0_rtx);
31073 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31074 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31075 pc_rtx);
31076 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
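/* For illustration, for CODE == NE the function above emits RTL of the
   shape

       (set (pc) (if_then_else (ne (reg:CC_MODE CC_REGNUM) (const_int 0))
                               (label_ref LABEL) (pc)))

   marked as an unlikely jump, so later passes lay out the fall-through
   path as the hot one.  */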
31079 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31081 For pure-code sections there is no letter code for this attribute, so
31082 output all the section flags numerically when this is needed. */
31084 static bool
31085 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31088 if (flags & SECTION_ARM_PURECODE)
31090 *num = 0x20000000;
31092 if (!(flags & SECTION_DEBUG))
31093 *num |= 0x2;
31094 if (flags & SECTION_EXCLUDE)
31095 *num |= 0x80000000;
31096 if (flags & SECTION_WRITE)
31097 *num |= 0x1;
31098 if (flags & SECTION_CODE)
31099 *num |= 0x4;
31100 if (flags & SECTION_MERGE)
31101 *num |= 0x10;
31102 if (flags & SECTION_STRINGS)
31103 *num |= 0x20;
31104 if (flags & SECTION_TLS)
31105 *num |= 0x400;
31106 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31107 *num |= 0x200;
31109 return true;
31112 return false;
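/* For illustration: an allocatable, executable pure-code section gets
   SECTION_ARM_PURECODE plus the alloc and exec bits above, i.e.
   0x20000000 | 0x2 | 0x4 == 0x20000006, which the generic ELF output
   code then prints as a numeric flags operand in the .section directive
   (exact directive syntax is up to varasm; sketch only).  */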
31115 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31117 If pure-code is passed as an option, make sure all functions are in
31118 sections that have the SHF_ARM_PURECODE attribute. */
31120 static section *
31121 arm_function_section (tree decl, enum node_frequency freq,
31122 bool startup, bool exit)
31124 const char * section_name;
31125 section * sec;
31127 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31128 return default_function_section (decl, freq, startup, exit);
31130 if (!target_pure_code)
31131 return default_function_section (decl, freq, startup, exit);
31134 section_name = DECL_SECTION_NAME (decl);
31136 /* If a function is not in a named section then it falls under the 'default'
31137 text section, also known as '.text'. We can preserve previous behavior as
31138 the default text section already has the SHF_ARM_PURECODE section
31139 attribute. */
31140 if (!section_name)
31142 section *default_sec = default_function_section (decl, freq, startup,
31143 exit);
31145 /* If default_sec is not null, then it must be a special section like for
31146 example .text.startup. We set the pure-code attribute and return the
31147 same section to preserve existing behavior. */
31148 if (default_sec)
31149 default_sec->common.flags |= SECTION_ARM_PURECODE;
31150 return default_sec;
31153 /* Otherwise look whether a section has already been created with
31154 'section_name'. */
31155 sec = get_named_section (decl, section_name, 0);
31156 if (!sec)
31157 /* If that is not the case, passing NULL as the section's name to
31158 'get_named_section' will create a section with the declaration's
31159 section name. */
31160 sec = get_named_section (decl, NULL, 0);
31162 /* Set the SHF_ARM_PURECODE attribute. */
31163 sec->common.flags |= SECTION_ARM_PURECODE;
31165 return sec;
31168 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31170 If DECL is a function declaration and pure-code is passed as an option
31171 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31172 section's name and RELOC indicates whether the declaration's initializer may
31173 contain runtime relocations. */
31175 static unsigned int
31176 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31178 unsigned int flags = default_section_type_flags (decl, name, reloc);
31180 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31181 flags |= SECTION_ARM_PURECODE;
31183 return flags;
31186 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31188 static void
31189 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31190 rtx op0, rtx op1,
31191 rtx *quot_p, rtx *rem_p)
31193 if (mode == SImode)
31194 gcc_assert (!TARGET_IDIV);
31196 scalar_int_mode libval_mode
31197 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31199 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31200 libval_mode,
31201 op0, GET_MODE (op0),
31202 op1, GET_MODE (op1));
31204 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31205 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31206 GET_MODE_SIZE (mode));
31208 gcc_assert (quotient);
31209 gcc_assert (remainder);
31211 *quot_p = quotient;
31212 *rem_p = remainder;
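/* For illustration: for SImode this typically calls __aeabi_idivmod (or
   __aeabi_uidivmod), whose AEABI-defined result packs quotient and
   remainder into a register pair; with LIBVAL_MODE == DImode the subregs
   above pick the quotient at offset 0 and the remainder at offset 4.
   A rough C picture of the returned pair (a sketch, not the real
   declaration):

       typedef struct { int quot; int rem; } idiv_return_sketch;  */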
31215 /* This function checks for the availability of the coprocessor builtin passed
31216 in BUILTIN for the current target. Returns true if it is available and
31217 false otherwise. If a BUILTIN is passed for which this function has not
31218 been implemented it will cause an internal compiler error. */
31220 bool
31221 arm_coproc_builtin_available (enum unspecv builtin)
31223 /* None of these builtins are available in Thumb mode if the target only
31224 supports Thumb-1. */
31225 if (TARGET_THUMB1)
31226 return false;
31228 switch (builtin)
31230 case VUNSPEC_CDP:
31231 case VUNSPEC_LDC:
31232 case VUNSPEC_LDCL:
31233 case VUNSPEC_STC:
31234 case VUNSPEC_STCL:
31235 case VUNSPEC_MCR:
31236 case VUNSPEC_MRC:
31237 if (arm_arch4)
31238 return true;
31239 break;
31240 case VUNSPEC_CDP2:
31241 case VUNSPEC_LDC2:
31242 case VUNSPEC_LDC2L:
31243 case VUNSPEC_STC2:
31244 case VUNSPEC_STC2L:
31245 case VUNSPEC_MCR2:
31246 case VUNSPEC_MRC2:
31247 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31248 ARMv8-{A,M}. */
31249 if (arm_arch5)
31250 return true;
31251 break;
31252 case VUNSPEC_MCRR:
31253 case VUNSPEC_MRRC:
31254 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31255 ARMv8-{A,M}. */
31256 if (arm_arch6 || arm_arch5te)
31257 return true;
31258 break;
31259 case VUNSPEC_MCRR2:
31260 case VUNSPEC_MRRC2:
31261 if (arm_arch6)
31262 return true;
31263 break;
31264 default:
31265 gcc_unreachable ();
31267 return false;
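/* For illustration of the checks above: the plain CDP/LDC/MCR/MRC forms
   are available from ARMv4 up, the "2" variants (CDP2, LDC2, MCR2, ...)
   need ARMv5, MCRR/MRRC need ARMv5TE or later, MCRR2/MRRC2 need ARMv6,
   and none of them are available when generating Thumb-1 code.  These
   back the ACLE coprocessor intrinsics (e.g. __arm_mcr), assuming the
   usual arm_acle.h mapping.  */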
31270 /* This function returns true if OP is a valid memory operand for the ldc and
31271 stc coprocessor instructions and false otherwise. */
31273 bool
31274 arm_coproc_ldc_stc_legitimate_address (rtx op)
31276 HOST_WIDE_INT range;
31277 /* Has to be a memory operand. */
31278 if (!MEM_P (op))
31279 return false;
31281 op = XEXP (op, 0);
31283 /* We accept registers. */
31284 if (REG_P (op))
31285 return true;
31287 switch (GET_CODE (op))
31289 case PLUS:
31291 /* Or registers with an offset. */
31292 if (!REG_P (XEXP (op, 0)))
31293 return false;
31295 op = XEXP (op, 1);
31297 /* The offset must be an immediate though. */
31298 if (!CONST_INT_P (op))
31299 return false;
31301 range = INTVAL (op);
31303 /* Within the range of [-1020,1020]. */
31304 if (!IN_RANGE (range, -1020, 1020))
31305 return false;
31307 /* And a multiple of 4. */
31308 return (range % 4) == 0;
31310 case PRE_INC:
31311 case POST_INC:
31312 case PRE_DEC:
31313 case POST_DEC:
31314 return REG_P (XEXP (op, 0));
31315 default:
31316 gcc_unreachable ();
31318 return false;
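/* For illustration, addresses the function above accepts or rejects
   (register and offsets purely illustrative):

       [r2]            accepted (plain register)
       [r2, #-1020]    accepted (in range and a multiple of 4)
       [r2, #1024]     rejected (outside [-1020, 1020])
       [r2, #6]        rejected (not a multiple of 4)

   plus the pre/post increment and decrement forms on a register.  */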
31321 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31323 In VFPv1, VFP registers could only be accessed in the mode they were
31324 set, so subregs would be invalid there. However, we don't support
31325 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31327 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31328 VFP registers in little-endian order. We can't describe that accurately to
31329 GCC, so avoid taking subregs of such values.
31331 The only exception is going from a 128-bit to a 64-bit type. In that
31332 case the data layout happens to be consistent for big-endian, so we
31333 explicitly allow that case. */
31335 static bool
31336 arm_can_change_mode_class (machine_mode from, machine_mode to,
31337 reg_class_t rclass)
31339 if (TARGET_BIG_END
31340 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31341 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31342 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31343 && reg_classes_intersect_p (VFP_REGS, rclass))
31344 return false;
31345 return true;
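/* For illustration: on a big-endian target the hook above rejects, say,
   taking SImode subregs of a DFmode value that lives in a VFP register,
   because the doubleword is stored there in little-endian word order;
   the one permitted wide case is the 128-bit to 64-bit change (e.g.
   V4SImode to DImode), whose layout happens to match.  */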
31348 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31349 strcpy from constants will be faster. */
31351 static HOST_WIDE_INT
31352 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31354 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31355 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31356 return MAX (align, BITS_PER_WORD * factor);
31357 return align;
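/* For illustration: when not optimizing for size, a string constant that
   arrives with 8-bit alignment is bumped to BITS_PER_WORD (32) here, or
   to 64 when tuning for XScale in ARM mode (FACTOR == 2); all other
   constants keep the alignment they came in with.  */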
31360 #if CHECKING_P
31361 namespace selftest {
31363 /* Scan the static data tables generated by parsecpu.awk looking for
31364 potential issues with the data. We primarily check for
31365 inconsistencies in the option extensions at present (extensions
31366 that duplicate others but aren't marked as aliases). Furthermore,
31367 for correct canonicalization, later options must never be a subset
31368 of an earlier option. Any extension should also only specify other
31369 feature bits and never an architecture bit. The architecture is inferred
31370 from the declaration of the extension. */
31371 static void
31372 arm_test_cpu_arch_data (void)
31374 const arch_option *arch;
31375 const cpu_option *cpu;
31376 auto_sbitmap target_isa (isa_num_bits);
31377 auto_sbitmap isa1 (isa_num_bits);
31378 auto_sbitmap isa2 (isa_num_bits);
31380 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31382 const cpu_arch_extension *ext1, *ext2;
31384 if (arch->common.extensions == NULL)
31385 continue;
31387 arm_initialize_isa (target_isa, arch->common.isa_bits);
31389 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31391 if (ext1->alias)
31392 continue;
31394 arm_initialize_isa (isa1, ext1->isa_bits);
31395 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31397 if (ext2->alias || ext1->remove != ext2->remove)
31398 continue;
31400 arm_initialize_isa (isa2, ext2->isa_bits);
31401 /* If the option is a subset of the parent option, it doesn't
31402 add anything and so isn't useful. */
31403 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31405 /* If the extension specifies any architectural bits then
31406 disallow it. Extensions should only specify feature bits. */
31407 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31412 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31414 const cpu_arch_extension *ext1, *ext2;
31416 if (cpu->common.extensions == NULL)
31417 continue;
31419 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31421 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31423 if (ext1->alias)
31424 continue;
31426 arm_initialize_isa (isa1, ext1->isa_bits);
31427 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31429 if (ext2->alias || ext1->remove != ext2->remove)
31430 continue;
31432 arm_initialize_isa (isa2, ext2->isa_bits);
31433 /* If the option is a subset of the parent option, it doesn't
31434 add anything and so isn't useful. */
31435 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31437 /* If the extension specifies any architectural bits then
31438 disallow it. Extensions should only specify feature bits. */
31439 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31445 /* Scan the static data tables generated by parsecpu.awk looking for
31446 potential issues with the data. Here we check for consistency between the
31447 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
31448 a feature bit that is not defined by any FPU flag. */
31449 static void
31450 arm_test_fpu_data (void)
31452 auto_sbitmap isa_all_fpubits (isa_num_bits);
31453 auto_sbitmap fpubits (isa_num_bits);
31454 auto_sbitmap tmpset (isa_num_bits);
31456 static const enum isa_feature fpu_bitlist[]
31457 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31458 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31460 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31462 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31463 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31464 bitmap_clear (isa_all_fpubits);
31465 bitmap_copy (isa_all_fpubits, tmpset);
31468 if (!bitmap_empty_p (isa_all_fpubits))
31470 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31471 " group that are not defined by any FPU.\n"
31472 " Check your arm-cpus.in.\n");
31473 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31477 static void
31478 arm_run_selftests (void)
31480 arm_test_cpu_arch_data ();
31481 arm_test_fpu_data ();
31483 } /* Namespace selftest. */
31485 #undef TARGET_RUN_TARGET_SELFTESTS
31486 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31487 #endif /* CHECKING_P */
31489 struct gcc_target targetm = TARGET_INITIALIZER;
31491 #include "gt-arm.h"