PR middle-end/81657
[official-gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
74 /* This file should be included last. */
75 #include "target-def.h"
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
85 void (*arm_lang_output_object_attributes_hook)(void);
87 struct four_ints
89 int i[4];
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
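/* Illustrative usage of the attributes registered in the table above.  The
   attribute spellings are the documented GCC/ARM ones; the function names
   below are hypothetical.

     extern void far_handler (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_abi_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway_fn (void) __attribute__ ((cmse_nonsecure_entry));  */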
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
632 #endif /* ARM_UNWIND_INFO */
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
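/* For reference: the anchor range [-4088, 4095] spans
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, which is the
   divisibility by eight the comment above relies on.  */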
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
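/* An IT instruction can architecturally predicate up to four following
   instructions; arm_restrict_it (set by -mrestrict-it, geared towards the
   ARMv8 rules that deprecate the longer IT forms) limits the backend to a
   single instruction per IT block, as the definition above encodes.  */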
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
820 extern FILE * asm_out_file;
822 /* True if we are currently building a constant table. */
823 int making_const_table;
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
846 /* Active target architecture and tuning. */
848 struct arm_build_target arm_active_target;
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
853 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
854 int arm_arch3m = 0;
856 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
857 int arm_arch4 = 0;
859 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
860 int arm_arch4t = 0;
862 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
863 int arm_arch5 = 0;
865 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
866 int arm_arch5e = 0;
868 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
869 int arm_arch5te = 0;
871 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
872 int arm_arch6 = 0;
874 /* Nonzero if this chip supports the ARM 6K extensions. */
875 int arm_arch6k = 0;
877 /* Nonzero if this chip supports the ARM 6KZ extensions. */
878 int arm_arch6kz = 0;
880 /* Nonzero if instructions present in ARMv6-M can be used. */
881 int arm_arch6m = 0;
883 /* Nonzero if this chip supports the ARM 7 extensions. */
884 int arm_arch7 = 0;
886 /* Nonzero if this chip supports the Large Physical Address Extension. */
887 int arm_arch_lpae = 0;
889 /* Nonzero if instructions not present in the 'M' profile can be used. */
890 int arm_arch_notm = 0;
892 /* Nonzero if instructions present in ARMv7E-M can be used. */
893 int arm_arch7em = 0;
895 /* Nonzero if instructions present in ARMv8 can be used. */
896 int arm_arch8 = 0;
898 /* Nonzero if this chip supports the ARMv8.1 extensions. */
899 int arm_arch8_1 = 0;
901 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
902 int arm_arch8_2 = 0;
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905 Architecture 8.2. */
906 int arm_fp16_inst = 0;
908 /* Nonzero if this chip can benefit from load scheduling. */
909 int arm_ld_sched = 0;
911 /* Nonzero if this chip is a StrongARM. */
912 int arm_tune_strongarm = 0;
914 /* Nonzero if this chip supports Intel Wireless MMX technology. */
915 int arm_arch_iwmmxt = 0;
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
918 int arm_arch_iwmmxt2 = 0;
920 /* Nonzero if this chip is an XScale. */
921 int arm_arch_xscale = 0;
923 /* Nonzero if tuning for XScale */
924 int arm_tune_xscale = 0;
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927 This typically means an ARM6 or ARM7 with MMU or MPU. */
928 int arm_tune_wbuf = 0;
930 /* Nonzero if tuning for Cortex-A9. */
931 int arm_tune_cortex_a9 = 0;
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934 preprocessor.
935 XXX This is a bit of a hack, it's intended to help work around
936 problems in GLD which doesn't understand that armv5t code is
937 interworking clean. */
938 int arm_cpp_interwork = 0;
940 /* Nonzero if chip supports Thumb 1. */
941 int arm_arch_thumb1;
943 /* Nonzero if chip supports Thumb 2. */
944 int arm_arch_thumb2;
946 /* Nonzero if chip supports integer division instruction. */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
950 /* Nonzero if chip disallows volatile memory access in IT block. */
951 int arm_arch_no_volatile_ce;
 953 /* Nonzero if we should use Neon to handle 64-bit operations rather
954 than core registers. */
955 int prefer_neon_for_64bits = 0;
957 /* Nonzero if we shouldn't use literal pools. */
958 bool arm_disable_literal_pool = false;
960 /* The register number to be used for the PIC offset register. */
961 unsigned arm_pic_register = INVALID_REGNUM;
963 enum arm_pcs arm_pcs_default;
965 /* For an explanation of these variables, see final_prescan_insn below. */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
968 enum arm_cond_code arm_current_cc;
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn. */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975 Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask. */
978 int arm_condexec_masklen = 0;
980 /* Nonzero if chip supports the ARMv8 CRC instructions. */
981 int arm_arch_crc = 0;
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
984 int arm_arch_dotprod = 0;
986 /* Nonzero if chip supports the ARMv8-M security extensions. */
987 int arm_arch_cmse = 0;
989 /* Nonzero if the core has a very small, high-latency, multiply unit. */
990 int arm_m_profile_small_mul = 0;
992 /* The condition codes of the ARM, and the inverse function. */
993 static const char * const arm_condition_codes[] =
995 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1000 int arm_regs_in_sequence[] =
1002 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1008 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 | (1 << PIC_OFFSET_TABLE_REGNUM)))
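/* Reading the mask: start from the low registers r0-r7 (0xff) and clear the
   Thumb hard frame pointer, the stack pointer, the program counter and the
   PIC register, leaving the low registers that Thumb-2 code may freely use
   as work registers (an assumption drawn from the macro's name).  */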
1012 /* Initialization code. */
1014 struct cpu_tune
1016 enum processor_type scheduler;
1017 unsigned int tune_flags;
1018 const struct tune_params *tune;
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1024 num_slots, \
1025 l1_size, \
1026 l1_line_size \
1029 /* arm generic vectorizer costs. */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 1, /* vec_unalign_load_cost. */
1040 1, /* vec_unalign_store_cost. */
1041 1, /* vec_store_cost. */
1042 3, /* cond_taken_branch_cost. */
1043 1, /* cond_not_taken_branch_cost. */
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1047 #include "aarch-cost-tables.h"
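/* A reading aid for the cost tables below, assuming the usual definition of
   COSTS_N_INSNS in GCC's rtl.h:

     #define COSTS_N_INSNS(N) ((N) * 4)

   i.e. entries are in quarter-instruction units, and a bare 0 means the
   operation adds no cost over the baseline instruction cost.  */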
1051 const struct cpu_cost_table cortexa9_extra_costs =
1053 /* ALU */
1055 0, /* arith. */
1056 0, /* logical. */
1057 0, /* shift. */
1058 COSTS_N_INSNS (1), /* shift_reg. */
1059 COSTS_N_INSNS (1), /* arith_shift. */
1060 COSTS_N_INSNS (2), /* arith_shift_reg. */
1061 0, /* log_shift. */
1062 COSTS_N_INSNS (1), /* log_shift_reg. */
1063 COSTS_N_INSNS (1), /* extend. */
1064 COSTS_N_INSNS (2), /* extend_arith. */
1065 COSTS_N_INSNS (1), /* bfi. */
1066 COSTS_N_INSNS (1), /* bfx. */
1067 0, /* clz. */
1068 0, /* rev. */
1069 0, /* non_exec. */
1070 true /* non_exec_costs_exec. */
1073 /* MULT SImode */
1075 COSTS_N_INSNS (3), /* simple. */
1076 COSTS_N_INSNS (3), /* flag_setting. */
1077 COSTS_N_INSNS (2), /* extend. */
1078 COSTS_N_INSNS (3), /* add. */
1079 COSTS_N_INSNS (2), /* extend_add. */
1080 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1082 /* MULT DImode */
1084 0, /* simple (N/A). */
1085 0, /* flag_setting (N/A). */
1086 COSTS_N_INSNS (4), /* extend. */
1087 0, /* add (N/A). */
1088 COSTS_N_INSNS (4), /* extend_add. */
1089 0 /* idiv (N/A). */
1092 /* LD/ST */
1094 COSTS_N_INSNS (2), /* load. */
1095 COSTS_N_INSNS (2), /* load_sign_extend. */
1096 COSTS_N_INSNS (2), /* ldrd. */
1097 COSTS_N_INSNS (2), /* ldm_1st. */
1098 1, /* ldm_regs_per_insn_1st. */
1099 2, /* ldm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (5), /* loadf. */
1101 COSTS_N_INSNS (5), /* loadd. */
1102 COSTS_N_INSNS (1), /* load_unaligned. */
1103 COSTS_N_INSNS (2), /* store. */
1104 COSTS_N_INSNS (2), /* strd. */
1105 COSTS_N_INSNS (2), /* stm_1st. */
1106 1, /* stm_regs_per_insn_1st. */
1107 2, /* stm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* storef. */
1109 COSTS_N_INSNS (1), /* stored. */
1110 COSTS_N_INSNS (1), /* store_unaligned. */
1111 COSTS_N_INSNS (1), /* loadv. */
1112 COSTS_N_INSNS (1) /* storev. */
1115 /* FP SFmode */
1117 COSTS_N_INSNS (14), /* div. */
1118 COSTS_N_INSNS (4), /* mult. */
1119 COSTS_N_INSNS (7), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1131 /* FP DFmode */
1133 COSTS_N_INSNS (24), /* div. */
1134 COSTS_N_INSNS (5), /* mult. */
1135 COSTS_N_INSNS (8), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (1), /* fpconst. */
1139 COSTS_N_INSNS (1), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1148 /* Vector */
1150 COSTS_N_INSNS (1) /* alu. */
1154 const struct cpu_cost_table cortexa8_extra_costs =
1156 /* ALU */
1158 0, /* arith. */
1159 0, /* logical. */
1160 COSTS_N_INSNS (1), /* shift. */
1161 0, /* shift_reg. */
1162 COSTS_N_INSNS (1), /* arith_shift. */
1163 0, /* arith_shift_reg. */
1164 COSTS_N_INSNS (1), /* log_shift. */
1165 0, /* log_shift_reg. */
1166 0, /* extend. */
1167 0, /* extend_arith. */
1168 0, /* bfi. */
1169 0, /* bfx. */
1170 0, /* clz. */
1171 0, /* rev. */
1172 0, /* non_exec. */
1173 true /* non_exec_costs_exec. */
1176 /* MULT SImode */
1178 COSTS_N_INSNS (1), /* simple. */
1179 COSTS_N_INSNS (1), /* flag_setting. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* add. */
1182 COSTS_N_INSNS (1), /* extend_add. */
1183 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1185 /* MULT DImode */
1187 0, /* simple (N/A). */
1188 0, /* flag_setting (N/A). */
1189 COSTS_N_INSNS (2), /* extend. */
1190 0, /* add (N/A). */
1191 COSTS_N_INSNS (2), /* extend_add. */
1192 0 /* idiv (N/A). */
1195 /* LD/ST */
1197 COSTS_N_INSNS (1), /* load. */
1198 COSTS_N_INSNS (1), /* load_sign_extend. */
1199 COSTS_N_INSNS (1), /* ldrd. */
1200 COSTS_N_INSNS (1), /* ldm_1st. */
1201 1, /* ldm_regs_per_insn_1st. */
1202 2, /* ldm_regs_per_insn_subsequent. */
1203 COSTS_N_INSNS (1), /* loadf. */
1204 COSTS_N_INSNS (1), /* loadd. */
1205 COSTS_N_INSNS (1), /* load_unaligned. */
1206 COSTS_N_INSNS (1), /* store. */
1207 COSTS_N_INSNS (1), /* strd. */
1208 COSTS_N_INSNS (1), /* stm_1st. */
1209 1, /* stm_regs_per_insn_1st. */
1210 2, /* stm_regs_per_insn_subsequent. */
1211 COSTS_N_INSNS (1), /* storef. */
1212 COSTS_N_INSNS (1), /* stored. */
1213 COSTS_N_INSNS (1), /* store_unaligned. */
1214 COSTS_N_INSNS (1), /* loadv. */
1215 COSTS_N_INSNS (1) /* storev. */
1218 /* FP SFmode */
1220 COSTS_N_INSNS (36), /* div. */
1221 COSTS_N_INSNS (11), /* mult. */
1222 COSTS_N_INSNS (20), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (4), /* widen. */
1229 COSTS_N_INSNS (4), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1234 /* FP DFmode */
1236 COSTS_N_INSNS (64), /* div. */
1237 COSTS_N_INSNS (16), /* mult. */
1238 COSTS_N_INSNS (25), /* mult_addsub. */
1239 COSTS_N_INSNS (30), /* fma. */
1240 COSTS_N_INSNS (9), /* addsub. */
1241 COSTS_N_INSNS (3), /* fpconst. */
1242 COSTS_N_INSNS (3), /* neg. */
1243 COSTS_N_INSNS (6), /* compare. */
1244 COSTS_N_INSNS (6), /* widen. */
1245 COSTS_N_INSNS (6), /* narrow. */
1246 COSTS_N_INSNS (8), /* toint. */
1247 COSTS_N_INSNS (8), /* fromint. */
1248 COSTS_N_INSNS (8) /* roundint. */
1251 /* Vector */
1253 COSTS_N_INSNS (1) /* alu. */
1257 const struct cpu_cost_table cortexa5_extra_costs =
1259 /* ALU */
1261 0, /* arith. */
1262 0, /* logical. */
1263 COSTS_N_INSNS (1), /* shift. */
1264 COSTS_N_INSNS (1), /* shift_reg. */
1265 COSTS_N_INSNS (1), /* arith_shift. */
1266 COSTS_N_INSNS (1), /* arith_shift_reg. */
1267 COSTS_N_INSNS (1), /* log_shift. */
1268 COSTS_N_INSNS (1), /* log_shift_reg. */
1269 COSTS_N_INSNS (1), /* extend. */
1270 COSTS_N_INSNS (1), /* extend_arith. */
1271 COSTS_N_INSNS (1), /* bfi. */
1272 COSTS_N_INSNS (1), /* bfx. */
1273 COSTS_N_INSNS (1), /* clz. */
1274 COSTS_N_INSNS (1), /* rev. */
1275 0, /* non_exec. */
1276 true /* non_exec_costs_exec. */
1280 /* MULT SImode */
1282 0, /* simple. */
1283 COSTS_N_INSNS (1), /* flag_setting. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* add. */
1286 COSTS_N_INSNS (1), /* extend_add. */
1287 COSTS_N_INSNS (7) /* idiv. */
1289 /* MULT DImode */
1291 0, /* simple (N/A). */
1292 0, /* flag_setting (N/A). */
1293 COSTS_N_INSNS (1), /* extend. */
1294 0, /* add. */
1295 COSTS_N_INSNS (2), /* extend_add. */
1296 0 /* idiv (N/A). */
1299 /* LD/ST */
1301 COSTS_N_INSNS (1), /* load. */
1302 COSTS_N_INSNS (1), /* load_sign_extend. */
1303 COSTS_N_INSNS (6), /* ldrd. */
1304 COSTS_N_INSNS (1), /* ldm_1st. */
1305 1, /* ldm_regs_per_insn_1st. */
1306 2, /* ldm_regs_per_insn_subsequent. */
1307 COSTS_N_INSNS (2), /* loadf. */
1308 COSTS_N_INSNS (4), /* loadd. */
1309 COSTS_N_INSNS (1), /* load_unaligned. */
1310 COSTS_N_INSNS (1), /* store. */
1311 COSTS_N_INSNS (3), /* strd. */
1312 COSTS_N_INSNS (1), /* stm_1st. */
1313 1, /* stm_regs_per_insn_1st. */
1314 2, /* stm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (2), /* storef. */
1316 COSTS_N_INSNS (2), /* stored. */
1317 COSTS_N_INSNS (1), /* store_unaligned. */
1318 COSTS_N_INSNS (1), /* loadv. */
1319 COSTS_N_INSNS (1) /* storev. */
1322 /* FP SFmode */
1324 COSTS_N_INSNS (15), /* div. */
1325 COSTS_N_INSNS (3), /* mult. */
1326 COSTS_N_INSNS (7), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1338 /* FP DFmode */
1340 COSTS_N_INSNS (30), /* div. */
1341 COSTS_N_INSNS (6), /* mult. */
1342 COSTS_N_INSNS (10), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1355 /* Vector */
1357 COSTS_N_INSNS (1) /* alu. */
1362 const struct cpu_cost_table cortexa7_extra_costs =
1364 /* ALU */
1366 0, /* arith. */
1367 0, /* logical. */
1368 COSTS_N_INSNS (1), /* shift. */
1369 COSTS_N_INSNS (1), /* shift_reg. */
1370 COSTS_N_INSNS (1), /* arith_shift. */
1371 COSTS_N_INSNS (1), /* arith_shift_reg. */
1372 COSTS_N_INSNS (1), /* log_shift. */
1373 COSTS_N_INSNS (1), /* log_shift_reg. */
1374 COSTS_N_INSNS (1), /* extend. */
1375 COSTS_N_INSNS (1), /* extend_arith. */
1376 COSTS_N_INSNS (1), /* bfi. */
1377 COSTS_N_INSNS (1), /* bfx. */
1378 COSTS_N_INSNS (1), /* clz. */
1379 COSTS_N_INSNS (1), /* rev. */
1380 0, /* non_exec. */
1381 true /* non_exec_costs_exec. */
1385 /* MULT SImode */
1387 0, /* simple. */
1388 COSTS_N_INSNS (1), /* flag_setting. */
1389 COSTS_N_INSNS (1), /* extend. */
1390 COSTS_N_INSNS (1), /* add. */
1391 COSTS_N_INSNS (1), /* extend_add. */
1392 COSTS_N_INSNS (7) /* idiv. */
1394 /* MULT DImode */
1396 0, /* simple (N/A). */
1397 0, /* flag_setting (N/A). */
1398 COSTS_N_INSNS (1), /* extend. */
1399 0, /* add. */
1400 COSTS_N_INSNS (2), /* extend_add. */
1401 0 /* idiv (N/A). */
1404 /* LD/ST */
1406 COSTS_N_INSNS (1), /* load. */
1407 COSTS_N_INSNS (1), /* load_sign_extend. */
1408 COSTS_N_INSNS (3), /* ldrd. */
1409 COSTS_N_INSNS (1), /* ldm_1st. */
1410 1, /* ldm_regs_per_insn_1st. */
1411 2, /* ldm_regs_per_insn_subsequent. */
1412 COSTS_N_INSNS (2), /* loadf. */
1413 COSTS_N_INSNS (2), /* loadd. */
1414 COSTS_N_INSNS (1), /* load_unaligned. */
1415 COSTS_N_INSNS (1), /* store. */
1416 COSTS_N_INSNS (3), /* strd. */
1417 COSTS_N_INSNS (1), /* stm_1st. */
1418 1, /* stm_regs_per_insn_1st. */
1419 2, /* stm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (2), /* storef. */
1421 COSTS_N_INSNS (2), /* stored. */
1422 COSTS_N_INSNS (1), /* store_unaligned. */
1423 COSTS_N_INSNS (1), /* loadv. */
1424 COSTS_N_INSNS (1) /* storev. */
1427 /* FP SFmode */
1429 COSTS_N_INSNS (15), /* div. */
1430 COSTS_N_INSNS (3), /* mult. */
1431 COSTS_N_INSNS (7), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1443 /* FP DFmode */
1445 COSTS_N_INSNS (30), /* div. */
1446 COSTS_N_INSNS (6), /* mult. */
1447 COSTS_N_INSNS (10), /* mult_addsub. */
1448 COSTS_N_INSNS (7), /* fma. */
1449 COSTS_N_INSNS (3), /* addsub. */
1450 COSTS_N_INSNS (3), /* fpconst. */
1451 COSTS_N_INSNS (3), /* neg. */
1452 COSTS_N_INSNS (3), /* compare. */
1453 COSTS_N_INSNS (3), /* widen. */
1454 COSTS_N_INSNS (3), /* narrow. */
1455 COSTS_N_INSNS (3), /* toint. */
1456 COSTS_N_INSNS (3), /* fromint. */
1457 COSTS_N_INSNS (3) /* roundint. */
1460 /* Vector */
1462 COSTS_N_INSNS (1) /* alu. */
1466 const struct cpu_cost_table cortexa12_extra_costs =
1468 /* ALU */
1470 0, /* arith. */
1471 0, /* logical. */
1472 0, /* shift. */
1473 COSTS_N_INSNS (1), /* shift_reg. */
1474 COSTS_N_INSNS (1), /* arith_shift. */
1475 COSTS_N_INSNS (1), /* arith_shift_reg. */
1476 COSTS_N_INSNS (1), /* log_shift. */
1477 COSTS_N_INSNS (1), /* log_shift_reg. */
1478 0, /* extend. */
1479 COSTS_N_INSNS (1), /* extend_arith. */
1480 0, /* bfi. */
1481 COSTS_N_INSNS (1), /* bfx. */
1482 COSTS_N_INSNS (1), /* clz. */
1483 COSTS_N_INSNS (1), /* rev. */
1484 0, /* non_exec. */
1485 true /* non_exec_costs_exec. */
1487 /* MULT SImode */
1490 COSTS_N_INSNS (2), /* simple. */
1491 COSTS_N_INSNS (3), /* flag_setting. */
1492 COSTS_N_INSNS (2), /* extend. */
1493 COSTS_N_INSNS (3), /* add. */
1494 COSTS_N_INSNS (2), /* extend_add. */
1495 COSTS_N_INSNS (18) /* idiv. */
1497 /* MULT DImode */
1499 0, /* simple (N/A). */
1500 0, /* flag_setting (N/A). */
1501 COSTS_N_INSNS (3), /* extend. */
1502 0, /* add (N/A). */
1503 COSTS_N_INSNS (3), /* extend_add. */
1504 0 /* idiv (N/A). */
1507 /* LD/ST */
1509 COSTS_N_INSNS (3), /* load. */
1510 COSTS_N_INSNS (3), /* load_sign_extend. */
1511 COSTS_N_INSNS (3), /* ldrd. */
1512 COSTS_N_INSNS (3), /* ldm_1st. */
1513 1, /* ldm_regs_per_insn_1st. */
1514 2, /* ldm_regs_per_insn_subsequent. */
1515 COSTS_N_INSNS (3), /* loadf. */
1516 COSTS_N_INSNS (3), /* loadd. */
1517 0, /* load_unaligned. */
1518 0, /* store. */
1519 0, /* strd. */
1520 0, /* stm_1st. */
1521 1, /* stm_regs_per_insn_1st. */
1522 2, /* stm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (2), /* storef. */
1524 COSTS_N_INSNS (2), /* stored. */
1525 0, /* store_unaligned. */
1526 COSTS_N_INSNS (1), /* loadv. */
1527 COSTS_N_INSNS (1) /* storev. */
1530 /* FP SFmode */
1532 COSTS_N_INSNS (17), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1546 /* FP DFmode */
1548 COSTS_N_INSNS (31), /* div. */
1549 COSTS_N_INSNS (4), /* mult. */
1550 COSTS_N_INSNS (8), /* mult_addsub. */
1551 COSTS_N_INSNS (8), /* fma. */
1552 COSTS_N_INSNS (4), /* addsub. */
1553 COSTS_N_INSNS (2), /* fpconst. */
1554 COSTS_N_INSNS (2), /* neg. */
1555 COSTS_N_INSNS (2), /* compare. */
1556 COSTS_N_INSNS (4), /* widen. */
1557 COSTS_N_INSNS (4), /* narrow. */
1558 COSTS_N_INSNS (4), /* toint. */
1559 COSTS_N_INSNS (4), /* fromint. */
1560 COSTS_N_INSNS (4) /* roundint. */
1563 /* Vector */
1565 COSTS_N_INSNS (1) /* alu. */
1569 const struct cpu_cost_table cortexa15_extra_costs =
1571 /* ALU */
1573 0, /* arith. */
1574 0, /* logical. */
1575 0, /* shift. */
1576 0, /* shift_reg. */
1577 COSTS_N_INSNS (1), /* arith_shift. */
1578 COSTS_N_INSNS (1), /* arith_shift_reg. */
1579 COSTS_N_INSNS (1), /* log_shift. */
1580 COSTS_N_INSNS (1), /* log_shift_reg. */
1581 0, /* extend. */
1582 COSTS_N_INSNS (1), /* extend_arith. */
1583 COSTS_N_INSNS (1), /* bfi. */
1584 0, /* bfx. */
1585 0, /* clz. */
1586 0, /* rev. */
1587 0, /* non_exec. */
1588 true /* non_exec_costs_exec. */
1590 /* MULT SImode */
1593 COSTS_N_INSNS (2), /* simple. */
1594 COSTS_N_INSNS (3), /* flag_setting. */
1595 COSTS_N_INSNS (2), /* extend. */
1596 COSTS_N_INSNS (2), /* add. */
1597 COSTS_N_INSNS (2), /* extend_add. */
1598 COSTS_N_INSNS (18) /* idiv. */
1600 /* MULT DImode */
1602 0, /* simple (N/A). */
1603 0, /* flag_setting (N/A). */
1604 COSTS_N_INSNS (3), /* extend. */
1605 0, /* add (N/A). */
1606 COSTS_N_INSNS (3), /* extend_add. */
1607 0 /* idiv (N/A). */
1610 /* LD/ST */
1612 COSTS_N_INSNS (3), /* load. */
1613 COSTS_N_INSNS (3), /* load_sign_extend. */
1614 COSTS_N_INSNS (3), /* ldrd. */
1615 COSTS_N_INSNS (4), /* ldm_1st. */
1616 1, /* ldm_regs_per_insn_1st. */
1617 2, /* ldm_regs_per_insn_subsequent. */
1618 COSTS_N_INSNS (4), /* loadf. */
1619 COSTS_N_INSNS (4), /* loadd. */
1620 0, /* load_unaligned. */
1621 0, /* store. */
1622 0, /* strd. */
1623 COSTS_N_INSNS (1), /* stm_1st. */
1624 1, /* stm_regs_per_insn_1st. */
1625 2, /* stm_regs_per_insn_subsequent. */
1626 0, /* storef. */
1627 0, /* stored. */
1628 0, /* store_unaligned. */
1629 COSTS_N_INSNS (1), /* loadv. */
1630 COSTS_N_INSNS (1) /* storev. */
1633 /* FP SFmode */
1635 COSTS_N_INSNS (17), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (5), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1649 /* FP DFmode */
1651 COSTS_N_INSNS (31), /* div. */
1652 COSTS_N_INSNS (4), /* mult. */
1653 COSTS_N_INSNS (8), /* mult_addsub. */
1654 COSTS_N_INSNS (8), /* fma. */
1655 COSTS_N_INSNS (4), /* addsub. */
1656 COSTS_N_INSNS (2), /* fpconst. */
1657 COSTS_N_INSNS (2), /* neg. */
1658 COSTS_N_INSNS (2), /* compare. */
1659 COSTS_N_INSNS (4), /* widen. */
1660 COSTS_N_INSNS (4), /* narrow. */
1661 COSTS_N_INSNS (4), /* toint. */
1662 COSTS_N_INSNS (4), /* fromint. */
1663 COSTS_N_INSNS (4) /* roundint. */
1666 /* Vector */
1668 COSTS_N_INSNS (1) /* alu. */
1672 const struct cpu_cost_table v7m_extra_costs =
1674 /* ALU */
1676 0, /* arith. */
1677 0, /* logical. */
1678 0, /* shift. */
1679 0, /* shift_reg. */
1680 0, /* arith_shift. */
1681 COSTS_N_INSNS (1), /* arith_shift_reg. */
1682 0, /* log_shift. */
1683 COSTS_N_INSNS (1), /* log_shift_reg. */
1684 0, /* extend. */
1685 COSTS_N_INSNS (1), /* extend_arith. */
1686 0, /* bfi. */
1687 0, /* bfx. */
1688 0, /* clz. */
1689 0, /* rev. */
1690 COSTS_N_INSNS (1), /* non_exec. */
1691 false /* non_exec_costs_exec. */
1694 /* MULT SImode */
1696 COSTS_N_INSNS (1), /* simple. */
1697 COSTS_N_INSNS (1), /* flag_setting. */
1698 COSTS_N_INSNS (2), /* extend. */
1699 COSTS_N_INSNS (1), /* add. */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 COSTS_N_INSNS (8) /* idiv. */
1703 /* MULT DImode */
1705 0, /* simple (N/A). */
1706 0, /* flag_setting (N/A). */
1707 COSTS_N_INSNS (2), /* extend. */
1708 0, /* add (N/A). */
1709 COSTS_N_INSNS (3), /* extend_add. */
1710 0 /* idiv (N/A). */
1713 /* LD/ST */
1715 COSTS_N_INSNS (2), /* load. */
1716 0, /* load_sign_extend. */
1717 COSTS_N_INSNS (3), /* ldrd. */
1718 COSTS_N_INSNS (2), /* ldm_1st. */
1719 1, /* ldm_regs_per_insn_1st. */
1720 1, /* ldm_regs_per_insn_subsequent. */
1721 COSTS_N_INSNS (2), /* loadf. */
1722 COSTS_N_INSNS (3), /* loadd. */
1723 COSTS_N_INSNS (1), /* load_unaligned. */
1724 COSTS_N_INSNS (2), /* store. */
1725 COSTS_N_INSNS (3), /* strd. */
1726 COSTS_N_INSNS (2), /* stm_1st. */
1727 1, /* stm_regs_per_insn_1st. */
1728 1, /* stm_regs_per_insn_subsequent. */
1729 COSTS_N_INSNS (2), /* storef. */
1730 COSTS_N_INSNS (3), /* stored. */
1731 COSTS_N_INSNS (1), /* store_unaligned. */
1732 COSTS_N_INSNS (1), /* loadv. */
1733 COSTS_N_INSNS (1) /* storev. */
1736 /* FP SFmode */
1738 COSTS_N_INSNS (7), /* div. */
1739 COSTS_N_INSNS (2), /* mult. */
1740 COSTS_N_INSNS (5), /* mult_addsub. */
1741 COSTS_N_INSNS (3), /* fma. */
1742 COSTS_N_INSNS (1), /* addsub. */
1743 0, /* fpconst. */
1744 0, /* neg. */
1745 0, /* compare. */
1746 0, /* widen. */
1747 0, /* narrow. */
1748 0, /* toint. */
1749 0, /* fromint. */
1750 0 /* roundint. */
1752 /* FP DFmode */
1754 COSTS_N_INSNS (15), /* div. */
1755 COSTS_N_INSNS (5), /* mult. */
1756 COSTS_N_INSNS (7), /* mult_addsub. */
1757 COSTS_N_INSNS (7), /* fma. */
1758 COSTS_N_INSNS (3), /* addsub. */
1759 0, /* fpconst. */
1760 0, /* neg. */
1761 0, /* compare. */
1762 0, /* widen. */
1763 0, /* narrow. */
1764 0, /* toint. */
1765 0, /* fromint. */
1766 0 /* roundint. */
1769 /* Vector */
1771 COSTS_N_INSNS (1) /* alu. */
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1777 /* int. */
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1783 /* float. */
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1789 /* vector. */
1791 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1792 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1793 COSTS_N_INSNS (0) /* AMO_WB. */
1797 const struct tune_params arm_slowmul_tune =
1799 &generic_extra_costs, /* Insn extra costs. */
1800 &generic_addr_mode_costs, /* Addressing mode costs. */
1801 NULL, /* Sched adj cost. */
1802 arm_default_branch_cost,
1803 &arm_default_vec_cost,
1804 3, /* Constant limit. */
1805 5, /* Max cond insns. */
1806 8, /* Memset max inline. */
1807 1, /* Issue rate. */
1808 ARM_PREFETCH_NOT_BENEFICIAL,
1809 tune_params::PREF_CONST_POOL_TRUE,
1810 tune_params::PREF_LDRD_FALSE,
1811 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1813 tune_params::DISPARAGE_FLAGS_NEITHER,
1814 tune_params::PREF_NEON_64_FALSE,
1815 tune_params::PREF_NEON_STRINGOPS_FALSE,
1816 tune_params::FUSE_NOTHING,
1817 tune_params::SCHED_AUTOPREF_OFF
1820 const struct tune_params arm_fastmul_tune =
1822 &generic_extra_costs, /* Insn extra costs. */
1823 &generic_addr_mode_costs, /* Addressing mode costs. */
1824 NULL, /* Sched adj cost. */
1825 arm_default_branch_cost,
1826 &arm_default_vec_cost,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 8, /* Memset max inline. */
1830 1, /* Issue rate. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 tune_params::PREF_CONST_POOL_TRUE,
1833 tune_params::PREF_LDRD_FALSE,
1834 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1836 tune_params::DISPARAGE_FLAGS_NEITHER,
1837 tune_params::PREF_NEON_64_FALSE,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE,
1839 tune_params::FUSE_NOTHING,
1840 tune_params::SCHED_AUTOPREF_OFF
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1846 const struct tune_params arm_strongarm_tune =
1848 &generic_extra_costs, /* Insn extra costs. */
1849 &generic_addr_mode_costs, /* Addressing mode costs. */
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_TRUE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_64_FALSE,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE,
1865 tune_params::FUSE_NOTHING,
1866 tune_params::SCHED_AUTOPREF_OFF
1869 const struct tune_params arm_xscale_tune =
1871 &generic_extra_costs, /* Insn extra costs. */
1872 &generic_addr_mode_costs, /* Addressing mode costs. */
1873 xscale_sched_adjust_cost,
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 2, /* Constant limit. */
1877 3, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_TRUE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1892 const struct tune_params arm_9e_tune =
1894 &generic_extra_costs, /* Insn extra costs. */
1895 &generic_addr_mode_costs, /* Addressing mode costs. */
1896 NULL, /* Sched adj cost. */
1897 arm_default_branch_cost,
1898 &arm_default_vec_cost,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 1, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL,
1904 tune_params::PREF_CONST_POOL_TRUE,
1905 tune_params::PREF_LDRD_FALSE,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER,
1909 tune_params::PREF_NEON_64_FALSE,
1910 tune_params::PREF_NEON_STRINGOPS_FALSE,
1911 tune_params::FUSE_NOTHING,
1912 tune_params::SCHED_AUTOPREF_OFF
1915 const struct tune_params arm_marvell_pj4_tune =
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_v6t2_tune =
1940 &generic_extra_costs, /* Insn extra costs. */
1941 &generic_addr_mode_costs, /* Addressing mode costs. */
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_FALSE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1963 const struct tune_params arm_cortex_tune =
1965 &generic_extra_costs,
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_FALSE,
1982 tune_params::FUSE_NOTHING,
1983 tune_params::SCHED_AUTOPREF_OFF
1986 const struct tune_params arm_cortex_a8_tune =
1988 &cortexa8_extra_costs,
1989 &generic_addr_mode_costs, /* Addressing mode costs. */
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 2, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 tune_params::FUSE_NOTHING,
2006 tune_params::SCHED_AUTOPREF_OFF
2009 const struct tune_params arm_cortex_a7_tune =
2011 &cortexa7_extra_costs,
2012 &generic_addr_mode_costs, /* Addressing mode costs. */
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 2, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2032 const struct tune_params arm_cortex_a15_tune =
2034 &cortexa15_extra_costs,
2035 &generic_addr_mode_costs, /* Addressing mode costs. */
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 8, /* Memset max inline. */
2042 3, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_TRUE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_FULL
2055 const struct tune_params arm_cortex_a35_tune =
2057 &cortexa53_extra_costs,
2058 &generic_addr_mode_costs, /* Addressing mode costs. */
2059 NULL, /* Sched adj cost. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost,
2062 1, /* Constant limit. */
2063 5, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 1, /* Issue rate. */
2066 ARM_PREFETCH_NOT_BENEFICIAL,
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_FALSE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_NEITHER,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_OFF
2078 const struct tune_params arm_cortex_a53_tune =
2080 &cortexa53_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_cortex_a57_tune =
2103 &cortexa57_extra_costs,
2104 &generic_addr_mode_costs, /* addressing mode costs */
2105 NULL, /* Sched adj cost. */
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 3, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_TRUE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_ALL,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_TRUE,
2120 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121 tune_params::SCHED_AUTOPREF_FULL
2124 const struct tune_params arm_exynosm1_tune =
2126 &exynosm1_extra_costs,
2127 &generic_addr_mode_costs, /* Addressing mode costs. */
2128 NULL, /* Sched adj cost. */
2129 arm_default_branch_cost,
2130 &arm_default_vec_cost,
2131 1, /* Constant limit. */
2132 2, /* Max cond insns. */
2133 8, /* Memset max inline. */
2134 3, /* Issue rate. */
2135 ARM_PREFETCH_NOT_BENEFICIAL,
2136 tune_params::PREF_CONST_POOL_FALSE,
2137 tune_params::PREF_LDRD_TRUE,
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2140 tune_params::DISPARAGE_FLAGS_ALL,
2141 tune_params::PREF_NEON_64_FALSE,
2142 tune_params::PREF_NEON_STRINGOPS_TRUE,
2143 tune_params::FUSE_NOTHING,
2144 tune_params::SCHED_AUTOPREF_OFF
2147 const struct tune_params arm_xgene1_tune =
2149 &xgene1_extra_costs,
2150 &generic_addr_mode_costs, /* Addressing mode costs. */
2151 NULL, /* Sched adj cost. */
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 32, /* Memset max inline. */
2157 4, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL,
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_TRUE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171 less appealing. Set max_insns_skipped to a low value. */
2173 const struct tune_params arm_cortex_a5_tune =
2175 &cortexa5_extra_costs,
2176 &generic_addr_mode_costs, /* Addressing mode costs. */
2177 NULL, /* Sched adj cost. */
2178 arm_cortex_a5_branch_cost,
2179 &arm_default_vec_cost,
2180 1, /* Constant limit. */
2181 1, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL,
2185 tune_params::PREF_CONST_POOL_FALSE,
2186 tune_params::PREF_LDRD_FALSE,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_NEITHER,
2190 tune_params::PREF_NEON_64_FALSE,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2196 const struct tune_params arm_cortex_a9_tune =
2198 &cortexa9_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 cortex_a9_sched_adjust_cost,
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost,
2203 1, /* Constant limit. */
2204 5, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_BENEFICIAL(4,32,32),
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_FALSE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_NEITHER,
2213 tune_params::PREF_NEON_64_FALSE,
2214 tune_params::PREF_NEON_STRINGOPS_FALSE,
2215 tune_params::FUSE_NOTHING,
2216 tune_params::SCHED_AUTOPREF_OFF
2219 const struct tune_params arm_cortex_a12_tune =
2221 &cortexa12_extra_costs,
2222 &generic_addr_mode_costs, /* Addressing mode costs. */
2223 NULL, /* Sched adj cost. */
2224 arm_default_branch_cost,
2225 &arm_default_vec_cost, /* Vectorizer costs. */
2226 1, /* Constant limit. */
2227 2, /* Max cond insns. */
2228 8, /* Memset max inline. */
2229 2, /* Issue rate. */
2230 ARM_PREFETCH_NOT_BENEFICIAL,
2231 tune_params::PREF_CONST_POOL_FALSE,
2232 tune_params::PREF_LDRD_TRUE,
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2235 tune_params::DISPARAGE_FLAGS_ALL,
2236 tune_params::PREF_NEON_64_FALSE,
2237 tune_params::PREF_NEON_STRINGOPS_TRUE,
2238 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239 tune_params::SCHED_AUTOPREF_OFF
2242 const struct tune_params arm_cortex_a73_tune =
2244 &cortexa57_extra_costs,
2245 &generic_addr_mode_costs, /* Addressing mode costs. */
2246 NULL, /* Sched adj cost. */
2247 arm_default_branch_cost,
2248 &arm_default_vec_cost, /* Vectorizer costs. */
2249 1, /* Constant limit. */
2250 2, /* Max cond insns. */
2251 8, /* Memset max inline. */
2252 2, /* Issue rate. */
2253 ARM_PREFETCH_NOT_BENEFICIAL,
2254 tune_params::PREF_CONST_POOL_FALSE,
2255 tune_params::PREF_LDRD_TRUE,
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2258 tune_params::DISPARAGE_FLAGS_ALL,
2259 tune_params::PREF_NEON_64_FALSE,
2260 tune_params::PREF_NEON_STRINGOPS_TRUE,
2261 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262 tune_params::SCHED_AUTOPREF_FULL
2265 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2266 cycle to execute each (two cycles for the MOVW/MOVT pair). An LDR from the constant pool also takes two cycles
2267 to execute, but mildly increases pipelining opportunity (consecutive
2268 loads/stores can be pipelined together, saving one cycle), and may also
2269 improve icache utilisation. Hence we prefer the constant pool for such
2270 processors. */
2272 const struct tune_params arm_v7m_tune =
2274 &v7m_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_cortex_m_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 1, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2295 /* Cortex-M7 tuning. */
2297 const struct tune_params arm_cortex_m7_tune =
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m7_branch_cost,
2303 &arm_default_vec_cost,
2304 0, /* Constant limit. */
2305 1, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 2, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_64_FALSE,
2315 tune_params::PREF_NEON_STRINGOPS_FALSE,
2316 tune_params::FUSE_NOTHING,
2317 tune_params::SCHED_AUTOPREF_OFF
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322 cortex-m23. */
2323 const struct tune_params arm_v6m_tune =
2325 &generic_extra_costs, /* Insn extra costs. */
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_default_branch_cost,
2329 &arm_default_vec_cost, /* Vectorizer costs. */
2330 1, /* Constant limit. */
2331 5, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_FALSE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_64_FALSE,
2341 tune_params::PREF_NEON_STRINGOPS_FALSE,
2342 tune_params::FUSE_NOTHING,
2343 tune_params::SCHED_AUTOPREF_OFF
2346 const struct tune_params arm_fa726te_tune =
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 fa726te_sched_adjust_cost,
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost,
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 2, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_TRUE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_64_FALSE,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2369 /* Auto-generated CPU, FPU and architecture tables. */
2370 #include "arm-cpu-data.h"
2372 /* The name of the preprocessor macro to define for this architecture. PROFILE
2373 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2374 is thus chosen to be big enough to hold the longest architecture name. */
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
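/* For example, with -march=armv8-a the "PROFILE" placeholder is overwritten
   so that the buffer holds "__ARM_ARCH_8A__", which is the macro name that
   ends up being defined for the preprocessor.  */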
2378 /* Supported TLS relocations. */
2380 enum tls_reloc {
2381 TLS_GD32,
2382 TLS_LDM32,
2383 TLS_LDO32,
2384 TLS_IE32,
2385 TLS_LE32,
2386 TLS_DESCSEQ /* GNU scheme */
2389 /* The maximum number of insns to be used when loading a constant. */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2393 return size_p ? 1 : current_tune->constant_limit;
2396 /* Emit an insn that's a simple single-set. Both the operands must be known
2397 to be valid. */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2401 return emit_insn (gen_rtx_SET (x, y));
2404 /* Return the number of bits set in VALUE. */
2405 static unsigned
2406 bit_count (unsigned long value)
2408 unsigned long count = 0;
2410 while (value)
2412 count++;
2413 value &= value - 1; /* Clear the least-significant set bit. */
2416 return count;
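/* Worked example: for value = 0b101100 the loop iterates
   0b101100 -> 0b101000 -> 0b100000 -> 0 and returns 3.  Each
   "value &= value - 1" step clears exactly the lowest set bit
   (Kernighan's method), so the body runs once per set bit.  */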
2419 /* Return the number of bits set in BMAP. */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2423 unsigned int count = 0;
2424 unsigned int n = 0;
2425 sbitmap_iterator sbi;
2427 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428 count++;
2429 return count;
2432 typedef struct
2434 machine_mode mode;
2435 const char *name;
2436 } arm_fixed_mode_set;
2438 /* A small helper for setting fixed-point libfuncs. */
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 const char *funcname, const char *modename,
2443 int num_suffix)
2445 char buffer[50];
2447 if (num_suffix == 0)
2448 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449 else
2450 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2452 set_optab_libfunc (optable, mode, buffer);
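/* For example, a call such as
     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
   (as issued from arm_init_libfuncs below) registers the libcall name
   "__gnu_addqq3"; a num_suffix of 0 simply omits the trailing digit.  */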
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 machine_mode from, const char *funcname,
2458 const char *toname, const char *fromname)
2460 char buffer[50];
2461 const char *maybe_suffix_2 = "";
2463 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2464 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467 maybe_suffix_2 = "2";
2469 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 maybe_suffix_2);
2472 set_conv_libfunc (optable, to, from, buffer);
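/* For example, registering a conversion from SImode ("si") to SQmode ("sq")
   with the fract optab builds the name "__gnu_fractsisq" (no "2" suffix,
   since SImode is not a fixed-point mode), whereas SQmode to QQmode builds
   "__gnu_fractsqqq2" because both are signed fractional modes.  */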
2475 /* Set up library functions unique to ARM. */
2477 static void
2478 arm_init_libfuncs (void)
2480 /* For Linux, we have access to kernel support for atomic operations. */
2481 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2482 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2484 /* There are no special library functions unless we are using the
2485 ARM BPABI. */
2486 if (!TARGET_BPABI)
2487 return;
2489 /* The functions below are described in Section 4 of the "Run-Time
2490 ABI for the ARM architecture", Version 1.0. */
2492 /* Double-precision floating-point arithmetic. Table 2. */
2493 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2494 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2495 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2496 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2497 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2499 /* Double-precision comparisons. Table 3. */
2500 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2501 set_optab_libfunc (ne_optab, DFmode, NULL);
2502 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2503 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2504 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2505 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2506 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2508 /* Single-precision floating-point arithmetic. Table 4. */
2509 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2510 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2511 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2512 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2513 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2515 /* Single-precision comparisons. Table 5. */
2516 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2517 set_optab_libfunc (ne_optab, SFmode, NULL);
2518 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2519 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2520 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2521 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2522 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2524 /* Floating-point to integer conversions. Table 6. */
2525 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2526 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2527 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2528 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2529 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2530 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2531 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2532 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2534 /* Conversions between floating types. Table 7. */
2535 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2536 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2538 /* Integer to floating-point conversions. Table 8. */
2539 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2540 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2541 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2542 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2543 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2544 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2545 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2546 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2548 /* Long long. Table 9. */
2549 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2550 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2551 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2552 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2553 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2554 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2555 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2556 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2558 /* Integer (32/32->32) division. \S 4.3.1. */
2559 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2560 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2562 /* The divmod functions are designed so that they can be used for
2563 plain division, even though they return both the quotient and the
2564 remainder. The quotient is returned in the usual location (i.e.,
2565 r0 for SImode, {r0, r1} for DImode), just as would be expected
2566 for an ordinary division routine. Because the AAPCS calling
2567 conventions specify that all of { r0, r1, r2, r3 } are
2568 call-clobbered registers, there is no need to tell the compiler
2569 explicitly that those registers are clobbered by these
2570 routines. */
2571 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2572 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
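/* So a plain 64-bit signed division expands to a call to __aeabi_ldivmod
   and simply uses the quotient returned in {r0, r1}; the remainder, which
   the run-time ABI hands back in {r2, r3}, is ignored.  */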
2574 /* For SImode division the ABI provides div-without-mod routines,
2575 which are faster. */
2576 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2577 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2579 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2580 divmod libcalls instead. */
2581 set_optab_libfunc (smod_optab, DImode, NULL);
2582 set_optab_libfunc (umod_optab, DImode, NULL);
2583 set_optab_libfunc (smod_optab, SImode, NULL);
2584 set_optab_libfunc (umod_optab, SImode, NULL);
2586 /* Half-precision float operations. The compiler handles all operations
2587 with NULL libfuncs by converting to SFmode. */
2588 switch (arm_fp16_format)
2590 case ARM_FP16_FORMAT_IEEE:
2591 case ARM_FP16_FORMAT_ALTERNATIVE:
2593 /* Conversions. */
2594 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2595 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2596 ? "__gnu_f2h_ieee"
2597 : "__gnu_f2h_alternative"));
2598 set_conv_libfunc (sext_optab, SFmode, HFmode,
2599 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2600 ? "__gnu_h2f_ieee"
2601 : "__gnu_h2f_alternative"));
2603 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2604 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2605 ? "__gnu_d2h_ieee"
2606 : "__gnu_d2h_alternative"));
2608 /* Arithmetic. */
2609 set_optab_libfunc (add_optab, HFmode, NULL);
2610 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2611 set_optab_libfunc (smul_optab, HFmode, NULL);
2612 set_optab_libfunc (neg_optab, HFmode, NULL);
2613 set_optab_libfunc (sub_optab, HFmode, NULL);
2615 /* Comparisons. */
2616 set_optab_libfunc (eq_optab, HFmode, NULL);
2617 set_optab_libfunc (ne_optab, HFmode, NULL);
2618 set_optab_libfunc (lt_optab, HFmode, NULL);
2619 set_optab_libfunc (le_optab, HFmode, NULL);
2620 set_optab_libfunc (ge_optab, HFmode, NULL);
2621 set_optab_libfunc (gt_optab, HFmode, NULL);
2622 set_optab_libfunc (unord_optab, HFmode, NULL);
2623 break;
2625 default:
2626 break;
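/* In effect, when the libfuncs above are left NULL an __fp16 addition is
   carried out by widening both operands to SFmode (using the conversion
   helpers registered above when no hardware conversion exists), adding in
   single precision and truncating the result back to HFmode.  */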
2629 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2631 const arm_fixed_mode_set fixed_arith_modes[] =
2633 { E_QQmode, "qq" },
2634 { E_UQQmode, "uqq" },
2635 { E_HQmode, "hq" },
2636 { E_UHQmode, "uhq" },
2637 { E_SQmode, "sq" },
2638 { E_USQmode, "usq" },
2639 { E_DQmode, "dq" },
2640 { E_UDQmode, "udq" },
2641 { E_TQmode, "tq" },
2642 { E_UTQmode, "utq" },
2643 { E_HAmode, "ha" },
2644 { E_UHAmode, "uha" },
2645 { E_SAmode, "sa" },
2646 { E_USAmode, "usa" },
2647 { E_DAmode, "da" },
2648 { E_UDAmode, "uda" },
2649 { E_TAmode, "ta" },
2650 { E_UTAmode, "uta" }
2652 const arm_fixed_mode_set fixed_conv_modes[] =
2654 { E_QQmode, "qq" },
2655 { E_UQQmode, "uqq" },
2656 { E_HQmode, "hq" },
2657 { E_UHQmode, "uhq" },
2658 { E_SQmode, "sq" },
2659 { E_USQmode, "usq" },
2660 { E_DQmode, "dq" },
2661 { E_UDQmode, "udq" },
2662 { E_TQmode, "tq" },
2663 { E_UTQmode, "utq" },
2664 { E_HAmode, "ha" },
2665 { E_UHAmode, "uha" },
2666 { E_SAmode, "sa" },
2667 { E_USAmode, "usa" },
2668 { E_DAmode, "da" },
2669 { E_UDAmode, "uda" },
2670 { E_TAmode, "ta" },
2671 { E_UTAmode, "uta" },
2672 { E_QImode, "qi" },
2673 { E_HImode, "hi" },
2674 { E_SImode, "si" },
2675 { E_DImode, "di" },
2676 { E_TImode, "ti" },
2677 { E_SFmode, "sf" },
2678 { E_DFmode, "df" }
2680 unsigned int i, j;
2682 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2684 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2685 "add", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2687 "ssadd", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2689 "usadd", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2691 "sub", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2693 "sssub", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2695 "ussub", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2697 "mul", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2699 "ssmul", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2701 "usmul", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2703 "div", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2705 "udiv", fixed_arith_modes[i].name, 3);
2706 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2707 "ssdiv", fixed_arith_modes[i].name, 3);
2708 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2709 "usdiv", fixed_arith_modes[i].name, 3);
2710 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2711 "neg", fixed_arith_modes[i].name, 2);
2712 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2713 "ssneg", fixed_arith_modes[i].name, 2);
2714 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2715 "usneg", fixed_arith_modes[i].name, 2);
2716 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2717 "ashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2719 "ashr", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2721 "lshr", fixed_arith_modes[i].name, 3);
2722 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2723 "ssashl", fixed_arith_modes[i].name, 3);
2724 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2725 "usashl", fixed_arith_modes[i].name, 3);
2726 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2727 "cmp", fixed_arith_modes[i].name, 2);
2730 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2731 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2733 if (i == j
2734 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2735 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2736 continue;
2738 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2739 fixed_conv_modes[j].mode, "fract",
2740 fixed_conv_modes[i].name,
2741 fixed_conv_modes[j].name);
2742 arm_set_fixed_conv_libfunc (satfract_optab,
2743 fixed_conv_modes[i].mode,
2744 fixed_conv_modes[j].mode, "satfract",
2745 fixed_conv_modes[i].name,
2746 fixed_conv_modes[j].name);
2747 arm_set_fixed_conv_libfunc (fractuns_optab,
2748 fixed_conv_modes[i].mode,
2749 fixed_conv_modes[j].mode, "fractuns",
2750 fixed_conv_modes[i].name,
2751 fixed_conv_modes[j].name);
2752 arm_set_fixed_conv_libfunc (satfractuns_optab,
2753 fixed_conv_modes[i].mode,
2754 fixed_conv_modes[j].mode, "satfractuns",
2755 fixed_conv_modes[i].name,
2756 fixed_conv_modes[j].name);
2760 if (TARGET_AAPCS_BASED)
2761 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2764 /* On AAPCS systems, this is the "struct __va_list". */
2765 static GTY(()) tree va_list_type;
2767 /* Return the type to use as __builtin_va_list. */
2768 static tree
2769 arm_build_builtin_va_list (void)
2771 tree va_list_name;
2772 tree ap_field;
2774 if (!TARGET_AAPCS_BASED)
2775 return std_build_builtin_va_list ();
2777 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2778 defined as:
2780 struct __va_list
2782 void *__ap;
2785 The C Library ABI further reinforces this definition in \S
2786 4.1.
2788 We must follow this definition exactly. The structure tag
2789 name is visible in C++ mangled names, and thus forms a part
2790 of the ABI. The field name may be used by people who
2791 #include <stdarg.h>. */
2792 /* Create the type. */
2793 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2794 /* Give it the required name. */
2795 va_list_name = build_decl (BUILTINS_LOCATION,
2796 TYPE_DECL,
2797 get_identifier ("__va_list"),
2798 va_list_type);
2799 DECL_ARTIFICIAL (va_list_name) = 1;
2800 TYPE_NAME (va_list_type) = va_list_name;
2801 TYPE_STUB_DECL (va_list_type) = va_list_name;
2802 /* Create the __ap field. */
2803 ap_field = build_decl (BUILTINS_LOCATION,
2804 FIELD_DECL,
2805 get_identifier ("__ap"),
2806 ptr_type_node);
2807 DECL_ARTIFICIAL (ap_field) = 1;
2808 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2809 TYPE_FIELDS (va_list_type) = ap_field;
2810 /* Compute its layout. */
2811 layout_type (va_list_type);
2813 return va_list_type;
2816 /* Return an expression of type "void *" pointing to the next
2817 available argument in a variable-argument list. VALIST is the
2818 user-level va_list object, of type __builtin_va_list. */
2819 static tree
2820 arm_extract_valist_ptr (tree valist)
2822 if (TREE_TYPE (valist) == error_mark_node)
2823 return error_mark_node;
2825 /* On an AAPCS target, the pointer is stored within "struct
2826 va_list". */
2827 if (TARGET_AAPCS_BASED)
2829 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2830 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2831 valist, ap_field, NULL_TREE);
2834 return valist;
2837 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2838 static void
2839 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2841 valist = arm_extract_valist_ptr (valist);
2842 std_expand_builtin_va_start (valist, nextarg);
2845 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2846 static tree
2847 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2848 gimple_seq *post_p)
2850 valist = arm_extract_valist_ptr (valist);
2851 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2854 /* Check any incompatible options that the user has specified. */
2855 static void
2856 arm_option_check_internal (struct gcc_options *opts)
2858 int flags = opts->x_target_flags;
2860 /* iWMMXt and NEON are incompatible. */
2861 if (TARGET_IWMMXT
2862 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2863 error ("iWMMXt and NEON are incompatible");
2865 /* Make sure that the processor choice does not conflict with any of the
2866 other command line choices. */
2867 if (TARGET_ARM_P (flags)
2868 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2869 error ("target CPU does not support ARM mode");
2871 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2872 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2873 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2875 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2876 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2878 /* If this target is normally configured to use APCS frames, warn if they
2879 are turned off and debugging is turned on. */
2880 if (TARGET_ARM_P (flags)
2881 && write_symbols != NO_DEBUG
2882 && !TARGET_APCS_FRAME
2883 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2884 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2886 /* iWMMXt unsupported under Thumb mode. */
2887 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2888 error ("iWMMXt unsupported under Thumb mode");
2890 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2891 error ("can not use -mtp=cp15 with 16-bit Thumb");
2893 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2895 error ("RTP PIC is incompatible with Thumb");
2896 flag_pic = 0;
2899 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2900 with MOVT. */
2901 if ((target_pure_code || target_slow_flash_data)
2902 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2904 const char *flag = (target_pure_code ? "-mpure-code" :
2905 "-mslow-flash-data");
2906 error ("%s only supports non-pic code on M-profile targets with the "
2907 "MOVT instruction", flag);
2912 /* Recompute the global settings depending on target attribute options. */
2914 static void
2915 arm_option_params_internal (void)
2917 /* If we are not using the default (ARM mode) section anchor offset
2918 ranges, then set the correct ranges now. */
2919 if (TARGET_THUMB1)
2921 /* Thumb-1 LDR instructions cannot have negative offsets.
2922 Permissible positive offset ranges are 5-bit (for byte loads),
2923 6-bit (for halfword loads), or 7-bit (for word loads).
2924 Empirical results suggest a 7-bit anchor range gives the best
2925 overall code size. */
2926 targetm.min_anchor_offset = 0;
2927 targetm.max_anchor_offset = 127;
2929 else if (TARGET_THUMB2)
2931 /* The minimum is set such that the total size of the block
2932 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2933 divisible by eight (4344 = 8 * 543), ensuring natural spacing of anchors. */
2934 targetm.min_anchor_offset = -248;
2935 targetm.max_anchor_offset = 4095;
2937 else
2939 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2940 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2943 /* Increase the number of conditional instructions with -Os. */
2944 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2946 /* For THUMB2, we limit the conditional sequence to one IT block. */
2947 if (TARGET_THUMB2)
2948 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2951 /* True if -mflip-thumb should next add an attribute for the default
2952 mode, false if it should next add an attribute for the opposite mode. */
2953 static GTY(()) bool thumb_flipper;
2955 /* Options after initial target override. */
2956 static GTY(()) tree init_optimize;
2958 static void
2959 arm_override_options_after_change_1 (struct gcc_options *opts)
2961 if (opts->x_align_functions <= 0)
2962 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2963 && opts->x_optimize_size ? 2 : 4;
2966 /* Implement targetm.override_options_after_change. */
2968 static void
2969 arm_override_options_after_change (void)
2971 arm_configure_build_target (&arm_active_target,
2972 TREE_TARGET_OPTION (target_option_default_node),
2973 &global_options_set, false);
2975 arm_override_options_after_change_1 (&global_options);
2978 /* Implement TARGET_OPTION_SAVE. */
2979 static void
2980 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2982 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2983 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2984 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2987 /* Implement TARGET_OPTION_RESTORE. */
2988 static void
2989 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2991 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2992 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2993 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2994 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2995 false);
2998 /* Reset options between modes that the user has specified. */
2999 static void
3000 arm_option_override_internal (struct gcc_options *opts,
3001 struct gcc_options *opts_set)
3003 arm_override_options_after_change_1 (opts);
3005 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3007 /* The default is to enable interworking, so this warning message would
3008 be confusing to users who have just compiled with, e.g., -march=armv3. */
3009 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3010 opts->x_target_flags &= ~MASK_INTERWORK;
3013 if (TARGET_THUMB_P (opts->x_target_flags)
3014 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3016 warning (0, "target CPU does not support THUMB instructions");
3017 opts->x_target_flags &= ~MASK_THUMB;
3020 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3022 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3023 opts->x_target_flags &= ~MASK_APCS_FRAME;
3026 /* Callee super interworking implies thumb interworking. Adding
3027 this to the flags here simplifies the logic elsewhere. */
3028 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3029 opts->x_target_flags |= MASK_INTERWORK;
3031 /* Need to remember initial values so combinations of options like
3032 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3033 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3035 if (! opts_set->x_arm_restrict_it)
3036 opts->x_arm_restrict_it = arm_arch8;
3038 /* ARM execution state and M profile don't have [restrict] IT. */
3039 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3040 opts->x_arm_restrict_it = 0;
3042 /* Enable -munaligned-access by default for
3043 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3044 i.e. Thumb2 and ARM state only.
3045 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3046 - ARMv8 architecture-based processors.
3048 Disable -munaligned-access by default for
3049 - all pre-ARMv6 architecture-based processors
3050 - ARMv6-M architecture-based processors
3051 - ARMv8-M Baseline processors. */
3053 if (! opts_set->x_unaligned_access)
3055 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3056 && arm_arch6 && (arm_arch_notm || arm_arch7));
3058 else if (opts->x_unaligned_access == 1
3059 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3061 warning (0, "target CPU does not support unaligned accesses");
3062 opts->x_unaligned_access = 0;
3065 /* Don't warn since it's on by default in -O2. */
3066 if (TARGET_THUMB1_P (opts->x_target_flags))
3067 opts->x_flag_schedule_insns = 0;
3068 else
3069 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3071 /* Disable shrink-wrap when optimizing function for size, since it tends to
3072 generate additional returns. */
3073 if (optimize_function_for_size_p (cfun)
3074 && TARGET_THUMB2_P (opts->x_target_flags))
3075 opts->x_flag_shrink_wrap = false;
3076 else
3077 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3079 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3080 - epilogue_insns - does not accurately model the corresponding insns
3081 emitted in the asm file. In particular, see the comment in thumb_exit
3082 'Find out how many of the (return) argument registers we can corrupt'.
3083 As a consequence, the epilogue may clobber registers without fipa-ra
3084 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3085 TODO: Accurately model clobbers for epilogue_insns and reenable
3086 fipa-ra. */
3087 if (TARGET_THUMB1_P (opts->x_target_flags))
3088 opts->x_flag_ipa_ra = 0;
3089 else
3090 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3092 /* Thumb2 inline assembly code should always use unified syntax.
3093 This will apply to ARM and Thumb1 eventually. */
3094 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3096 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3097 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3098 #endif
3101 static sbitmap isa_all_fpubits;
3102 static sbitmap isa_quirkbits;
3104 /* Configure a build target TARGET from the user-specified options OPTS and
3105 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3106 architecture have been specified, but the two are not identical. */
3107 void
3108 arm_configure_build_target (struct arm_build_target *target,
3109 struct cl_target_option *opts,
3110 struct gcc_options *opts_set,
3111 bool warn_compatible)
3113 const cpu_option *arm_selected_tune = NULL;
3114 const arch_option *arm_selected_arch = NULL;
3115 const cpu_option *arm_selected_cpu = NULL;
3116 const arm_fpu_desc *arm_selected_fpu = NULL;
3117 const char *tune_opts = NULL;
3118 const char *arch_opts = NULL;
3119 const char *cpu_opts = NULL;
3121 bitmap_clear (target->isa);
3122 target->core_name = NULL;
3123 target->arch_name = NULL;
3125 if (opts_set->x_arm_arch_string)
3127 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3128 "-march",
3129 opts->x_arm_arch_string);
3130 arch_opts = strchr (opts->x_arm_arch_string, '+');
3133 if (opts_set->x_arm_cpu_string)
3135 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3136 opts->x_arm_cpu_string);
3137 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3138 arm_selected_tune = arm_selected_cpu;
3139 /* If taking the tuning from -mcpu, we don't need to rescan the
3140 options for tuning. */
3143 if (opts_set->x_arm_tune_string)
3145 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3146 opts->x_arm_tune_string);
3147 tune_opts = strchr (opts->x_arm_tune_string, '+');
3150 if (arm_selected_arch)
3152 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3153 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3154 arch_opts);
3156 if (arm_selected_cpu)
3158 auto_sbitmap cpu_isa (isa_num_bits);
3159 auto_sbitmap isa_delta (isa_num_bits);
3161 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3162 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3163 cpu_opts);
3164 bitmap_xor (isa_delta, cpu_isa, target->isa);
3165 /* Ignore any bits that are quirk bits. */
3166 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3167 /* Ignore (for now) any bits that might be set by -mfpu. */
3168 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3170 if (!bitmap_empty_p (isa_delta))
3172 if (warn_compatible)
3173 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3174 arm_selected_cpu->common.name,
3175 arm_selected_arch->common.name);
3176 /* -march wins for code generation.
3177 -mcpu wins for default tuning. */
3178 if (!arm_selected_tune)
3179 arm_selected_tune = arm_selected_cpu;
3181 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3182 target->arch_name = arm_selected_arch->common.name;
3184 else
3186 /* Architecture and CPU are essentially the same.
3187 Prefer the CPU setting. */
3188 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3189 target->core_name = arm_selected_cpu->common.name;
3190 /* Copy the CPU's capabilities, so that we inherit the
3191 appropriate extensions and quirks. */
3192 bitmap_copy (target->isa, cpu_isa);
3195 else
3197 /* Pick a CPU based on the architecture. */
3198 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3199 target->arch_name = arm_selected_arch->common.name;
3200 /* Note: target->core_name is left unset in this path. */
3203 else if (arm_selected_cpu)
3205 target->core_name = arm_selected_cpu->common.name;
3206 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3207 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3208 cpu_opts);
3209 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3211 /* If the user did not specify a processor or architecture, choose
3212 one for them. */
3213 else
3215 const cpu_option *sel;
3216 auto_sbitmap sought_isa (isa_num_bits);
3217 bitmap_clear (sought_isa);
3218 auto_sbitmap default_isa (isa_num_bits);
3220 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3221 TARGET_CPU_DEFAULT);
3222 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3223 gcc_assert (arm_selected_cpu->common.name);
3225 /* RWE: All of the selection logic below (to the end of this
3226 'if' clause) looks somewhat suspect. It appears to be mostly
3227 there to support forcing thumb support when the default CPU
3228 does not have thumb (somewhat dubious in terms of what the
3229 user might be expecting). I think it should be removed once
3230 support for the pre-thumb era cores is removed. */
3231 sel = arm_selected_cpu;
3232 arm_initialize_isa (default_isa, sel->common.isa_bits);
3233 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3234 cpu_opts);
3236 /* Now check to see if the user has specified any command line
3237 switches that require certain abilities from the cpu. */
3239 if (TARGET_INTERWORK || TARGET_THUMB)
3241 bitmap_set_bit (sought_isa, isa_bit_thumb);
3242 bitmap_set_bit (sought_isa, isa_bit_mode32);
3244 /* There are no ARM processors that support both APCS-26 and
3245 interworking. Therefore we forcibly remove MODE26 from
3246 the isa features here (if it was set), so that the
3247 search below will always be able to find a compatible
3248 processor. */
3249 bitmap_clear_bit (default_isa, isa_bit_mode26);
3252 /* If there are such requirements and the default CPU does not
3253 satisfy them, we need to run over the complete list of
3254 cores looking for one that is satisfactory. */
3255 if (!bitmap_empty_p (sought_isa)
3256 && !bitmap_subset_p (sought_isa, default_isa))
3258 auto_sbitmap candidate_isa (isa_num_bits);
3259 /* We're only interested in a CPU with at least the
3260 capabilities of the default CPU and the required
3261 additional features. */
3262 bitmap_ior (default_isa, default_isa, sought_isa);
3264 /* Try to locate a CPU type that supports all of the abilities
3265 of the default CPU, plus the extra abilities requested by
3266 the user. */
3267 for (sel = all_cores; sel->common.name != NULL; sel++)
3269 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3270 /* An exact match? */
3271 if (bitmap_equal_p (default_isa, candidate_isa))
3272 break;
3275 if (sel->common.name == NULL)
3277 unsigned current_bit_count = isa_num_bits;
3278 const cpu_option *best_fit = NULL;
3280 /* Ideally we would like to issue an error message here
3281 saying that it was not possible to find a CPU compatible
3282 with the default CPU, but which also supports the command
3283 line options specified by the programmer, and so they
3284 ought to use the -mcpu=<name> command line option to
3285 override the default CPU type.
3287 If we cannot find a CPU that has exactly the
3288 characteristics of the default CPU and the given
3289 command line options we scan the array again looking
3290 for a best match. The best match must have at least
3291 the capabilities of the perfect match. */
3292 for (sel = all_cores; sel->common.name != NULL; sel++)
3294 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3296 if (bitmap_subset_p (default_isa, candidate_isa))
3298 unsigned count;
3300 bitmap_and_compl (candidate_isa, candidate_isa,
3301 default_isa);
3302 count = bitmap_popcount (candidate_isa);
3304 if (count < current_bit_count)
3306 best_fit = sel;
3307 current_bit_count = count;
3311 gcc_assert (best_fit);
3312 sel = best_fit;
3315 arm_selected_cpu = sel;
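	  /* Illustrative scenario (not from the original comments): if the
	     configured default CPU predates Thumb and the user passes -mthumb
	     or -mthumb-interwork, sought_isa acquires isa_bit_thumb and
	     isa_bit_mode32, so the loops above settle on the core whose ISA
	     is the smallest superset of the default CPU's features plus
	     Thumb.  */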
3318 /* Now we know the CPU, we can finally initialize the target
3319 structure. */
3320 target->core_name = arm_selected_cpu->common.name;
3321 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3322 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3323 cpu_opts);
3324 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3327 gcc_assert (arm_selected_cpu);
3328 gcc_assert (arm_selected_arch);
3330 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3332 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3333 auto_sbitmap fpu_bits (isa_num_bits);
3335 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3336 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3337 bitmap_ior (target->isa, target->isa, fpu_bits);
3340 if (!arm_selected_tune)
3341 arm_selected_tune = arm_selected_cpu;
3342 else /* Validate the features passed to -mtune. */
3343 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3345 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3347 /* Finish initializing the target structure. */
3348 target->arch_pp_name = arm_selected_arch->arch;
3349 target->base_arch = arm_selected_arch->base_arch;
3350 target->profile = arm_selected_arch->profile;
3352 target->tune_flags = tune_data->tune_flags;
3353 target->tune = tune_data->tune;
3354 target->tune_core = tune_data->scheduler;
3355 arm_option_reconfigure_globals ();
3358 /* Fix up any incompatible options that the user has specified. */
3359 static void
3360 arm_option_override (void)
3362 static const enum isa_feature fpu_bitlist[]
3363 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3364 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3365 cl_target_option opts;
3367 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3368 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3370 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3371 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3373 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3375 if (!global_options_set.x_arm_fpu_index)
3377 bool ok;
3378 int fpu_index;
3380 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3381 CL_TARGET);
3382 gcc_assert (ok);
3383 arm_fpu_index = (enum fpu_type) fpu_index;
3386 cl_target_option_save (&opts, &global_options);
3387 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3388 true);
3390 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3391 SUBTARGET_OVERRIDE_OPTIONS;
3392 #endif
3394 /* Initialize boolean versions of the architectural flags, for use
3395 in the arm.md file and for enabling feature flags. */
3396 arm_option_reconfigure_globals ();
3398 arm_tune = arm_active_target.tune_core;
3399 tune_flags = arm_active_target.tune_flags;
3400 current_tune = arm_active_target.tune;
3402 /* TBD: Dwarf info for apcs frame is not handled yet. */
3403 if (TARGET_APCS_FRAME)
3404 flag_shrink_wrap = false;
3406 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3408 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3409 target_flags |= MASK_APCS_FRAME;
3412 if (TARGET_POKE_FUNCTION_NAME)
3413 target_flags |= MASK_APCS_FRAME;
3415 if (TARGET_APCS_REENT && flag_pic)
3416 error ("-fpic and -mapcs-reent are incompatible");
3418 if (TARGET_APCS_REENT)
3419 warning (0, "APCS reentrant code not supported. Ignored");
3421 /* Set up some tuning parameters. */
3422 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3423 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3424 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3425 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3426 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3427 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3429 /* For arm2/3 there is no need to do any scheduling if we are doing
3430 software floating-point. */
3431 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3432 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3434 /* Override the default structure alignment for AAPCS ABI. */
3435 if (!global_options_set.x_arm_structure_size_boundary)
3437 if (TARGET_AAPCS_BASED)
3438 arm_structure_size_boundary = 8;
3440 else
3442 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3444 if (arm_structure_size_boundary != 8
3445 && arm_structure_size_boundary != 32
3446 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3448 if (ARM_DOUBLEWORD_ALIGN)
3449 warning (0,
3450 "structure size boundary can only be set to 8, 32 or 64");
3451 else
3452 warning (0, "structure size boundary can only be set to 8 or 32");
3453 arm_structure_size_boundary
3454 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3458 if (TARGET_VXWORKS_RTP)
3460 if (!global_options_set.x_arm_pic_data_is_text_relative)
3461 arm_pic_data_is_text_relative = 0;
3463 else if (flag_pic
3464 && !arm_pic_data_is_text_relative
3465 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3466 /* When text & data segments don't have a fixed displacement, the
3467 intended use is with a single, read only, pic base register.
3468 Unless the user explicitly requested not to do that, set
3469 it. */
3470 target_flags |= MASK_SINGLE_PIC_BASE;
3472 /* If stack checking is disabled, we can use r10 as the PIC register,
3473 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3474 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3476 if (TARGET_VXWORKS_RTP)
3477 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3478 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3481 if (flag_pic && TARGET_VXWORKS_RTP)
3482 arm_pic_register = 9;
3484 if (arm_pic_register_string != NULL)
3486 int pic_register = decode_reg_name (arm_pic_register_string);
3488 if (!flag_pic)
3489 warning (0, "-mpic-register= is useless without -fpic");
3491 /* Prevent the user from choosing an obviously stupid PIC register. */
3492 else if (pic_register < 0 || call_used_regs[pic_register]
3493 || pic_register == HARD_FRAME_POINTER_REGNUM
3494 || pic_register == STACK_POINTER_REGNUM
3495 || pic_register >= PC_REGNUM
3496 || (TARGET_VXWORKS_RTP
3497 && (unsigned int) pic_register != arm_pic_register))
3498 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3499 else
3500 arm_pic_register = pic_register;
3503 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3504 if (fix_cm3_ldrd == 2)
3506 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3507 fix_cm3_ldrd = 1;
3508 else
3509 fix_cm3_ldrd = 0;
3512 /* Hot/Cold partitioning is not currently supported, since we can't
3513 handle literal pool placement in that case. */
3514 if (flag_reorder_blocks_and_partition)
3516 inform (input_location,
3517 "-freorder-blocks-and-partition not supported on this architecture");
3518 flag_reorder_blocks_and_partition = 0;
3519 flag_reorder_blocks = 1;
3522 if (flag_pic)
3523 /* Hoisting PIC address calculations more aggressively provides a small,
3524 but measurable, size reduction for PIC code. Therefore, we decrease
3525 the bar for unrestricted expression hoisting to the cost of PIC address
3526 calculation, which is 2 instructions. */
3527 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3528 global_options.x_param_values,
3529 global_options_set.x_param_values);
3531 /* ARM EABI defaults to strict volatile bitfields. */
3532 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3533 && abi_version_at_least(2))
3534 flag_strict_volatile_bitfields = 1;
3536   /* Enable software prefetching at -O3 for CPUs that have prefetch, when we
3537      have deemed it beneficial (signified by setting
3538      prefetch.num_slots to 1 or more).  */
3539 if (flag_prefetch_loop_arrays < 0
3540 && HAVE_prefetch
3541 && optimize >= 3
3542 && current_tune->prefetch.num_slots > 0)
3543 flag_prefetch_loop_arrays = 1;
3545 /* Set up parameters to be used in prefetching algorithm. Do not
3546 override the defaults unless we are tuning for a core we have
3547 researched values for. */
3548 if (current_tune->prefetch.num_slots > 0)
3549 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3550 current_tune->prefetch.num_slots,
3551 global_options.x_param_values,
3552 global_options_set.x_param_values);
3553 if (current_tune->prefetch.l1_cache_line_size >= 0)
3554 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3555 current_tune->prefetch.l1_cache_line_size,
3556 global_options.x_param_values,
3557 global_options_set.x_param_values);
3558 if (current_tune->prefetch.l1_cache_size >= 0)
3559 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3560 current_tune->prefetch.l1_cache_size,
3561 global_options.x_param_values,
3562 global_options_set.x_param_values);
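  /* For example (illustrative numbers): a tuning that reports
     prefetch.num_slots == 2, l1_cache_line_size == 64 and
     l1_cache_size == 32 seeds the corresponding --param values with
     2, 64 and 32; maybe_set_param_value leaves any value the user set
     explicitly via --param untouched.  */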
3564   /* Use Neon rather than core registers to perform 64-bit
3565      operations.  */
3566 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3567 if (use_neon_for_64bits == 1)
3568 prefer_neon_for_64bits = true;
3570 /* Use the alternative scheduling-pressure algorithm by default. */
3571 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3572 global_options.x_param_values,
3573 global_options_set.x_param_values);
3575 /* Look through ready list and all of queue for instructions
3576 relevant for L2 auto-prefetcher. */
3577 int param_sched_autopref_queue_depth;
3579 switch (current_tune->sched_autopref)
3581 case tune_params::SCHED_AUTOPREF_OFF:
3582 param_sched_autopref_queue_depth = -1;
3583 break;
3585 case tune_params::SCHED_AUTOPREF_RANK:
3586 param_sched_autopref_queue_depth = 0;
3587 break;
3589 case tune_params::SCHED_AUTOPREF_FULL:
3590 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3591 break;
3593 default:
3594 gcc_unreachable ();
3597 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3598 param_sched_autopref_queue_depth,
3599 global_options.x_param_values,
3600 global_options_set.x_param_values);
3602 /* Currently, for slow flash data, we just disable literal pools. We also
3603      disable them for pure-code.  */
3604 if (target_slow_flash_data || target_pure_code)
3605 arm_disable_literal_pool = true;
3607   /* Disable scheduling fusion by default if the target is not an armv7
3608      processor or does not prefer ldrd/strd.  */
3609 if (flag_schedule_fusion == 2
3610 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3611 flag_schedule_fusion = 0;
3613   /* Need to remember initial options before they are overridden.  */
3614 init_optimize = build_optimization_node (&global_options);
3616 arm_options_perform_arch_sanity_checks ();
3617 arm_option_override_internal (&global_options, &global_options_set);
3618 arm_option_check_internal (&global_options);
3619 arm_option_params_internal ();
3621 /* Create the default target_options structure. */
3622 target_option_default_node = target_option_current_node
3623 = build_target_option_node (&global_options);
3625 /* Register global variables with the garbage collector. */
3626 arm_add_gc_roots ();
3628 /* Init initial mode for testing. */
3629 thumb_flipper = TARGET_THUMB;
3633 /* Reconfigure global status flags from the active_target.isa. */
3634 void
3635 arm_option_reconfigure_globals (void)
3637 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3638 arm_base_arch = arm_active_target.base_arch;
3640 /* Initialize boolean versions of the architectural flags, for use
3641 in the arm.md file. */
3642 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3643 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3644 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3645 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3646 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3647 arm_arch5te = arm_arch5e
3648 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3649 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3650 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3651 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3652 arm_arch6m = arm_arch6 && !arm_arch_notm;
3653 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3654 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3655 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3656 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3657 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3658 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3659 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3660 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3661 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3662 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3663 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3664 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3665 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3666 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3667 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3668 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3669 if (arm_fp16_inst)
3671 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3672 error ("selected fp16 options are incompatible");
3673 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3676 /* And finally, set up some quirks. */
3677 arm_arch_no_volatile_ce
3678 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3679 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3680 isa_bit_quirk_armv6kz);
3682 /* Use the cp15 method if it is available. */
3683 if (target_thread_pointer == TP_AUTO)
3685 if (arm_arch6k && !TARGET_THUMB1)
3686 target_thread_pointer = TP_CP15;
3687 else
3688 target_thread_pointer = TP_SOFT;
3692 /* Perform some validation between the desired architecture and the rest of the
3693 options. */
3694 void
3695 arm_options_perform_arch_sanity_checks (void)
3697 /* V5 code we generate is completely interworking capable, so we turn off
3698 TARGET_INTERWORK here to avoid many tests later on. */
3700 /* XXX However, we must pass the right pre-processor defines to CPP
3701 or GLD can get confused. This is a hack. */
3702 if (TARGET_INTERWORK)
3703 arm_cpp_interwork = 1;
3705 if (arm_arch5)
3706 target_flags &= ~MASK_INTERWORK;
3708 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3709 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3711 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3712 error ("iwmmxt abi requires an iwmmxt capable cpu");
3714 /* BPABI targets use linker tricks to allow interworking on cores
3715 without thumb support. */
3716 if (TARGET_INTERWORK
3717 && !TARGET_BPABI
3718 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3720 warning (0, "target CPU does not support interworking" );
3721 target_flags &= ~MASK_INTERWORK;
3724 /* If soft-float is specified then don't use FPU. */
3725 if (TARGET_SOFT_FLOAT)
3726 arm_fpu_attr = FPU_NONE;
3727 else
3728 arm_fpu_attr = FPU_VFP;
3730 if (TARGET_AAPCS_BASED)
3732 if (TARGET_CALLER_INTERWORKING)
3733 error ("AAPCS does not support -mcaller-super-interworking");
3734 else
3735 if (TARGET_CALLEE_INTERWORKING)
3736 error ("AAPCS does not support -mcallee-super-interworking");
3739 /* __fp16 support currently assumes the core has ldrh. */
3740 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3741 sorry ("__fp16 and no ldrh");
3743 if (use_cmse && !arm_arch_cmse)
3744 error ("target CPU does not support ARMv8-M Security Extensions");
3746 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3747 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3748 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3749 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3752 if (TARGET_AAPCS_BASED)
3754 if (arm_abi == ARM_ABI_IWMMXT)
3755 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3756 else if (TARGET_HARD_FLOAT_ABI)
3758 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3759 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3760 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3762 else
3763 arm_pcs_default = ARM_PCS_AAPCS;
3765 else
3767 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3768 sorry ("-mfloat-abi=hard and VFP");
3770 if (arm_abi == ARM_ABI_APCS)
3771 arm_pcs_default = ARM_PCS_APCS;
3772 else
3773 arm_pcs_default = ARM_PCS_ATPCS;
3777 static void
3778 arm_add_gc_roots (void)
3780 gcc_obstack_init(&minipool_obstack);
3781 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3784 /* A table of known ARM exception types.
3785 For use with the interrupt function attribute. */
3787 typedef struct
3789 const char *const arg;
3790 const unsigned long return_value;
3792 isr_attribute_arg;
3794 static const isr_attribute_arg isr_attribute_args [] =
3796 { "IRQ", ARM_FT_ISR },
3797 { "irq", ARM_FT_ISR },
3798 { "FIQ", ARM_FT_FIQ },
3799 { "fiq", ARM_FT_FIQ },
3800 { "ABORT", ARM_FT_ISR },
3801 { "abort", ARM_FT_ISR },
3802 { "ABORT", ARM_FT_ISR },
3803 { "abort", ARM_FT_ISR },
3804 { "UNDEF", ARM_FT_EXCEPTION },
3805 { "undef", ARM_FT_EXCEPTION },
3806 { "SWI", ARM_FT_EXCEPTION },
3807 { "swi", ARM_FT_EXCEPTION },
3808 { NULL, ARM_FT_NORMAL }
3811 /* Returns the (interrupt) function type of the current
3812 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3814 static unsigned long
3815 arm_isr_value (tree argument)
3817 const isr_attribute_arg * ptr;
3818 const char * arg;
3820 if (!arm_arch_notm)
3821 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3823 /* No argument - default to IRQ. */
3824 if (argument == NULL_TREE)
3825 return ARM_FT_ISR;
3827 /* Get the value of the argument. */
3828 if (TREE_VALUE (argument) == NULL_TREE
3829 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3830 return ARM_FT_UNKNOWN;
3832 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3834 /* Check it against the list of known arguments. */
3835 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3836 if (streq (arg, ptr->arg))
3837 return ptr->return_value;
3839 /* An unrecognized interrupt type. */
3840 return ARM_FT_UNKNOWN;
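/* Usage example (illustrative): a handler declared as

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   is classified as ARM_FT_ISR by the table above; omitting the argument
   also defaults to ARM_FT_ISR, while an unrecognized string yields
   ARM_FT_UNKNOWN.  */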
3843 /* Computes the type of the current function. */
3845 static unsigned long
3846 arm_compute_func_type (void)
3848 unsigned long type = ARM_FT_UNKNOWN;
3849 tree a;
3850 tree attr;
3852 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3854 /* Decide if the current function is volatile. Such functions
3855 never return, and many memory cycles can be saved by not storing
3856 register values that will never be needed again. This optimization
3857 was added to speed up context switching in a kernel application. */
3858 if (optimize > 0
3859 && (TREE_NOTHROW (current_function_decl)
3860 || !(flag_unwind_tables
3861 || (flag_exceptions
3862 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3863 && TREE_THIS_VOLATILE (current_function_decl))
3864 type |= ARM_FT_VOLATILE;
3866 if (cfun->static_chain_decl != NULL)
3867 type |= ARM_FT_NESTED;
3869 attr = DECL_ATTRIBUTES (current_function_decl);
3871 a = lookup_attribute ("naked", attr);
3872 if (a != NULL_TREE)
3873 type |= ARM_FT_NAKED;
3875 a = lookup_attribute ("isr", attr);
3876 if (a == NULL_TREE)
3877 a = lookup_attribute ("interrupt", attr);
3879 if (a == NULL_TREE)
3880 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3881 else
3882 type |= arm_isr_value (TREE_VALUE (a));
3884 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3885 type |= ARM_FT_CMSE_ENTRY;
3887 return type;
3890 /* Returns the type of the current function. */
3892 unsigned long
3893 arm_current_func_type (void)
3895 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3896 cfun->machine->func_type = arm_compute_func_type ();
3898 return cfun->machine->func_type;
3901 bool
3902 arm_allocate_stack_slots_for_args (void)
3904 /* Naked functions should not allocate stack slots for arguments. */
3905 return !IS_NAKED (arm_current_func_type ());
3908 static bool
3909 arm_warn_func_return (tree decl)
3911 /* Naked functions are implemented entirely in assembly, including the
3912 return sequence, so suppress warnings about this. */
3913 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3917 /* Output assembler code for a block containing the constant parts
3918 of a trampoline, leaving space for the variable parts.
3920 On the ARM, (if r8 is the static chain regnum, and remembering that
3921 referencing pc adds an offset of 8) the trampoline looks like:
3922 ldr r8, [pc, #0]
3923 ldr pc, [pc]
3924 .word static chain value
3925 .word function's address
3926 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3928 static void
3929 arm_asm_trampoline_template (FILE *f)
3931 fprintf (f, "\t.syntax unified\n");
3933 if (TARGET_ARM)
3935 fprintf (f, "\t.arm\n");
3936 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3937 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3939 else if (TARGET_THUMB2)
3941 fprintf (f, "\t.thumb\n");
3942 /* The Thumb-2 trampoline is similar to the arm implementation.
3943 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3944 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3945 STATIC_CHAIN_REGNUM, PC_REGNUM);
3946 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3948 else
3950 ASM_OUTPUT_ALIGN (f, 2);
3951 fprintf (f, "\t.code\t16\n");
3952 fprintf (f, ".Ltrampoline_start:\n");
3953 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3954 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3955 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3956 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3957 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3958 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3960 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3961 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3964 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3966 static void
3967 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3969 rtx fnaddr, mem, a_tramp;
3971 emit_block_move (m_tramp, assemble_trampoline_template (),
3972 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3974 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3975 emit_move_insn (mem, chain_value);
3977 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3978 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3979 emit_move_insn (mem, fnaddr);
3981 a_tramp = XEXP (m_tramp, 0);
3982 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3983 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3984 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
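/* Resulting layout (illustrative summary of the code above): for 32-bit
   entry (ARM or Thumb-2) the static chain value lives at offset 8 and the
   target function address at offset 12 of the trampoline; for Thumb-1 the
   offsets are 12 and 16.  __clear_cache is then run over the whole block
   so the freshly written trampoline is visible to instruction fetch.  */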
3987 /* Thumb trampolines should be entered in thumb mode, so set
3988 the bottom bit of the address. */
3990 static rtx
3991 arm_trampoline_adjust_address (rtx addr)
3993 if (TARGET_THUMB)
3994 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3995 NULL, 0, OPTAB_LIB_WIDEN);
3996 return addr;
3999 /* Return 1 if it is possible to return using a single instruction.
4000 If SIBLING is non-null, this is a test for a return before a sibling
4001 call. SIBLING is the call insn, so we can examine its register usage. */
4004 use_return_insn (int iscond, rtx sibling)
4006 int regno;
4007 unsigned int func_type;
4008 unsigned long saved_int_regs;
4009 unsigned HOST_WIDE_INT stack_adjust;
4010 arm_stack_offsets *offsets;
4012 /* Never use a return instruction before reload has run. */
4013 if (!reload_completed)
4014 return 0;
4016 func_type = arm_current_func_type ();
4018 /* Naked, volatile and stack alignment functions need special
4019 consideration. */
4020 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4021 return 0;
4023 /* So do interrupt functions that use the frame pointer and Thumb
4024 interrupt functions. */
4025 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4026 return 0;
4028 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4029 && !optimize_function_for_size_p (cfun))
4030 return 0;
4032 offsets = arm_get_frame_offsets ();
4033 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4035 /* As do variadic functions. */
4036 if (crtl->args.pretend_args_size
4037 || cfun->machine->uses_anonymous_args
4038 /* Or if the function calls __builtin_eh_return () */
4039 || crtl->calls_eh_return
4040 /* Or if the function calls alloca */
4041 || cfun->calls_alloca
4042 /* Or if there is a stack adjustment. However, if the stack pointer
4043 is saved on the stack, we can use a pre-incrementing stack load. */
4044 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4045 && stack_adjust == 4))
4046 /* Or if the static chain register was saved above the frame, under the
4047 assumption that the stack pointer isn't saved on the stack. */
4048 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4049 && arm_compute_static_chain_stack_bytes() != 0))
4050 return 0;
4052 saved_int_regs = offsets->saved_regs_mask;
4054 /* Unfortunately, the insn
4056 ldmib sp, {..., sp, ...}
4058 triggers a bug on most SA-110 based devices, such that the stack
4059 pointer won't be correctly restored if the instruction takes a
4060 page fault. We work around this problem by popping r3 along with
4061 the other registers, since that is never slower than executing
4062 another instruction.
4064 We test for !arm_arch5 here, because code for any architecture
4065 less than this could potentially be run on one of the buggy
4066 chips. */
4067 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4069 /* Validate that r3 is a call-clobbered register (always true in
4070 the default abi) ... */
4071 if (!call_used_regs[3])
4072 return 0;
4074 /* ... that it isn't being used for a return value ... */
4075 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4076 return 0;
4078 /* ... or for a tail-call argument ... */
4079 if (sibling)
4081 gcc_assert (CALL_P (sibling));
4083 if (find_regno_fusage (sibling, USE, 3))
4084 return 0;
4087 /* ... and that there are no call-saved registers in r0-r2
4088 (always true in the default ABI). */
4089 if (saved_int_regs & 0x7)
4090 return 0;
4093 /* Can't be done if interworking with Thumb, and any registers have been
4094 stacked. */
4095 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4096 return 0;
4098 /* On StrongARM, conditional returns are expensive if they aren't
4099 taken and multiple registers have been stacked. */
4100 if (iscond && arm_tune_strongarm)
4102 /* Conditional return when just the LR is stored is a simple
4103 conditional-load instruction, that's not expensive. */
4104 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4105 return 0;
4107 if (flag_pic
4108 && arm_pic_register != INVALID_REGNUM
4109 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4110 return 0;
4113   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4114 several instructions if anything needs to be popped. */
4115 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4116 return 0;
4118 /* If there are saved registers but the LR isn't saved, then we need
4119 two instructions for the return. */
4120 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4121 return 0;
4123 /* Can't be done if any of the VFP regs are pushed,
4124 since this also requires an insn. */
4125 if (TARGET_HARD_FLOAT)
4126 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4127 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4128 return 0;
4130 if (TARGET_REALLY_IWMMXT)
4131 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4132 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4133 return 0;
4135 return 1;
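/* Illustrative case (hedged, not taken from the original sources): a small
   ARM-state leaf function that saves no registers and needs no stack
   adjustment passes every check above, so its epilogue can be a single
   return instruction such as "bx lr".  */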
4138 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4139 shrink-wrapping if possible. This is the case if we need to emit a
4140 prologue, which we can test by looking at the offsets. */
4141 bool
4142 use_simple_return_p (void)
4144 arm_stack_offsets *offsets;
4146 /* Note this function can be called before or after reload. */
4147 if (!reload_completed)
4148 arm_compute_frame_layout ();
4150 offsets = arm_get_frame_offsets ();
4151 return offsets->outgoing_args != 0;
4154 /* Return TRUE if int I is a valid immediate ARM constant. */
4157 const_ok_for_arm (HOST_WIDE_INT i)
4159 int lowbit;
4161 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4162 be all zero, or all one. */
4163 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4164 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4165 != ((~(unsigned HOST_WIDE_INT) 0)
4166 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4167 return FALSE;
4169 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4171 /* Fast return for 0 and small values. We must do this for zero, since
4172 the code below can't handle that one case. */
4173 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4174 return TRUE;
4176 /* Get the number of trailing zeros. */
4177 lowbit = ffs((int) i) - 1;
4179 /* Only even shifts are allowed in ARM mode so round down to the
4180 nearest even number. */
4181 if (TARGET_ARM)
4182 lowbit &= ~1;
4184 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4185 return TRUE;
4187 if (TARGET_ARM)
4189 /* Allow rotated constants in ARM mode. */
4190 if (lowbit <= 4
4191 && ((i & ~0xc000003f) == 0
4192 || (i & ~0xf000000f) == 0
4193 || (i & ~0xfc000003) == 0))
4194 return TRUE;
4196 else if (TARGET_THUMB2)
4198 HOST_WIDE_INT v;
4200 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4201 v = i & 0xff;
4202 v |= v << 16;
4203 if (i == v || i == (v | (v << 8)))
4204 return TRUE;
4206 /* Allow repeated pattern 0xXY00XY00. */
4207 v = i & 0xff00;
4208 v |= v << 16;
4209 if (i == v)
4210 return TRUE;
4212 else if (TARGET_HAVE_MOVT)
4214 /* Thumb-1 Targets with MOVT. */
4215 if (i > 0xffff)
4216 return FALSE;
4217 else
4218 return TRUE;
4221 return FALSE;
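/* Worked examples for the checks above (illustrative):
     0x000003f8 - trailing zeros round down to 2 in ARM mode and
		  0xff << 2 covers the value, so it is accepted;
     0xf000000f - matches (i & ~0xf000000f) == 0, i.e. 0xff rotated
		  right by four bits, so it is accepted in ARM mode;
     0x00120012 and 0x12121212 - accepted as Thumb-2 replicated patterns;
     0x00000102 - 0x81 rotated by an odd amount, so rejected in ARM mode,
		  although the Thumb-2 path (no rounding of LOWBIT)
		  accepts it.  */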
4224 /* Return true if I is a valid constant for the operation CODE. */
4226 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4228 if (const_ok_for_arm (i))
4229 return 1;
4231 switch (code)
4233 case SET:
4234 /* See if we can use movw. */
4235 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4236 return 1;
4237 else
4238 /* Otherwise, try mvn. */
4239 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4241 case PLUS:
4242 /* See if we can use addw or subw. */
4243 if (TARGET_THUMB2
4244 && ((i & 0xfffff000) == 0
4245 || ((-i) & 0xfffff000) == 0))
4246 return 1;
4247 /* Fall through. */
4248 case COMPARE:
4249 case EQ:
4250 case NE:
4251 case GT:
4252 case LE:
4253 case LT:
4254 case GE:
4255 case GEU:
4256 case LTU:
4257 case GTU:
4258 case LEU:
4259 case UNORDERED:
4260 case ORDERED:
4261 case UNEQ:
4262 case UNGE:
4263 case UNLT:
4264 case UNGT:
4265 case UNLE:
4266 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4268 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4269 case XOR:
4270 return 0;
4272 case IOR:
4273 if (TARGET_THUMB2)
4274 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4275 return 0;
4277 case AND:
4278 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4280 default:
4281 gcc_unreachable ();
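/* Worked examples (illustrative): for (SET x 0xffffff00) the constant
   itself is not a valid immediate, but ~0xffffff00 == 0xff is, so a
   single MVN suffices; on Thumb-2, (PLUS x 0xabc) is accepted because it
   fits the 12-bit ADDW/SUBW range even though 0xabc is not an 8-bit
   rotated immediate.  */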
4285 /* Return true if I is a valid di mode constant for the operation CODE. */
4287 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4289 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4290 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4291 rtx hi = GEN_INT (hi_val);
4292 rtx lo = GEN_INT (lo_val);
4294 if (TARGET_THUMB1)
4295 return 0;
4297 switch (code)
4299 case AND:
4300 case IOR:
4301 case XOR:
4302 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4303 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4304 case PLUS:
4305 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4307 default:
4308 return 0;
4312 /* Emit a sequence of insns to handle a large constant.
4313 CODE is the code of the operation required, it can be any of SET, PLUS,
4314 IOR, AND, XOR, MINUS;
4315 MODE is the mode in which the operation is being performed;
4316 VAL is the integer to operate on;
4317 SOURCE is the other operand (a register, or a null-pointer for SET);
4318 SUBTARGETS means it is safe to create scratch registers if that will
4319 either produce a simpler sequence, or we will want to cse the values.
4320 Return value is the number of insns emitted. */
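/* For instance (illustrative; the exact threshold depends on the tuning's
   constant limit): a SET of 0x12345678 cannot be built from a short
   sequence of shifted 8-bit chunks, so on a target with MOVW/MOVT the
   code below prefers a two-insn movw/movt pair over a longer synthesized
   sequence.  */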
4322 /* ??? Tweak this for thumb2. */
4324 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4325 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4327 rtx cond;
4329 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4330 cond = COND_EXEC_TEST (PATTERN (insn));
4331 else
4332 cond = NULL_RTX;
4334 if (subtargets || code == SET
4335 || (REG_P (target) && REG_P (source)
4336 && REGNO (target) != REGNO (source)))
4338 /* After arm_reorg has been called, we can't fix up expensive
4339 constants by pushing them into memory so we must synthesize
4340 them in-line, regardless of the cost. This is only likely to
4341 be more costly on chips that have load delay slots and we are
4342 compiling without running the scheduler (so no splitting
4343 occurred before the final instruction emission).
4345 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4347 if (!cfun->machine->after_arm_reorg
4348 && !cond
4349 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4350 1, 0)
4351 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4352 + (code != SET))))
4354 if (code == SET)
4356 /* Currently SET is the only monadic value for CODE, all
4357 	     the rest are dyadic.  */
4358 if (TARGET_USE_MOVT)
4359 arm_emit_movpair (target, GEN_INT (val));
4360 else
4361 emit_set_insn (target, GEN_INT (val));
4363 return 1;
4365 else
4367 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4369 if (TARGET_USE_MOVT)
4370 arm_emit_movpair (temp, GEN_INT (val));
4371 else
4372 emit_set_insn (temp, GEN_INT (val));
4374 /* For MINUS, the value is subtracted from, since we never
4375 have subtraction of a constant. */
4376 if (code == MINUS)
4377 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4378 else
4379 emit_set_insn (target,
4380 gen_rtx_fmt_ee (code, mode, source, temp));
4381 return 2;
4386 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4390 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4391    ARM/THUMB2 immediates and add up to VAL.
4392    The function return value gives the number of insns required.  */
4393 static int
4394 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4395 struct four_ints *return_sequence)
4397 int best_consecutive_zeros = 0;
4398 int i;
4399 int best_start = 0;
4400 int insns1, insns2;
4401 struct four_ints tmp_sequence;
4403 /* If we aren't targeting ARM, the best place to start is always at
4404 the bottom, otherwise look more closely. */
4405 if (TARGET_ARM)
4407 for (i = 0; i < 32; i += 2)
4409 int consecutive_zeros = 0;
4411 if (!(val & (3 << i)))
4413 while ((i < 32) && !(val & (3 << i)))
4415 consecutive_zeros += 2;
4416 i += 2;
4418 if (consecutive_zeros > best_consecutive_zeros)
4420 best_consecutive_zeros = consecutive_zeros;
4421 best_start = i - consecutive_zeros;
4423 i -= 2;
4428 /* So long as it won't require any more insns to do so, it's
4429 desirable to emit a small constant (in bits 0...9) in the last
4430 insn. This way there is more chance that it can be combined with
4431 a later addressing insn to form a pre-indexed load or store
4432 operation. Consider:
4434 *((volatile int *)0xe0000100) = 1;
4435 *((volatile int *)0xe0000110) = 2;
4437 We want this to wind up as:
4439 mov rA, #0xe0000000
4440 mov rB, #1
4441 str rB, [rA, #0x100]
4442 mov rB, #2
4443 str rB, [rA, #0x110]
4445 rather than having to synthesize both large constants from scratch.
4447 Therefore, we calculate how many insns would be required to emit
4448 the constant starting from `best_start', and also starting from
4449 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4450 yield a shorter sequence, we may as well use zero. */
4451 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4452 if (best_start != 0
4453 && ((HOST_WIDE_INT_1U << best_start) < val))
4455 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4456 if (insns2 <= insns1)
4458 *return_sequence = tmp_sequence;
4459 insns1 = insns2;
4463 return insns1;
4466 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4467 static int
4468 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4469 struct four_ints *return_sequence, int i)
4471 int remainder = val & 0xffffffff;
4472 int insns = 0;
4474 /* Try and find a way of doing the job in either two or three
4475 instructions.
4477 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4478 location. We start at position I. This may be the MSB, or
4479      optimal_immediate_sequence may have positioned it at the largest block
4480 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4481 wrapping around to the top of the word when we drop off the bottom.
4482 In the worst case this code should produce no more than four insns.
4484 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4485 constants, shifted to any arbitrary location. We should always start
4486 at the MSB. */
4489 int end;
4490 unsigned int b1, b2, b3, b4;
4491 unsigned HOST_WIDE_INT result;
4492 int loc;
4494 gcc_assert (insns < 4);
4496 if (i <= 0)
4497 i += 32;
4499 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4500 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4502 loc = i;
4503 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4504 /* We can use addw/subw for the last 12 bits. */
4505 result = remainder;
4506 else
4508 /* Use an 8-bit shifted/rotated immediate. */
4509 end = i - 8;
4510 if (end < 0)
4511 end += 32;
4512 result = remainder & ((0x0ff << end)
4513 | ((i < end) ? (0xff >> (32 - end))
4514 : 0));
4515 i -= 8;
4518 else
4520 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4521 arbitrary shifts. */
4522 i -= TARGET_ARM ? 2 : 1;
4523 continue;
4526 /* Next, see if we can do a better job with a thumb2 replicated
4527 constant.
4529 We do it this way around to catch the cases like 0x01F001E0 where
4530 two 8-bit immediates would work, but a replicated constant would
4531 make it worse.
4533 TODO: 16-bit constants that don't clear all the bits, but still win.
4534 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4535 if (TARGET_THUMB2)
4537 b1 = (remainder & 0xff000000) >> 24;
4538 b2 = (remainder & 0x00ff0000) >> 16;
4539 b3 = (remainder & 0x0000ff00) >> 8;
4540 b4 = remainder & 0xff;
4542 if (loc > 24)
4544 /* The 8-bit immediate already found clears b1 (and maybe b2),
4545 but must leave b3 and b4 alone. */
4547 /* First try to find a 32-bit replicated constant that clears
4548 almost everything. We can assume that we can't do it in one,
4549 or else we wouldn't be here. */
4550 unsigned int tmp = b1 & b2 & b3 & b4;
4551 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4552 + (tmp << 24);
4553 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4554 + (tmp == b3) + (tmp == b4);
4555 if (tmp
4556 && (matching_bytes >= 3
4557 || (matching_bytes == 2
4558 && const_ok_for_op (remainder & ~tmp2, code))))
4560 /* At least 3 of the bytes match, and the fourth has at
4561 least as many bits set, or two of the bytes match
4562 and it will only require one more insn to finish. */
4563 result = tmp2;
4564 i = tmp != b1 ? 32
4565 : tmp != b2 ? 24
4566 : tmp != b3 ? 16
4567 : 8;
4570 /* Second, try to find a 16-bit replicated constant that can
4571 leave three of the bytes clear. If b2 or b4 is already
4572 zero, then we can. If the 8-bit from above would not
4573 clear b2 anyway, then we still win. */
4574 else if (b1 == b3 && (!b2 || !b4
4575 || (remainder & 0x00ff0000 & ~result)))
4577 result = remainder & 0xff00ff00;
4578 i = 24;
4581 else if (loc > 16)
4583 /* The 8-bit immediate already found clears b2 (and maybe b3)
4584 	     and we don't get here unless b1 is already clear, but it will
4585 leave b4 unchanged. */
4587 /* If we can clear b2 and b4 at once, then we win, since the
4588 8-bits couldn't possibly reach that far. */
4589 if (b2 == b4)
4591 result = remainder & 0x00ff00ff;
4592 i = 16;
4597 return_sequence->i[insns++] = result;
4598 remainder &= ~result;
4600 if (code == SET || code == MINUS)
4601 code = PLUS;
4603 while (remainder);
4605 return insns;
4608 /* Emit an instruction with the indicated PATTERN. If COND is
4609 non-NULL, conditionalize the execution of the instruction on COND
4610 being true. */
4612 static void
4613 emit_constant_insn (rtx cond, rtx pattern)
4615 if (cond)
4616 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4617 emit_insn (pattern);
4620 /* As above, but extra parameter GENERATE which, if clear, suppresses
4621 RTL generation. */
4623 static int
4624 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4625 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4626 int subtargets, int generate)
4628 int can_invert = 0;
4629 int can_negate = 0;
4630 int final_invert = 0;
4631 int i;
4632 int set_sign_bit_copies = 0;
4633 int clear_sign_bit_copies = 0;
4634 int clear_zero_bit_copies = 0;
4635 int set_zero_bit_copies = 0;
4636 int insns = 0, neg_insns, inv_insns;
4637 unsigned HOST_WIDE_INT temp1, temp2;
4638 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4639 struct four_ints *immediates;
4640 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4642 /* Find out which operations are safe for a given CODE. Also do a quick
4643 check for degenerate cases; these can occur when DImode operations
4644 are split. */
4645 switch (code)
4647 case SET:
4648 can_invert = 1;
4649 break;
4651 case PLUS:
4652 can_negate = 1;
4653 break;
4655 case IOR:
4656 if (remainder == 0xffffffff)
4658 if (generate)
4659 emit_constant_insn (cond,
4660 gen_rtx_SET (target,
4661 GEN_INT (ARM_SIGN_EXTEND (val))));
4662 return 1;
4665 if (remainder == 0)
4667 if (reload_completed && rtx_equal_p (target, source))
4668 return 0;
4670 if (generate)
4671 emit_constant_insn (cond, gen_rtx_SET (target, source));
4672 return 1;
4674 break;
4676 case AND:
4677 if (remainder == 0)
4679 if (generate)
4680 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4681 return 1;
4683 if (remainder == 0xffffffff)
4685 if (reload_completed && rtx_equal_p (target, source))
4686 return 0;
4687 if (generate)
4688 emit_constant_insn (cond, gen_rtx_SET (target, source));
4689 return 1;
4691 can_invert = 1;
4692 break;
4694 case XOR:
4695 if (remainder == 0)
4697 if (reload_completed && rtx_equal_p (target, source))
4698 return 0;
4699 if (generate)
4700 emit_constant_insn (cond, gen_rtx_SET (target, source));
4701 return 1;
4704 if (remainder == 0xffffffff)
4706 if (generate)
4707 emit_constant_insn (cond,
4708 gen_rtx_SET (target,
4709 gen_rtx_NOT (mode, source)));
4710 return 1;
4712 final_invert = 1;
4713 break;
4715 case MINUS:
4716 /* We treat MINUS as (val - source), since (source - val) is always
4717 passed as (source + (-val)). */
4718 if (remainder == 0)
4720 if (generate)
4721 emit_constant_insn (cond,
4722 gen_rtx_SET (target,
4723 gen_rtx_NEG (mode, source)));
4724 return 1;
4726 if (const_ok_for_arm (val))
4728 if (generate)
4729 emit_constant_insn (cond,
4730 gen_rtx_SET (target,
4731 gen_rtx_MINUS (mode, GEN_INT (val),
4732 source)));
4733 return 1;
4736 break;
4738 default:
4739 gcc_unreachable ();
4742 /* If we can do it in one insn get out quickly. */
4743 if (const_ok_for_op (val, code))
4745 if (generate)
4746 emit_constant_insn (cond,
4747 gen_rtx_SET (target,
4748 (source
4749 ? gen_rtx_fmt_ee (code, mode, source,
4750 GEN_INT (val))
4751 : GEN_INT (val))));
4752 return 1;
4755 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4756 insn. */
4757 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4758 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4760 if (generate)
4762 if (mode == SImode && i == 16)
4763 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4764 smaller insn. */
4765 emit_constant_insn (cond,
4766 gen_zero_extendhisi2
4767 (target, gen_lowpart (HImode, source)));
4768 else
4769 /* Extz only supports SImode, but we can coerce the operands
4770 into that mode. */
4771 emit_constant_insn (cond,
4772 gen_extzv_t2 (gen_lowpart (SImode, target),
4773 gen_lowpart (SImode, source),
4774 GEN_INT (i), const0_rtx));
4777 return 1;
4780 /* Calculate a few attributes that may be useful for specific
4781 optimizations. */
4782 /* Count number of leading zeros. */
4783 for (i = 31; i >= 0; i--)
4785 if ((remainder & (1 << i)) == 0)
4786 clear_sign_bit_copies++;
4787 else
4788 break;
4791 /* Count number of leading 1's. */
4792 for (i = 31; i >= 0; i--)
4794 if ((remainder & (1 << i)) != 0)
4795 set_sign_bit_copies++;
4796 else
4797 break;
4800   /* Count number of trailing zeros.  */
4801 for (i = 0; i <= 31; i++)
4803 if ((remainder & (1 << i)) == 0)
4804 clear_zero_bit_copies++;
4805 else
4806 break;
4809 /* Count number of trailing 1's. */
4810 for (i = 0; i <= 31; i++)
4812 if ((remainder & (1 << i)) != 0)
4813 set_zero_bit_copies++;
4814 else
4815 break;
4818 switch (code)
4820 case SET:
4821 /* See if we can do this by sign_extending a constant that is known
4822 to be negative. This is a good, way of doing it, since the shift
4823 may well merge into a subsequent insn. */
4824 if (set_sign_bit_copies > 1)
4826 if (const_ok_for_arm
4827 (temp1 = ARM_SIGN_EXTEND (remainder
4828 << (set_sign_bit_copies - 1))))
4830 if (generate)
4832 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4833 emit_constant_insn (cond,
4834 gen_rtx_SET (new_src, GEN_INT (temp1)));
4835 emit_constant_insn (cond,
4836 gen_ashrsi3 (target, new_src,
4837 GEN_INT (set_sign_bit_copies - 1)));
4839 return 2;
4841 /* For an inverted constant, we will need to set the low bits,
4842 these will be shifted out of harm's way. */
4843 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4844 if (const_ok_for_arm (~temp1))
4846 if (generate)
4848 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4849 emit_constant_insn (cond,
4850 gen_rtx_SET (new_src, GEN_INT (temp1)));
4851 emit_constant_insn (cond,
4852 gen_ashrsi3 (target, new_src,
4853 GEN_INT (set_sign_bit_copies - 1)));
4855 return 2;
4859 /* See if we can calculate the value as the difference between two
4860 valid immediates. */
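      /* Worked example (illustrative): remainder == 0x000fffff gives
	 topshift == 12, temp1 == 0x00100000 and temp2 == 1, so the value
	 is built as "mov Rd, #0x100000" followed by "sub Rd, Rd, #1".  */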
4861 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4863 int topshift = clear_sign_bit_copies & ~1;
4865 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4866 & (0xff000000 >> topshift));
4868 /* If temp1 is zero, then that means the 9 most significant
4869 bits of remainder were 1 and we've caused it to overflow.
4870 When topshift is 0 we don't need to do anything since we
4871 can borrow from 'bit 32'. */
4872 if (temp1 == 0 && topshift != 0)
4873 temp1 = 0x80000000 >> (topshift - 1);
4875 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4877 if (const_ok_for_arm (temp2))
4879 if (generate)
4881 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4882 emit_constant_insn (cond,
4883 gen_rtx_SET (new_src, GEN_INT (temp1)));
4884 emit_constant_insn (cond,
4885 gen_addsi3 (target, new_src,
4886 GEN_INT (-temp2)));
4889 return 2;
4893 /* See if we can generate this by setting the bottom (or the top)
4894 16 bits, and then shifting these into the other half of the
4895 word. We only look for the simplest cases, to do more would cost
4896 too much. Be careful, however, not to generate this when the
4897 alternative would take fewer insns. */
4898 if (val & 0xffff0000)
4900 temp1 = remainder & 0xffff0000;
4901 temp2 = remainder & 0x0000ffff;
4903 /* Overlaps outside this range are best done using other methods. */
4904 for (i = 9; i < 24; i++)
4906 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4907 && !const_ok_for_arm (temp2))
4909 rtx new_src = (subtargets
4910 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4911 : target);
4912 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4913 source, subtargets, generate);
4914 source = new_src;
4915 if (generate)
4916 emit_constant_insn
4917 (cond,
4918 gen_rtx_SET
4919 (target,
4920 gen_rtx_IOR (mode,
4921 gen_rtx_ASHIFT (mode, source,
4922 GEN_INT (i)),
4923 source)));
4924 return insns + 1;
4928 /* Don't duplicate cases already considered. */
4929 for (i = 17; i < 24; i++)
4931 if (((temp1 | (temp1 >> i)) == remainder)
4932 && !const_ok_for_arm (temp1))
4934 rtx new_src = (subtargets
4935 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4936 : target);
4937 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4938 source, subtargets, generate);
4939 source = new_src;
4940 if (generate)
4941 emit_constant_insn
4942 (cond,
4943 gen_rtx_SET (target,
4944 gen_rtx_IOR
4945 (mode,
4946 gen_rtx_LSHIFTRT (mode, source,
4947 GEN_INT (i)),
4948 source)));
4949 return insns + 1;
4953 break;
4955 case IOR:
4956 case XOR:
4957 /* If we have IOR or XOR, and the constant can be loaded in a
4958 single instruction, and we can find a temporary to put it in,
4959 then this can be done in two instructions instead of 3-4. */
4960 if (subtargets
4961 /* TARGET can't be NULL if SUBTARGETS is 0 */
4962 || (reload_completed && !reg_mentioned_p (target, source)))
4964 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4966 if (generate)
4968 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4970 emit_constant_insn (cond,
4971 gen_rtx_SET (sub, GEN_INT (val)));
4972 emit_constant_insn (cond,
4973 gen_rtx_SET (target,
4974 gen_rtx_fmt_ee (code, mode,
4975 source, sub)));
4977 return 2;
4981 if (code == XOR)
4982 break;
4984 /* Convert.
4985 	 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4986 			   and the remainder 0s, e.g. 0xfff00000)
4987 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4989 This can be done in 2 instructions by using shifts with mov or mvn.
4990 e.g. for
4991 x = x | 0xfff00000;
4992 we generate.
4993 mvn r0, r0, asl #12
4994 mvn r0, r0, lsr #12 */
4995 if (set_sign_bit_copies > 8
4996 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4998 if (generate)
5000 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5001 rtx shift = GEN_INT (set_sign_bit_copies);
5003 emit_constant_insn
5004 (cond,
5005 gen_rtx_SET (sub,
5006 gen_rtx_NOT (mode,
5007 gen_rtx_ASHIFT (mode,
5008 source,
5009 shift))));
5010 emit_constant_insn
5011 (cond,
5012 gen_rtx_SET (target,
5013 gen_rtx_NOT (mode,
5014 gen_rtx_LSHIFTRT (mode, sub,
5015 shift))));
5017 return 2;
5020 /* Convert
5021 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5023 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5025 	 For example, r0 = r0 | 0xfff
5026 mvn r0, r0, lsr #12
5027 mvn r0, r0, asl #12
5030 if (set_zero_bit_copies > 8
5031 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5033 if (generate)
5035 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5036 rtx shift = GEN_INT (set_zero_bit_copies);
5038 emit_constant_insn
5039 (cond,
5040 gen_rtx_SET (sub,
5041 gen_rtx_NOT (mode,
5042 gen_rtx_LSHIFTRT (mode,
5043 source,
5044 shift))));
5045 emit_constant_insn
5046 (cond,
5047 gen_rtx_SET (target,
5048 gen_rtx_NOT (mode,
5049 gen_rtx_ASHIFT (mode, sub,
5050 shift))));
5052 return 2;
5055 /* This will never be reached for Thumb2 because orn is a valid
5056 instruction. This is for Thumb1 and the ARM 32 bit cases.
5058 x = y | constant (such that ~constant is a valid constant)
5059 Transform this to
5060 x = ~(~y & ~constant).
5062 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5064 if (generate)
5066 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5067 emit_constant_insn (cond,
5068 gen_rtx_SET (sub,
5069 gen_rtx_NOT (mode, source)));
5070 source = sub;
5071 if (subtargets)
5072 sub = gen_reg_rtx (mode);
5073 emit_constant_insn (cond,
5074 gen_rtx_SET (sub,
5075 gen_rtx_AND (mode, source,
5076 GEN_INT (temp1))));
5077 emit_constant_insn (cond,
5078 gen_rtx_SET (target,
5079 gen_rtx_NOT (mode, sub)));
5081 return 3;
5083 break;
5085 case AND:
5086 /* See if two shifts will do 2 or more insn's worth of work. */
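      /* Worked example (illustrative): on a pre-Thumb-2 core,
	 (AND x 0x00007fff) has clear_sign_bit_copies == 17 and
	 remainder | shift_mask is already all ones, so the result is
	 produced with just "lsl Rd, Rn, #17" followed by
	 "lsr Rd, Rd, #17".  */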
5087 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5089 HOST_WIDE_INT shift_mask = ((0xffffffff
5090 << (32 - clear_sign_bit_copies))
5091 & 0xffffffff);
5093 if ((remainder | shift_mask) != 0xffffffff)
5095 HOST_WIDE_INT new_val
5096 = ARM_SIGN_EXTEND (remainder | shift_mask);
5098 if (generate)
5100 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5101 insns = arm_gen_constant (AND, SImode, cond, new_val,
5102 new_src, source, subtargets, 1);
5103 source = new_src;
5105 else
5107 rtx targ = subtargets ? NULL_RTX : target;
5108 insns = arm_gen_constant (AND, mode, cond, new_val,
5109 targ, source, subtargets, 0);
5113 if (generate)
5115 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5116 rtx shift = GEN_INT (clear_sign_bit_copies);
5118 emit_insn (gen_ashlsi3 (new_src, source, shift));
5119 emit_insn (gen_lshrsi3 (target, new_src, shift));
5122 return insns + 2;
5125 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5127 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5129 if ((remainder | shift_mask) != 0xffffffff)
5131 HOST_WIDE_INT new_val
5132 = ARM_SIGN_EXTEND (remainder | shift_mask);
5133 if (generate)
5135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5137 insns = arm_gen_constant (AND, mode, cond, new_val,
5138 new_src, source, subtargets, 1);
5139 source = new_src;
5141 else
5143 rtx targ = subtargets ? NULL_RTX : target;
5145 insns = arm_gen_constant (AND, mode, cond, new_val,
5146 targ, source, subtargets, 0);
5150 if (generate)
5152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5153 rtx shift = GEN_INT (clear_zero_bit_copies);
5155 emit_insn (gen_lshrsi3 (new_src, source, shift));
5156 emit_insn (gen_ashlsi3 (target, new_src, shift));
5159 return insns + 2;
5162 break;
5164 default:
5165 break;
5168 /* Calculate what the instruction sequences would be if we generated it
5169 normally, negated, or inverted. */
5170 if (code == AND)
5171 /* AND cannot be split into multiple insns, so invert and use BIC. */
5172 insns = 99;
5173 else
5174 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5176 if (can_negate)
5177 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5178 &neg_immediates);
5179 else
5180 neg_insns = 99;
5182 if (can_invert || final_invert)
5183 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5184 &inv_immediates);
5185 else
5186 inv_insns = 99;
5188 immediates = &pos_immediates;
5190 /* Is the negated immediate sequence more efficient? */
5191 if (neg_insns < insns && neg_insns <= inv_insns)
5193 insns = neg_insns;
5194 immediates = &neg_immediates;
5196 else
5197 can_negate = 0;
5199 /* Is the inverted immediate sequence more efficient?
5200 We must allow for an extra NOT instruction for XOR operations, although
5201 there is some chance that the final 'mvn' will get optimized later. */
5202 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5204 insns = inv_insns;
5205 immediates = &inv_immediates;
5207 else
5209 can_invert = 0;
5210 final_invert = 0;
5213 /* Now output the chosen sequence as instructions. */
5214 if (generate)
5216 for (i = 0; i < insns; i++)
5218 rtx new_src, temp1_rtx;
5220 temp1 = immediates->i[i];
5222 if (code == SET || code == MINUS)
5223 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5224 else if ((final_invert || i < (insns - 1)) && subtargets)
5225 new_src = gen_reg_rtx (mode);
5226 else
5227 new_src = target;
5229 if (can_invert)
5230 temp1 = ~temp1;
5231 else if (can_negate)
5232 temp1 = -temp1;
5234 temp1 = trunc_int_for_mode (temp1, mode);
5235 temp1_rtx = GEN_INT (temp1);
5237 if (code == SET)
5239 else if (code == MINUS)
5240 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5241 else
5242 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5244 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5245 source = new_src;
5247 if (code == SET)
5249 can_negate = can_invert;
5250 can_invert = 0;
5251 code = PLUS;
5253 else if (code == MINUS)
5254 code = PLUS;
5258 if (final_invert)
5260 if (generate)
5261 emit_constant_insn (cond, gen_rtx_SET (target,
5262 gen_rtx_NOT (mode, source)));
5263 insns++;
5266 return insns;
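/* Illustration only (hypothetical operands, not part of the compiler):
   two of the tricks used above when a constant does not encode as an
   ARM immediate.  AND cannot be split across several instructions, so
   an unencodable mask is handled through BIC of its inverse:

	and	rd, rn, #0xffffff00	@ mask itself not encodable
     becomes
	bic	rd, rn, #0x000000ff	@ inverted constant encodes

   and clearing sixteen or more leading bits can use a shift pair
   instead of materializing the mask:

	lsl	rd, rn, #16
	lsr	rd, rd, #16		@ rd = rn & 0x0000ffff  */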
5269 /* Canonicalize a comparison so that we are more likely to recognize it.
5270 This can be done for a few constant compares, where we can make the
5271 immediate value easier to load. */
5273 static void
5274 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5275 bool op0_preserve_value)
5277 machine_mode mode;
5278 unsigned HOST_WIDE_INT i, maxval;
5280 mode = GET_MODE (*op0);
5281 if (mode == VOIDmode)
5282 mode = GET_MODE (*op1);
5284 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5286 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5287 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5288 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5289 for GTU/LEU in Thumb mode. */
5290 if (mode == DImode)
5293 if (*code == GT || *code == LE
5294 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5296 /* Missing comparison. First try to use an available
5297 comparison. */
5298 if (CONST_INT_P (*op1))
5300 i = INTVAL (*op1);
5301 switch (*code)
5303 case GT:
5304 case LE:
5305 if (i != maxval
5306 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5308 *op1 = GEN_INT (i + 1);
5309 *code = *code == GT ? GE : LT;
5310 return;
5312 break;
5313 case GTU:
5314 case LEU:
5315 if (i != ~((unsigned HOST_WIDE_INT) 0)
5316 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5318 *op1 = GEN_INT (i + 1);
5319 *code = *code == GTU ? GEU : LTU;
5320 return;
5322 break;
5323 default:
5324 gcc_unreachable ();
5328 /* If that did not work, reverse the condition. */
5329 if (!op0_preserve_value)
5331 std::swap (*op0, *op1);
5332 *code = (int)swap_condition ((enum rtx_code)*code);
5335 return;
5338 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5339 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5340 to facilitate possible combining with a cmp into 'ands'. */
5341 if (mode == SImode
5342 && GET_CODE (*op0) == ZERO_EXTEND
5343 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5344 && GET_MODE (XEXP (*op0, 0)) == QImode
5345 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5346 && subreg_lowpart_p (XEXP (*op0, 0))
5347 && *op1 == const0_rtx)
5348 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5349 GEN_INT (255));
5351 /* Comparisons smaller than DImode. Only adjust comparisons against
5352 an out-of-range constant. */
5353 if (!CONST_INT_P (*op1)
5354 || const_ok_for_arm (INTVAL (*op1))
5355 || const_ok_for_arm (- INTVAL (*op1)))
5356 return;
5358 i = INTVAL (*op1);
5360 switch (*code)
5362 case EQ:
5363 case NE:
5364 return;
5366 case GT:
5367 case LE:
5368 if (i != maxval
5369 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5371 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5372 *code = *code == GT ? GE : LT;
5373 return;
5375 break;
5377 case GE:
5378 case LT:
5379 if (i != ~maxval
5380 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5382 *op1 = GEN_INT (i - 1);
5383 *code = *code == GE ? GT : LE;
5384 return;
5386 break;
5388 case GTU:
5389 case LEU:
5390 if (i != ~((unsigned HOST_WIDE_INT) 0)
5391 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5393 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5394 *code = *code == GTU ? GEU : LTU;
5395 return;
5397 break;
5399 case GEU:
5400 case LTU:
5401 if (i != 0
5402 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5404 *op1 = GEN_INT (i - 1);
5405 *code = *code == GEU ? GTU : LEU;
5406 return;
5408 break;
5410 default:
5411 gcc_unreachable ();
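/* Illustration only (hypothetical constants): the adjustment above lets
   a comparison use an encodable immediate.  For a signed int x,

	if (x > 0xfff) ...		(0xfff is not a valid immediate)

   is treated as the equivalent

	if (x >= 0x1000) ...		(cmp rn, #4096 encodes directly)

   The zero_extend case rewrites a compare of (unsigned char) x against
   zero as (x & 255) == 0 so that combine can fold it into a single
   'ands' instruction.  */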
5416 /* Define how to find the value returned by a function. */
5418 static rtx
5419 arm_function_value (const_tree type, const_tree func,
5420 bool outgoing ATTRIBUTE_UNUSED)
5422 machine_mode mode;
5423 int unsignedp ATTRIBUTE_UNUSED;
5424 rtx r ATTRIBUTE_UNUSED;
5426 mode = TYPE_MODE (type);
5428 if (TARGET_AAPCS_BASED)
5429 return aapcs_allocate_return_reg (mode, type, func);
5431 /* Promote integer types. */
5432 if (INTEGRAL_TYPE_P (type))
5433 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5435 /* Promote small structs returned in a register to full-word size
5436 for big-endian AAPCS. */
5437 if (arm_return_in_msb (type))
5439 HOST_WIDE_INT size = int_size_in_bytes (type);
5440 if (size % UNITS_PER_WORD != 0)
5442 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5443 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5447 return arm_libcall_value_1 (mode);
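/* Illustration only (hypothetical type): on a big-endian AAPCS target a
   small struct returned in r0 is widened so that the padding ends up in
   the low-order bytes:

	struct s { char a, b, c; };	size 3 -> rounded up to 4,
					returned as an SImode value with
					the three data bytes in the MSBs.  */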
5450 /* libcall hashtable helpers. */
5452 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5454 static inline hashval_t hash (const rtx_def *);
5455 static inline bool equal (const rtx_def *, const rtx_def *);
5456 static inline void remove (rtx_def *);
5459 inline bool
5460 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5462 return rtx_equal_p (p1, p2);
5465 inline hashval_t
5466 libcall_hasher::hash (const rtx_def *p1)
5468 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5471 typedef hash_table<libcall_hasher> libcall_table_type;
5473 static void
5474 add_libcall (libcall_table_type *htab, rtx libcall)
5476 *htab->find_slot (libcall, INSERT) = libcall;
5479 static bool
5480 arm_libcall_uses_aapcs_base (const_rtx libcall)
5482 static bool init_done = false;
5483 static libcall_table_type *libcall_htab = NULL;
5485 if (!init_done)
5487 init_done = true;
5489 libcall_htab = new libcall_table_type (31);
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5494 add_libcall (libcall_htab,
5495 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5496 add_libcall (libcall_htab,
5497 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5505 add_libcall (libcall_htab,
5506 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5518 add_libcall (libcall_htab,
5519 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5520 add_libcall (libcall_htab,
5521 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5522 add_libcall (libcall_htab,
5523 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5525 /* Values from double-precision helper functions are returned in core
5526 registers if the selected core only supports single-precision
5527 arithmetic, even if we are using the hard-float ABI. The same is
5528 true for single-precision helpers, but we will never be using the
5529 hard-float ABI on a CPU which doesn't support single-precision
5530 operations in hardware. */
5531 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5536 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5537 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5538 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5539 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5540 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5541 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5542 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5543 SFmode));
5544 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5545 DFmode));
5546 add_libcall (libcall_htab,
5547 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5550 return libcall && libcall_htab->find (libcall) != NULL;
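/* Illustration only: on a single-precision-only FPU (e.g.
   -mfpu=fpv4-sp-d16 -mfloat-abi=hard) a DFmode addition is expanded as
   a call to the __aeabi_dadd helper.  That helper follows the base
   (soft-float) AAPCS, so arm_libcall_value below must look for its
   result in r0/r1 rather than in d0, which is exactly what the table
   built above records.  */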
5553 static rtx
5554 arm_libcall_value_1 (machine_mode mode)
5556 if (TARGET_AAPCS_BASED)
5557 return aapcs_libcall_value (mode);
5558 else if (TARGET_IWMMXT_ABI
5559 && arm_vector_mode_supported_p (mode))
5560 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5561 else
5562 return gen_rtx_REG (mode, ARG_REGISTER (1));
5565 /* Define how to find the value returned by a library function
5566 assuming the value has mode MODE. */
5568 static rtx
5569 arm_libcall_value (machine_mode mode, const_rtx libcall)
5571 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5572 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 /* The following libcalls return their result in integer registers,
5575 even though they return a floating point value. */
5576 if (arm_libcall_uses_aapcs_base (libcall))
5577 return gen_rtx_REG (mode, ARG_REGISTER (1));
5581 return arm_libcall_value_1 (mode);
5584 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5586 static bool
5587 arm_function_value_regno_p (const unsigned int regno)
5589 if (regno == ARG_REGISTER (1)
5590 || (TARGET_32BIT
5591 && TARGET_AAPCS_BASED
5592 && TARGET_HARD_FLOAT
5593 && regno == FIRST_VFP_REGNUM)
5594 || (TARGET_IWMMXT_ABI
5595 && regno == FIRST_IWMMXT_REGNUM))
5596 return true;
5598 return false;
5601 /* Determine the amount of memory needed to store the possible return
5602 registers of an untyped call. */
5603 int
5604 arm_apply_result_size (void)
5606 int size = 16;
5608 if (TARGET_32BIT)
5610 if (TARGET_HARD_FLOAT_ABI)
5611 size += 32;
5612 if (TARGET_IWMMXT_ABI)
5613 size += 8;
5616 return size;
5619 /* Decide whether TYPE should be returned in memory (true)
5620 or in a register (false). FNTYPE is the type of the function making
5621 the call. */
5622 static bool
5623 arm_return_in_memory (const_tree type, const_tree fntype)
5625 HOST_WIDE_INT size;
5627 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5629 if (TARGET_AAPCS_BASED)
5631 /* Simple, non-aggregate types (i.e. not including vectors and
5632 complex) are always returned in a register (or registers).
5633 We don't care about which register here, so we can short-cut
5634 some of the detail. */
5635 if (!AGGREGATE_TYPE_P (type)
5636 && TREE_CODE (type) != VECTOR_TYPE
5637 && TREE_CODE (type) != COMPLEX_TYPE)
5638 return false;
5640 /* Any return value that is no larger than one word can be
5641 returned in r0. */
5642 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5643 return false;
5645 /* Check any available co-processors to see if they accept the
5646 type as a register candidate (VFP, for example, can return
5647 some aggregates in consecutive registers). These aren't
5648 available if the call is variadic. */
5649 if (aapcs_select_return_coproc (type, fntype) >= 0)
5650 return false;
5652 /* Vector values should be returned using ARM registers, not
5653 memory (unless they're over 16 bytes, which will break since
5654 we only have four call-clobbered registers to play with). */
5655 if (TREE_CODE (type) == VECTOR_TYPE)
5656 return (size < 0 || size > (4 * UNITS_PER_WORD));
5658 /* The rest go in memory. */
5659 return true;
5662 if (TREE_CODE (type) == VECTOR_TYPE)
5663 return (size < 0 || size > (4 * UNITS_PER_WORD));
5665 if (!AGGREGATE_TYPE_P (type)
5666 && TREE_CODE (type) != VECTOR_TYPE)
5667 /* All simple types are returned in registers. */
5668 return false;
5670 if (arm_abi != ARM_ABI_APCS)
5672 /* ATPCS and later return aggregate types in memory only if they are
5673 larger than a word (or are variable size). */
5674 return (size < 0 || size > UNITS_PER_WORD);
5677 /* For the arm-wince targets we choose to be compatible with Microsoft's
5678 ARM and Thumb compilers, which always return aggregates in memory. */
5679 #ifndef ARM_WINCE
5680 /* All structures/unions bigger than one word are returned in memory.
5681 Also catch the case where int_size_in_bytes returns -1. In this case
5682 the aggregate is either huge or of variable size, and in either case
5683 we will want to return it via memory and not in a register. */
5684 if (size < 0 || size > UNITS_PER_WORD)
5685 return true;
5687 if (TREE_CODE (type) == RECORD_TYPE)
5689 tree field;
5691 /* For a struct the APCS says that we only return in a register
5692 if the type is 'integer like' and every addressable element
5693 has an offset of zero. For practical purposes this means
5694 that the structure can have at most one non bit-field element
5695 and that this element must be the first one in the structure. */
5697 /* Find the first field, ignoring non FIELD_DECL things which will
5698 have been created by C++. */
5699 for (field = TYPE_FIELDS (type);
5700 field && TREE_CODE (field) != FIELD_DECL;
5701 field = DECL_CHAIN (field))
5702 continue;
5704 if (field == NULL)
5705 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5707 /* Check that the first field is valid for returning in a register. */
5709 /* ... Floats are not allowed */
5710 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5711 return true;
5713 /* ... Aggregates that are not themselves valid for returning in
5714 a register are not allowed. */
5715 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5716 return true;
5718 /* Now check the remaining fields, if any. Only bitfields are allowed,
5719 since they are not addressable. */
5720 for (field = DECL_CHAIN (field);
5721 field;
5722 field = DECL_CHAIN (field))
5724 if (TREE_CODE (field) != FIELD_DECL)
5725 continue;
5727 if (!DECL_BIT_FIELD_TYPE (field))
5728 return true;
5731 return false;
5734 if (TREE_CODE (type) == UNION_TYPE)
5736 tree field;
5738 /* Unions can be returned in registers if every element is
5739 integral, or can be returned in an integer register. */
5740 for (field = TYPE_FIELDS (type);
5741 field;
5742 field = DECL_CHAIN (field))
5744 if (TREE_CODE (field) != FIELD_DECL)
5745 continue;
5747 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5748 return true;
5750 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5751 return true;
5754 return false;
5756 #endif /* not ARM_WINCE */
5758 /* Return all other types in memory. */
5759 return true;
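/* Illustration only (hypothetical types), for the legacy APCS rules
   above:

	struct a { int x; };			returned in r0
	struct b { int x : 8; int y : 24; };	returned in r0 (bit-fields)
	struct c { float f; };			returned in memory
	struct d { int x; int y; };		returned in memory (size 8)

   Under AAPCS the decision is made earlier in this function, purely on
   size and on co-processor (VFP) eligibility.  */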
5762 const struct pcs_attribute_arg
5764 const char *arg;
5765 enum arm_pcs value;
5766 } pcs_attribute_args[] =
5768 {"aapcs", ARM_PCS_AAPCS},
5769 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5770 #if 0
5771 /* We could recognize these, but changes would be needed elsewhere
5772 * to implement them. */
5773 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5774 {"atpcs", ARM_PCS_ATPCS},
5775 {"apcs", ARM_PCS_APCS},
5776 #endif
5777 {NULL, ARM_PCS_UNKNOWN}
5780 static enum arm_pcs
5781 arm_pcs_from_attribute (tree attr)
5783 const struct pcs_attribute_arg *ptr;
5784 const char *arg;
5786 /* Get the value of the argument. */
5787 if (TREE_VALUE (attr) == NULL_TREE
5788 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5789 return ARM_PCS_UNKNOWN;
5791 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5793 /* Check it against the list of known arguments. */
5794 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5795 if (streq (arg, ptr->arg))
5796 return ptr->value;
5798 /* An unrecognized PCS variant. */
5799 return ARM_PCS_UNKNOWN;
5802 /* Get the PCS variant to use for this call. TYPE is the function's type
5803 specification, DECL is the specific declaration. DECL may be null if
5804 the call could be indirect or if this is a library call. */
5805 static enum arm_pcs
5806 arm_get_pcs_model (const_tree type, const_tree decl)
5808 bool user_convention = false;
5809 enum arm_pcs user_pcs = arm_pcs_default;
5810 tree attr;
5812 gcc_assert (type);
5814 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5815 if (attr)
5817 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5818 user_convention = true;
5821 if (TARGET_AAPCS_BASED)
5823 /* Detect varargs functions. These always use the base rules
5824 (no argument is ever a candidate for a co-processor
5825 register). */
5826 bool base_rules = stdarg_p (type);
5828 if (user_convention)
5830 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5831 sorry ("non-AAPCS derived PCS variant");
5832 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5833 error ("variadic functions must use the base AAPCS variant");
5836 if (base_rules)
5837 return ARM_PCS_AAPCS;
5838 else if (user_convention)
5839 return user_pcs;
5840 else if (decl && flag_unit_at_a_time)
5842 /* Local functions never leak outside this compilation unit,
5843 so we are free to use whatever conventions are
5844 appropriate. */
5845 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5846 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5847 if (i && i->local)
5848 return ARM_PCS_AAPCS_LOCAL;
5851 else if (user_convention && user_pcs != arm_pcs_default)
5852 sorry ("PCS variant");
5854 /* For everything else we use the target's default. */
5855 return arm_pcs_default;
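/* Illustration only (hypothetical declaration): the "pcs" attribute
   parsed above selects the calling convention per function type, e.g.

	double scale (double x, double y)
	  __attribute__ ((pcs ("aapcs-vfp")));

   asks for the VFP variant for this one function, so x, y and the
   result use d0/d1 even when the translation unit defaults to the base
   "aapcs" (soft-float register) variant.  */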
5859 static void
5860 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5861 const_tree fntype ATTRIBUTE_UNUSED,
5862 rtx libcall ATTRIBUTE_UNUSED,
5863 const_tree fndecl ATTRIBUTE_UNUSED)
5865 /* Record the unallocated VFP registers. */
5866 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5867 pcum->aapcs_vfp_reg_alloc = 0;
5870 /* Walk down the type tree of TYPE counting consecutive base elements.
5871 If *MODEP is VOIDmode, then set it to the first valid floating point
5872 type. If a non-floating point type is found, or if a floating point
5873 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5874 otherwise return the count in the sub-tree. */
5875 static int
5876 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5878 machine_mode mode;
5879 HOST_WIDE_INT size;
5881 switch (TREE_CODE (type))
5883 case REAL_TYPE:
5884 mode = TYPE_MODE (type);
5885 if (mode != DFmode && mode != SFmode && mode != HFmode)
5886 return -1;
5888 if (*modep == VOIDmode)
5889 *modep = mode;
5891 if (*modep == mode)
5892 return 1;
5894 break;
5896 case COMPLEX_TYPE:
5897 mode = TYPE_MODE (TREE_TYPE (type));
5898 if (mode != DFmode && mode != SFmode)
5899 return -1;
5901 if (*modep == VOIDmode)
5902 *modep = mode;
5904 if (*modep == mode)
5905 return 2;
5907 break;
5909 case VECTOR_TYPE:
5910 /* Use V2SImode and V4SImode as representatives of all 64-bit
5911 and 128-bit vector types, whether or not those modes are
5912 supported with the present options. */
5913 size = int_size_in_bytes (type);
5914 switch (size)
5916 case 8:
5917 mode = V2SImode;
5918 break;
5919 case 16:
5920 mode = V4SImode;
5921 break;
5922 default:
5923 return -1;
5926 if (*modep == VOIDmode)
5927 *modep = mode;
5929 /* Vector modes are considered to be opaque: two vectors are
5930 equivalent for the purposes of being homogeneous aggregates
5931 if they are the same size. */
5932 if (*modep == mode)
5933 return 1;
5935 break;
5937 case ARRAY_TYPE:
5939 int count;
5940 tree index = TYPE_DOMAIN (type);
5942 /* Can't handle incomplete types nor sizes that are not
5943 fixed. */
5944 if (!COMPLETE_TYPE_P (type)
5945 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5946 return -1;
5948 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5949 if (count == -1
5950 || !index
5951 || !TYPE_MAX_VALUE (index)
5952 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5953 || !TYPE_MIN_VALUE (index)
5954 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5955 || count < 0)
5956 return -1;
5958 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5959 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5961 /* There must be no padding. */
5962 if (wi::to_wide (TYPE_SIZE (type))
5963 != count * GET_MODE_BITSIZE (*modep))
5964 return -1;
5966 return count;
5969 case RECORD_TYPE:
5971 int count = 0;
5972 int sub_count;
5973 tree field;
5975 /* Can't handle incomplete types nor sizes that are not
5976 fixed. */
5977 if (!COMPLETE_TYPE_P (type)
5978 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5979 return -1;
5981 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5983 if (TREE_CODE (field) != FIELD_DECL)
5984 continue;
5986 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5987 if (sub_count < 0)
5988 return -1;
5989 count += sub_count;
5992 /* There must be no padding. */
5993 if (wi::to_wide (TYPE_SIZE (type))
5994 != count * GET_MODE_BITSIZE (*modep))
5995 return -1;
5997 return count;
6000 case UNION_TYPE:
6001 case QUAL_UNION_TYPE:
6003 /* These aren't very interesting except in a degenerate case. */
6004 int count = 0;
6005 int sub_count;
6006 tree field;
6008 /* Can't handle incomplete types nor sizes that are not
6009 fixed. */
6010 if (!COMPLETE_TYPE_P (type)
6011 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6012 return -1;
6014 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6016 if (TREE_CODE (field) != FIELD_DECL)
6017 continue;
6019 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6020 if (sub_count < 0)
6021 return -1;
6022 count = count > sub_count ? count : sub_count;
6025 /* There must be no padding. */
6026 if (wi::to_wide (TYPE_SIZE (type))
6027 != count * GET_MODE_BITSIZE (*modep))
6028 return -1;
6030 return count;
6033 default:
6034 break;
6037 return -1;
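/* Illustration only (hypothetical types) for the walk above:

	struct v4 { float x, y, z, w; };	 4 elements of SFmode
	struct m { struct v4 a; struct v4 b; };	 8 elements, rejected later
						 (more than 4)
	struct p { float f; double d; };	 mixed base modes, -1

   Only the first is a homogeneous aggregate eligible for consecutive
   VFP registers (s0-s3).  */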
6040 /* Return true if PCS_VARIANT should use VFP registers. */
6041 static bool
6042 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6044 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6046 static bool seen_thumb1_vfp = false;
6048 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6050 sorry ("Thumb-1 hard-float VFP ABI");
6051 /* sorry() is not immediately fatal, so only display this once. */
6052 seen_thumb1_vfp = true;
6055 return true;
6058 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6059 return false;
6061 return (TARGET_32BIT && TARGET_HARD_FLOAT
6062 && (TARGET_VFP_DOUBLE || !is_double));
6065 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6066 suitable for passing or returning in VFP registers for the PCS
6067 variant selected. If it is, then *BASE_MODE is updated to contain
6068 a machine mode describing each element of the argument's type and
6069 *COUNT to hold the number of such elements. */
6070 static bool
6071 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6072 machine_mode mode, const_tree type,
6073 machine_mode *base_mode, int *count)
6075 machine_mode new_mode = VOIDmode;
6077 /* If we have the type information, prefer that to working things
6078 out from the mode. */
6079 if (type)
6081 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6083 if (ag_count > 0 && ag_count <= 4)
6084 *count = ag_count;
6085 else
6086 return false;
6088 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6089 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6090 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6092 *count = 1;
6093 new_mode = mode;
6095 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6097 *count = 2;
6098 new_mode = (mode == DCmode ? DFmode : SFmode);
6100 else
6101 return false;
6104 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6105 return false;
6107 *base_mode = new_mode;
6108 return true;
6111 static bool
6112 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6113 machine_mode mode, const_tree type)
6115 int count ATTRIBUTE_UNUSED;
6116 machine_mode ag_mode ATTRIBUTE_UNUSED;
6118 if (!use_vfp_abi (pcs_variant, false))
6119 return false;
6120 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6121 &ag_mode, &count);
6124 static bool
6125 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6126 const_tree type)
6128 if (!use_vfp_abi (pcum->pcs_variant, false))
6129 return false;
6131 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6132 &pcum->aapcs_vfp_rmode,
6133 &pcum->aapcs_vfp_rcount);
6136 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6137 for the behaviour of this function. */
6139 static bool
6140 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6141 const_tree type ATTRIBUTE_UNUSED)
6143 int rmode_size
6144 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6145 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6146 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6147 int regno;
6149 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6150 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6152 pcum->aapcs_vfp_reg_alloc = mask << regno;
6153 if (mode == BLKmode
6154 || (mode == TImode && ! TARGET_NEON)
6155 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6157 int i;
6158 int rcount = pcum->aapcs_vfp_rcount;
6159 int rshift = shift;
6160 machine_mode rmode = pcum->aapcs_vfp_rmode;
6161 rtx par;
6162 if (!TARGET_NEON)
6164 /* Avoid using unsupported vector modes. */
6165 if (rmode == V2SImode)
6166 rmode = DImode;
6167 else if (rmode == V4SImode)
6169 rmode = DImode;
6170 rcount *= 2;
6171 rshift /= 2;
6174 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6175 for (i = 0; i < rcount; i++)
6177 rtx tmp = gen_rtx_REG (rmode,
6178 FIRST_VFP_REGNUM + regno + i * rshift);
6179 tmp = gen_rtx_EXPR_LIST
6180 (VOIDmode, tmp,
6181 GEN_INT (i * GET_MODE_SIZE (rmode)));
6182 XVECEXP (par, 0, i) = tmp;
6185 pcum->aapcs_reg = par;
6187 else
6188 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6189 return true;
6191 return false;
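/* Illustration only, tracing the mask arithmetic above for a
   homogeneous aggregate of two doubles (aapcs_vfp_rmode == DFmode,
   aapcs_vfp_rcount == 2):

	rmode_size = 8, shift = 8 / 4 = 2   (each double spans two S regs)
	mask = (1 << (2 * 2)) - 1 = 0xf	    (four consecutive S registers)

   The loop then scans regno = 0, 2, 4, ... for four free single
   registers, so the aggregate lands in d0/d1 (s0-s3) when those are
   still unallocated.  */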
6194 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6195 comment there for the behaviour of this function. */
6197 static rtx
6198 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6199 machine_mode mode,
6200 const_tree type ATTRIBUTE_UNUSED)
6202 if (!use_vfp_abi (pcs_variant, false))
6203 return NULL;
6205 if (mode == BLKmode
6206 || (GET_MODE_CLASS (mode) == MODE_INT
6207 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6208 && !TARGET_NEON))
6210 int count;
6211 machine_mode ag_mode;
6212 int i;
6213 rtx par;
6214 int shift;
6216 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6217 &ag_mode, &count);
6219 if (!TARGET_NEON)
6221 if (ag_mode == V2SImode)
6222 ag_mode = DImode;
6223 else if (ag_mode == V4SImode)
6225 ag_mode = DImode;
6226 count *= 2;
6229 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6230 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6231 for (i = 0; i < count; i++)
6233 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6234 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6235 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6236 XVECEXP (par, 0, i) = tmp;
6239 return par;
6242 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6245 static void
6246 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6247 machine_mode mode ATTRIBUTE_UNUSED,
6248 const_tree type ATTRIBUTE_UNUSED)
6250 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6251 pcum->aapcs_vfp_reg_alloc = 0;
6252 return;
6255 #define AAPCS_CP(X) \
6257 aapcs_ ## X ## _cum_init, \
6258 aapcs_ ## X ## _is_call_candidate, \
6259 aapcs_ ## X ## _allocate, \
6260 aapcs_ ## X ## _is_return_candidate, \
6261 aapcs_ ## X ## _allocate_return_reg, \
6262 aapcs_ ## X ## _advance \
6265 /* Table of co-processors that can be used to pass arguments in
6266 registers. Ideally no argument should be a candidate for more than
6267 one co-processor table entry, but the table is processed in order
6268 and stops after the first match. If that entry then fails to put
6269 the argument into a co-processor register, the argument will go on
6270 the stack. */
6271 static struct
6273 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6274 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6276 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6277 BLKmode) is a candidate for this co-processor's registers; this
6278 function should ignore any position-dependent state in
6279 CUMULATIVE_ARGS and only use call-type dependent information. */
6280 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282 /* Return true if the argument does get a co-processor register; it
6283 should set aapcs_reg to an RTX of the register allocated as is
6284 required for a return from FUNCTION_ARG. */
6285 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6287 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6288 be returned in this co-processor's registers. */
6289 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6291 /* Allocate and return an RTX element to hold the return type of a call. This
6292 routine must not fail and will only be called if is_return_candidate
6293 returned true with the same parameters. */
6294 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6296 /* Finish processing this argument and prepare to start processing
6297 the next one. */
6298 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6299 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6301 AAPCS_CP(vfp)
6304 #undef AAPCS_CP
6306 static int
6307 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6308 const_tree type)
6310 int i;
6312 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6313 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6314 return i;
6316 return -1;
6319 static int
6320 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6322 /* We aren't passed a decl, so we can't check that a call is local.
6323 However, it isn't clear that that would be a win anyway, since it
6324 might limit some tail-calling opportunities. */
6325 enum arm_pcs pcs_variant;
6327 if (fntype)
6329 const_tree fndecl = NULL_TREE;
6331 if (TREE_CODE (fntype) == FUNCTION_DECL)
6333 fndecl = fntype;
6334 fntype = TREE_TYPE (fntype);
6337 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6339 else
6340 pcs_variant = arm_pcs_default;
6342 if (pcs_variant != ARM_PCS_AAPCS)
6344 int i;
6346 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6347 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6348 TYPE_MODE (type),
6349 type))
6350 return i;
6352 return -1;
6355 static rtx
6356 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6357 const_tree fntype)
6359 /* We aren't passed a decl, so we can't check that a call is local.
6360 However, it isn't clear that that would be a win anyway, since it
6361 might limit some tail-calling opportunities. */
6362 enum arm_pcs pcs_variant;
6363 int unsignedp ATTRIBUTE_UNUSED;
6365 if (fntype)
6367 const_tree fndecl = NULL_TREE;
6369 if (TREE_CODE (fntype) == FUNCTION_DECL)
6371 fndecl = fntype;
6372 fntype = TREE_TYPE (fntype);
6375 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6377 else
6378 pcs_variant = arm_pcs_default;
6380 /* Promote integer types. */
6381 if (type && INTEGRAL_TYPE_P (type))
6382 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6384 if (pcs_variant != ARM_PCS_AAPCS)
6386 int i;
6388 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6389 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6390 type))
6391 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6392 mode, type);
6395 /* Promote small structs returned in a register to full-word size
6396 for big-endian AAPCS. */
6397 if (type && arm_return_in_msb (type))
6399 HOST_WIDE_INT size = int_size_in_bytes (type);
6400 if (size % UNITS_PER_WORD != 0)
6402 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6403 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6407 return gen_rtx_REG (mode, R0_REGNUM);
6410 static rtx
6411 aapcs_libcall_value (machine_mode mode)
6413 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6414 && GET_MODE_SIZE (mode) <= 4)
6415 mode = SImode;
6417 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6420 /* Lay out a function argument using the AAPCS rules. The rule
6421 numbers referred to here are those in the AAPCS. */
6422 static void
6423 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6424 const_tree type, bool named)
6426 int nregs, nregs2;
6427 int ncrn;
6429 /* We only need to do this once per argument. */
6430 if (pcum->aapcs_arg_processed)
6431 return;
6433 pcum->aapcs_arg_processed = true;
6435 /* Special case: if named is false then we are handling an incoming
6436 anonymous argument which is on the stack. */
6437 if (!named)
6438 return;
6440 /* Is this a potential co-processor register candidate? */
6441 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6443 int slot = aapcs_select_call_coproc (pcum, mode, type);
6444 pcum->aapcs_cprc_slot = slot;
6446 /* We don't have to apply any of the rules from part B of the
6447 preparation phase; these are handled elsewhere in the
6448 compiler. */
6450 if (slot >= 0)
6452 /* A Co-processor register candidate goes either in its own
6453 class of registers or on the stack. */
6454 if (!pcum->aapcs_cprc_failed[slot])
6456 /* C1.cp - Try to allocate the argument to co-processor
6457 registers. */
6458 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6459 return;
6461 /* C2.cp - Put the argument on the stack and note that we
6462 can't assign any more candidates in this slot. We also
6463 need to note that we have allocated stack space, so that
6464 we won't later try to split a non-cprc candidate between
6465 core registers and the stack. */
6466 pcum->aapcs_cprc_failed[slot] = true;
6467 pcum->can_split = false;
6470 /* We didn't get a register, so this argument goes on the
6471 stack. */
6472 gcc_assert (pcum->can_split == false);
6473 return;
6477 /* C3 - For double-word aligned arguments, round the NCRN up to the
6478 next even number. */
6479 ncrn = pcum->aapcs_ncrn;
6480 if (ncrn & 1)
6482 int res = arm_needs_doubleword_align (mode, type);
6483 /* Only warn during RTL expansion of call stmts, otherwise we would
6484 warn e.g. during gimplification even on functions that will be
6485 always inlined, and we'd warn multiple times. Don't warn when
6486 called in expand_function_start either, as we warn instead in
6487 arm_function_arg_boundary in that case. */
6488 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6489 inform (input_location, "parameter passing for argument of type "
6490 "%qT changed in GCC 7.1", type);
6491 else if (res > 0)
6492 ncrn++;
6495 nregs = ARM_NUM_REGS2(mode, type);
6497 /* Sigh, this test should really assert that nregs > 0, but a GCC
6498 extension allows empty structs and then gives them empty size; it
6499 then allows such a structure to be passed by value. For some of
6500 the code below we have to pretend that such an argument has
6501 non-zero size so that we 'locate' it correctly either in
6502 registers or on the stack. */
6503 gcc_assert (nregs >= 0);
6505 nregs2 = nregs ? nregs : 1;
6507 /* C4 - Argument fits entirely in core registers. */
6508 if (ncrn + nregs2 <= NUM_ARG_REGS)
6510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6511 pcum->aapcs_next_ncrn = ncrn + nregs;
6512 return;
6515 /* C5 - Some core registers left and there are no arguments already
6516 on the stack: split this argument between the remaining core
6517 registers and the stack. */
6518 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6520 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6522 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6523 return;
6526 /* C6 - NCRN is set to 4. */
6527 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6529 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6530 return;
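/* Illustration only (hypothetical prototype), following rules C3-C6
   above on an AAPCS target:

	void f (int a, long long b, int c);

	a -> r0
	b -> r2/r3	(C3: ncrn rounded up from 1 to 2, r1 left unused)
	c -> stack	(C4 and C5 fail: no core registers remain)

   A 12-byte structure arriving when ncrn == 3 would instead be split
   under C5: one word in r3 and the remaining 8 bytes on the stack.  */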
6533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6534 for a call to a function whose data type is FNTYPE.
6535 For a library call, FNTYPE is NULL. */
6536 void
6537 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6538 rtx libname,
6539 tree fndecl ATTRIBUTE_UNUSED)
6541 /* Long call handling. */
6542 if (fntype)
6543 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6544 else
6545 pcum->pcs_variant = arm_pcs_default;
6547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6549 if (arm_libcall_uses_aapcs_base (libname))
6550 pcum->pcs_variant = ARM_PCS_AAPCS;
6552 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6553 pcum->aapcs_reg = NULL_RTX;
6554 pcum->aapcs_partial = 0;
6555 pcum->aapcs_arg_processed = false;
6556 pcum->aapcs_cprc_slot = -1;
6557 pcum->can_split = true;
6559 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6561 int i;
6563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6565 pcum->aapcs_cprc_failed[i] = false;
6566 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6569 return;
6572 /* Legacy ABIs */
6574 /* On the ARM, the offset starts at 0. */
6575 pcum->nregs = 0;
6576 pcum->iwmmxt_nregs = 0;
6577 pcum->can_split = true;
6579 /* Varargs vectors are treated the same as long long.
6580 named_count avoids having to change the way ARM handles 'named'. */
6581 pcum->named_count = 0;
6582 pcum->nargs = 0;
6584 if (TARGET_REALLY_IWMMXT && fntype)
6586 tree fn_arg;
6588 for (fn_arg = TYPE_ARG_TYPES (fntype);
6589 fn_arg;
6590 fn_arg = TREE_CHAIN (fn_arg))
6591 pcum->named_count += 1;
6593 if (! pcum->named_count)
6594 pcum->named_count = INT_MAX;
6598 /* Return 1 if double word alignment is required for argument passing.
6599 Return -1 if double word alignment used to be required for argument
6600 passing before the PR77728 ABI fix, but is not required anymore.
6601 Return 0 if double word alignment is not required and wasn't required
6602 before either. */
6603 static int
6604 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6606 if (!type)
6607 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6609 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6610 if (!AGGREGATE_TYPE_P (type))
6611 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6613 /* Array types: Use member alignment of element type. */
6614 if (TREE_CODE (type) == ARRAY_TYPE)
6615 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6617 int ret = 0;
6618 /* Record/aggregate types: Use greatest member alignment of any member. */
6619 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6620 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6622 if (TREE_CODE (field) == FIELD_DECL)
6623 return 1;
6624 else
6625 /* Before PR77728 fix, we were incorrectly considering also
6626 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6627 Make sure we can warn about that with -Wpsabi. */
6628 ret = -1;
6631 return ret;
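/* Illustration only (hypothetical types): with PARM_BOUNDARY == 32 the
   function above returns 1 for types aligned to more than one word:

	long long, double, struct { long long x; }	-> 1 (8-byte aligned)
	int, float, struct { int a, b; }		-> 0 (4-byte aligned)

   and returns -1 only when the over-aligned member is not a FIELD_DECL
   (e.g. a static data member), which is the pre-PR77728 behaviour that
   -Wpsabi reports as changed in GCC 7.1.  */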
6635 /* Determine where to put an argument to a function.
6636 Value is zero to push the argument on the stack,
6637 or a hard register in which to store the argument.
6639 MODE is the argument's machine mode.
6640 TYPE is the data type of the argument (as a tree).
6641 This is null for libcalls where that information may
6642 not be available.
6643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6644 the preceding args and about the function being called.
6645 NAMED is nonzero if this argument is a named parameter
6646 (otherwise it is an extra parameter matching an ellipsis).
6648 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6649 other arguments are passed on the stack. If (NAMED == 0) (which happens
6650 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6651 defined), say it is passed in the stack (function_prologue will
6652 indeed make it pass in the stack if necessary). */
6654 static rtx
6655 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6656 const_tree type, bool named)
6658 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6659 int nregs;
6661 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6662 a call insn (op3 of a call_value insn). */
6663 if (mode == VOIDmode)
6664 return const0_rtx;
6666 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668 aapcs_layout_arg (pcum, mode, type, named);
6669 return pcum->aapcs_reg;
6672 /* Varargs vectors are treated the same as long long.
6673 named_count avoids having to change the way ARM handles 'named'. */
6674 if (TARGET_IWMMXT_ABI
6675 && arm_vector_mode_supported_p (mode)
6676 && pcum->named_count > pcum->nargs + 1)
6678 if (pcum->iwmmxt_nregs <= 9)
6679 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6680 else
6682 pcum->can_split = false;
6683 return NULL_RTX;
6687 /* Put doubleword aligned quantities in even register pairs. */
6688 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6690 int res = arm_needs_doubleword_align (mode, type);
6691 if (res < 0 && warn_psabi)
6692 inform (input_location, "parameter passing for argument of type "
6693 "%qT changed in GCC 7.1", type);
6694 else if (res > 0)
6695 pcum->nregs++;
6698 /* Only allow splitting an arg between regs and memory if all preceding
6699 args were allocated to regs. For args passed by reference we only count
6700 the reference pointer. */
6701 if (pcum->can_split)
6702 nregs = 1;
6703 else
6704 nregs = ARM_NUM_REGS2 (mode, type);
6706 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6707 return NULL_RTX;
6709 return gen_rtx_REG (mode, pcum->nregs);
6712 static unsigned int
6713 arm_function_arg_boundary (machine_mode mode, const_tree type)
6715 if (!ARM_DOUBLEWORD_ALIGN)
6716 return PARM_BOUNDARY;
6718 int res = arm_needs_doubleword_align (mode, type);
6719 if (res < 0 && warn_psabi)
6720 inform (input_location, "parameter passing for argument of type %qT "
6721 "changed in GCC 7.1", type);
6723 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6726 static int
6727 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6728 tree type, bool named)
6730 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6731 int nregs = pcum->nregs;
6733 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6735 aapcs_layout_arg (pcum, mode, type, named);
6736 return pcum->aapcs_partial;
6739 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6740 return 0;
6742 if (NUM_ARG_REGS > nregs
6743 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6744 && pcum->can_split)
6745 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6747 return 0;
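/* Illustration only: for a legacy-ABI call where three argument words
   are already in r0-r2 (nregs == 3) and the next argument needs four
   words, the computation above yields (4 - 3) * 4 = 4 bytes passed in
   registers; the first word goes in r3 and the remaining 12 bytes on
   the stack.  */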
6750 /* Update the data in PCUM to advance over an argument
6751 of mode MODE and data type TYPE.
6752 (TYPE is null for libcalls where that information may not be available.) */
6754 static void
6755 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6756 const_tree type, bool named)
6758 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6760 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6762 aapcs_layout_arg (pcum, mode, type, named);
6764 if (pcum->aapcs_cprc_slot >= 0)
6766 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6767 type);
6768 pcum->aapcs_cprc_slot = -1;
6771 /* Generic stuff. */
6772 pcum->aapcs_arg_processed = false;
6773 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6774 pcum->aapcs_reg = NULL_RTX;
6775 pcum->aapcs_partial = 0;
6777 else
6779 pcum->nargs += 1;
6780 if (arm_vector_mode_supported_p (mode)
6781 && pcum->named_count > pcum->nargs
6782 && TARGET_IWMMXT_ABI)
6783 pcum->iwmmxt_nregs += 1;
6784 else
6785 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6789 /* Variable sized types are passed by reference. This is a GCC
6790 extension to the ARM ABI. */
6792 static bool
6793 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6794 machine_mode mode ATTRIBUTE_UNUSED,
6795 const_tree type, bool named ATTRIBUTE_UNUSED)
6797 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6800 /* Encode the current state of the #pragma [no_]long_calls. */
6801 typedef enum
6803 OFF, /* No #pragma [no_]long_calls is in effect. */
6804 LONG, /* #pragma long_calls is in effect. */
6805 SHORT /* #pragma no_long_calls is in effect. */
6806 } arm_pragma_enum;
6808 static arm_pragma_enum arm_pragma_long_calls = OFF;
6810 void
6811 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6813 arm_pragma_long_calls = LONG;
6816 void
6817 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6819 arm_pragma_long_calls = SHORT;
6822 void
6823 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6825 arm_pragma_long_calls = OFF;
6828 /* Handle an attribute requiring a FUNCTION_DECL;
6829 arguments as in struct attribute_spec.handler. */
6830 static tree
6831 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6832 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6834 if (TREE_CODE (*node) != FUNCTION_DECL)
6836 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6837 name);
6838 *no_add_attrs = true;
6841 return NULL_TREE;
6844 /* Handle an "interrupt" or "isr" attribute;
6845 arguments as in struct attribute_spec.handler. */
6846 static tree
6847 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6848 bool *no_add_attrs)
6850 if (DECL_P (*node))
6852 if (TREE_CODE (*node) != FUNCTION_DECL)
6854 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6855 name);
6856 *no_add_attrs = true;
6858 /* FIXME: the argument if any is checked for type attributes;
6859 should it be checked for decl ones? */
6861 else
6863 if (TREE_CODE (*node) == FUNCTION_TYPE
6864 || TREE_CODE (*node) == METHOD_TYPE)
6866 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6868 warning (OPT_Wattributes, "%qE attribute ignored",
6869 name);
6870 *no_add_attrs = true;
6873 else if (TREE_CODE (*node) == POINTER_TYPE
6874 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6875 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6876 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6878 *node = build_variant_type_copy (*node);
6879 TREE_TYPE (*node) = build_type_attribute_variant
6880 (TREE_TYPE (*node),
6881 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6882 *no_add_attrs = true;
6884 else
6886 /* Possibly pass this attribute on from the type to a decl. */
6887 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6888 | (int) ATTR_FLAG_FUNCTION_NEXT
6889 | (int) ATTR_FLAG_ARRAY_NEXT))
6891 *no_add_attrs = true;
6892 return tree_cons (name, args, NULL_TREE);
6894 else
6896 warning (OPT_Wattributes, "%qE attribute ignored",
6897 name);
6902 return NULL_TREE;
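/* Illustration only (hypothetical handler): the attribute checked above
   is typically written as

	void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional argument must name one of the recognized interrupt
   kinds (e.g. IRQ, FIQ, SWI, ABORT, UNDEF); anything else makes
   arm_isr_value return ARM_FT_UNKNOWN and the attribute is dropped
   with a warning.  */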
6905 /* Handle a "pcs" attribute; arguments as in struct
6906 attribute_spec.handler. */
6907 static tree
6908 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6909 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6911 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6913 warning (OPT_Wattributes, "%qE attribute ignored", name);
6914 *no_add_attrs = true;
6916 return NULL_TREE;
6919 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6920 /* Handle the "notshared" attribute. This attribute is another way of
6921 requesting hidden visibility. ARM's compiler supports
6922 "__declspec(notshared)"; we support the same thing via an
6923 attribute. */
6925 static tree
6926 arm_handle_notshared_attribute (tree *node,
6927 tree name ATTRIBUTE_UNUSED,
6928 tree args ATTRIBUTE_UNUSED,
6929 int flags ATTRIBUTE_UNUSED,
6930 bool *no_add_attrs)
6932 tree decl = TYPE_NAME (*node);
6934 if (decl)
6936 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6937 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6938 *no_add_attrs = false;
6940 return NULL_TREE;
6942 #endif
6944 /* This function returns true if a function with declaration FNDECL and type
6945 FNTYPE uses the stack to pass arguments or return variables and false
6946 otherwise. This is used for functions with the attributes
6947 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6948 diagnostic messages if the stack is used. NAME is the name of the attribute
6949 used. */
6951 static bool
6952 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6954 function_args_iterator args_iter;
6955 CUMULATIVE_ARGS args_so_far_v;
6956 cumulative_args_t args_so_far;
6957 bool first_param = true;
6958 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6960 /* Error out if any argument is passed on the stack. */
6961 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6962 args_so_far = pack_cumulative_args (&args_so_far_v);
6963 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6965 rtx arg_rtx;
6966 machine_mode arg_mode = TYPE_MODE (arg_type);
6968 prev_arg_type = arg_type;
6969 if (VOID_TYPE_P (arg_type))
6970 continue;
6972 if (!first_param)
6973 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6974 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6975 if (!arg_rtx
6976 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6978 error ("%qE attribute not available to functions with arguments "
6979 "passed on the stack", name);
6980 return true;
6982 first_param = false;
6985 /* Error out for variadic functions since we cannot control how many
6986 arguments will be passed and thus the stack could be used. stdarg_p () is not
6987 used for this check, to avoid walking the argument list twice. */
6988 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6990 error ("%qE attribute not available to functions with variable number "
6991 "of arguments", name);
6992 return true;
6995 /* Error out if return value is passed on the stack. */
6996 ret_type = TREE_TYPE (fntype);
6997 if (arm_return_in_memory (ret_type, fntype))
6999 error ("%qE attribute not available to functions that return value on "
7000 "the stack", name);
7001 return true;
7003 return false;
7006 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7007 function will check whether the attribute is allowed here and will add the
7008 attribute to the function declaration tree or otherwise issue a warning. */
7010 static tree
7011 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7012 tree /* args */,
7013 int /* flags */,
7014 bool *no_add_attrs)
7016 tree fndecl;
7018 if (!use_cmse)
7020 *no_add_attrs = true;
7021 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7022 name);
7023 return NULL_TREE;
7026 /* Ignore attribute for function types. */
7027 if (TREE_CODE (*node) != FUNCTION_DECL)
7029 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7030 name);
7031 *no_add_attrs = true;
7032 return NULL_TREE;
7035 fndecl = *node;
7037 /* Warn for static linkage functions. */
7038 if (!TREE_PUBLIC (fndecl))
7040 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7041 "with static linkage", name);
7042 *no_add_attrs = true;
7043 return NULL_TREE;
7046 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7047 TREE_TYPE (fndecl));
7048 return NULL_TREE;
7052 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7053 function will check whether the attribute is allowed here and will add the
7054 attribute to the function type tree or otherwise issue a diagnostic. The
7055 reason we check this at declaration time is to only allow the use of the
7056 attribute with declarations of function pointers and not function
7057 declarations. This function checks NODE is of the expected type and issues
7058 diagnostics otherwise using NAME. If it is not of the expected type
7059 *NO_ADD_ATTRS will be set to true. */
7061 static tree
7062 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7063 tree /* args */,
7064 int /* flags */,
7065 bool *no_add_attrs)
7067 tree decl = NULL_TREE, fntype = NULL_TREE;
7068 tree type;
7070 if (!use_cmse)
7072 *no_add_attrs = true;
7073 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7074 name);
7075 return NULL_TREE;
7078 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7080 decl = *node;
7081 fntype = TREE_TYPE (decl);
7084 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7085 fntype = TREE_TYPE (fntype);
7087 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7089 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7090 "function pointer", name);
7091 *no_add_attrs = true;
7092 return NULL_TREE;
7095 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7097 if (*no_add_attrs)
7098 return NULL_TREE;
7100 /* Prevent trees being shared among function types with and without
7101 cmse_nonsecure_call attribute. */
7102 type = TREE_TYPE (decl);
7104 type = build_distinct_type_copy (type);
7105 TREE_TYPE (decl) = type;
7106 fntype = type;
7108 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7110 type = fntype;
7111 fntype = TREE_TYPE (fntype);
7112 fntype = build_distinct_type_copy (fntype);
7113 TREE_TYPE (type) = fntype;
7116 /* Construct a type attribute and add it to the function type. */
7117 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7118 TYPE_ATTRIBUTES (fntype));
7119 TYPE_ATTRIBUTES (fntype) = attrs;
7120 return NULL_TREE;
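/* Illustration only (hypothetical declarations, requires -mcmse):

	int get_key (int slot) __attribute__ ((cmse_nonsecure_entry));

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb (int);
	ns_cb *callback;

   The first marks a secure entry point callable from non-secure state;
   the second marks calls made through 'callback' as non-secure calls.
   As the handlers above enforce, cmse_nonsecure_entry goes on a
   function declaration while cmse_nonsecure_call is only accepted on
   the function type behind a pointer.  */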
7123 /* Return 0 if the attributes for two types are incompatible, 1 if they
7124 are compatible, and 2 if they are nearly compatible (which causes a
7125 warning to be generated). */
7126 static int
7127 arm_comp_type_attributes (const_tree type1, const_tree type2)
7129 int l1, l2, s1, s2;
7131 /* Check for mismatch of non-default calling convention. */
7132 if (TREE_CODE (type1) != FUNCTION_TYPE)
7133 return 1;
7135 /* Check for mismatched call attributes. */
7136 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7137 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7138 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7139 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7141 /* Only bother to check if an attribute is defined. */
7142 if (l1 | l2 | s1 | s2)
7144 /* If one type has an attribute, the other must have the same attribute. */
7145 if ((l1 != l2) || (s1 != s2))
7146 return 0;
7148 /* Disallow mixed attributes. */
7149 if ((l1 & s2) || (l2 & s1))
7150 return 0;
7153 /* Check for mismatched ISR attribute. */
7154 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7155 if (! l1)
7156 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7157 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7158 if (! l2)
7159 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7160 if (l1 != l2)
7161 return 0;
7163 l1 = lookup_attribute ("cmse_nonsecure_call",
7164 TYPE_ATTRIBUTES (type1)) != NULL;
7165 l2 = lookup_attribute ("cmse_nonsecure_call",
7166 TYPE_ATTRIBUTES (type2)) != NULL;
7168 if (l1 != l2)
7169 return 0;
7171 return 1;
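/* Illustration only (hypothetical code): the check above is what makes
   the following assignment a type mismatch, since only one side of it
   carries the cmse_nonsecure_call attribute:

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (void);
	void ordinary (void);
	ns_fn *p = ordinary;		(incompatible pointer types)

   The same rule keeps long_call/short_call and isr/interrupt
   annotations consistent between declarations.  */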
7174 /* Assigns default attributes to newly defined type. This is used to
7175 set short_call/long_call attributes for function types of
7176 functions defined inside corresponding #pragma scopes. */
7177 static void
7178 arm_set_default_type_attributes (tree type)
7180 /* Add __attribute__ ((long_call)) to all functions, when
7181 inside #pragma long_calls or __attribute__ ((short_call)),
7182 when inside #pragma no_long_calls. */
7183 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7185 tree type_attr_list, attr_name;
7186 type_attr_list = TYPE_ATTRIBUTES (type);
7188 if (arm_pragma_long_calls == LONG)
7189 attr_name = get_identifier ("long_call");
7190 else if (arm_pragma_long_calls == SHORT)
7191 attr_name = get_identifier ("short_call");
7192 else
7193 return;
7195 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7196 TYPE_ATTRIBUTES (type) = type_attr_list;
7200 /* Return true if DECL is known to be linked into section SECTION. */
7202 static bool
7203 arm_function_in_section_p (tree decl, section *section)
7205 /* We can only be certain about the prevailing symbol definition. */
7206 if (!decl_binds_to_current_def_p (decl))
7207 return false;
7209 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7210 if (!DECL_SECTION_NAME (decl))
7212 /* Make sure that we will not create a unique section for DECL. */
7213 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7214 return false;
7217 return function_section (decl) == section;
7220 /* Return nonzero if a 32-bit "long_call" should be generated for
7221 a call from the current function to DECL. We generate a long_call
7222 if the function:
7224 a. has an __attribute__ ((long_call))
7225 or b. is within the scope of a #pragma long_calls
7226 or c. the -mlong-calls command line switch has been specified
7228 However we do not generate a long call if the function:
7230 d. has an __attribute__ ((short_call))
7231 or e. is inside the scope of a #pragma no_long_calls
7232 or f. is defined in the same section as the current function. */
7234 bool
7235 arm_is_long_call_p (tree decl)
7237 tree attrs;
7239 if (!decl)
7240 return TARGET_LONG_CALLS;
7242 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7243 if (lookup_attribute ("short_call", attrs))
7244 return false;
7246 /* For "f", be conservative, and only cater for cases in which the
7247 whole of the current function is placed in the same section. */
7248 if (!flag_reorder_blocks_and_partition
7249 && TREE_CODE (decl) == FUNCTION_DECL
7250 && arm_function_in_section_p (decl, current_function_section ()))
7251 return false;
7253 if (lookup_attribute ("long_call", attrs))
7254 return true;
7256 return TARGET_LONG_CALLS;
7259 /* Return nonzero if it is ok to make a tail-call to DECL. */
7260 static bool
7261 arm_function_ok_for_sibcall (tree decl, tree exp)
7263 unsigned long func_type;
7265 if (cfun->machine->sibcall_blocked)
7266 return false;
7268 /* Never tailcall something if we are generating code for Thumb-1. */
7269 if (TARGET_THUMB1)
7270 return false;
7272 /* The PIC register is live on entry to VxWorks PLT entries, so we
7273 must make the call before restoring the PIC register. */
7274 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7275 return false;
7277 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7278 may be used both as target of the call and base register for restoring
7279 the VFP registers. */
7280 if (TARGET_APCS_FRAME && TARGET_ARM
7281 && TARGET_HARD_FLOAT
7282 && decl && arm_is_long_call_p (decl))
7283 return false;
7285 /* If we are interworking and the function is not declared static
7286 then we can't tail-call it unless we know that it exists in this
7287 compilation unit (since it might be a Thumb routine). */
7288 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7289 && !TREE_ASM_WRITTEN (decl))
7290 return false;
7292 func_type = arm_current_func_type ();
7293 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7294 if (IS_INTERRUPT (func_type))
7295 return false;
7297 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7298 generated for entry functions themselves. */
7299 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7300 return false;
7302 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7303 this would complicate matters for later code generation. */
7304 if (TREE_CODE (exp) == CALL_EXPR)
7306 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7307 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7308 return false;
7311 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7313 /* Check that the return value locations are the same. For
7314 example that we aren't returning a value from the sibling in
7315 a VFP register but then need to transfer it to a core
7316 register. */
7317 rtx a, b;
7318 tree decl_or_type = decl;
7320 /* If it is an indirect function pointer, get the function type. */
7321 if (!decl)
7322 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7324 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7325 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7326 cfun->decl, false);
7327 if (!rtx_equal_p (a, b))
7328 return false;
7331 /* Never tailcall if function may be called with a misaligned SP. */
7332 if (IS_STACKALIGN (func_type))
7333 return false;
7335 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7336 references should become a NOP. Don't convert such calls into
7337 sibling calls. */
7338 if (TARGET_AAPCS_BASED
7339 && arm_abi == ARM_ABI_AAPCS
7340 && decl
7341 && DECL_WEAK (decl))
7342 return false;
7344 /* We cannot do a tailcall for an indirect call by descriptor if all the
7345 argument registers are used because the only register left to load the
7346 address is IP and it will already contain the static chain. */
7347 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7349 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7350 CUMULATIVE_ARGS cum;
7351 cumulative_args_t cum_v;
7353 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7354 cum_v = pack_cumulative_args (&cum);
7356 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7358 tree type = TREE_VALUE (t);
7359 if (!VOID_TYPE_P (type))
7360 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7363 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7364 return false;
7367 /* Everything else is ok. */
7368 return true;
7372 /* Addressing mode support functions. */
7374 /* Return nonzero if X is a legitimate immediate operand when compiling
7375 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7377 legitimate_pic_operand_p (rtx x)
7379 if (GET_CODE (x) == SYMBOL_REF
7380 || (GET_CODE (x) == CONST
7381 && GET_CODE (XEXP (x, 0)) == PLUS
7382 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7383 return 0;
7385 return 1;
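/* Two illustrative examples of what the predicate above rejects, in the
   usual RTL notation:

     (symbol_ref "foo")
     (const (plus (symbol_ref "foo") (const_int 4)))

   Neither may appear as an immediate when compiling for PIC; both must
   first be rewritten by legitimize_pic_address below.  */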
7388 /* Record that the current function needs a PIC register. Initialize
7389 cfun->machine->pic_reg if we have not already done so. */
7391 static void
7392 require_pic_register (void)
7394 /* A lot of the logic here is made obscure by the fact that this
7395 routine gets called as part of the rtx cost estimation process.
7396 We don't want those calls to affect any assumptions about the real
7397 function; and further, we can't call entry_of_function() until we
7398 start the real expansion process. */
7399 if (!crtl->uses_pic_offset_table)
7401 gcc_assert (can_create_pseudo_p ());
7402 if (arm_pic_register != INVALID_REGNUM
7403 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7405 if (!cfun->machine->pic_reg)
7406 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7408 /* Play games to avoid marking the function as needing pic
7409 if we are being called as part of the cost-estimation
7410 process. */
7411 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7412 crtl->uses_pic_offset_table = 1;
7414 else
7416 rtx_insn *seq, *insn;
7418 if (!cfun->machine->pic_reg)
7419 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7421 /* Play games to avoid marking the function as needing pic
7422 if we are being called as part of the cost-estimation
7423 process. */
7424 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7426 crtl->uses_pic_offset_table = 1;
7427 start_sequence ();
7429 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7430 && arm_pic_register > LAST_LO_REGNUM)
7431 emit_move_insn (cfun->machine->pic_reg,
7432 gen_rtx_REG (Pmode, arm_pic_register));
7433 else
7434 arm_load_pic_register (0UL);
7436 seq = get_insns ();
7437 end_sequence ();
7439 for (insn = seq; insn; insn = NEXT_INSN (insn))
7440 if (INSN_P (insn))
7441 INSN_LOCATION (insn) = prologue_location;
7443 /* We can be called during expansion of PHI nodes, where
7444 we can't yet emit instructions directly in the final
7445 insn stream. Queue the insns on the entry edge, they will
7446 be committed after everything else is expanded. */
7447 insert_insn_on_edge (seq,
7448 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7455 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7457 if (GET_CODE (orig) == SYMBOL_REF
7458 || GET_CODE (orig) == LABEL_REF)
7460 if (reg == 0)
7462 gcc_assert (can_create_pseudo_p ());
7463 reg = gen_reg_rtx (Pmode);
7466 /* VxWorks does not impose a fixed gap between segments; the run-time
7467 gap can be different from the object-file gap. We therefore can't
7468 use GOTOFF unless we are absolutely sure that the symbol is in the
7469 same segment as the GOT. Unfortunately, the flexibility of linker
7470 scripts means that we can't be sure of that in general, so assume
7471 that GOTOFF is never valid on VxWorks. */
7472 /* References to weak symbols cannot be resolved locally: they
7473 may be overridden by a non-weak definition at link time. */
7474 rtx_insn *insn;
7475 if ((GET_CODE (orig) == LABEL_REF
7476 || (GET_CODE (orig) == SYMBOL_REF
7477 && SYMBOL_REF_LOCAL_P (orig)
7478 && (SYMBOL_REF_DECL (orig)
7479 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7480 && NEED_GOT_RELOC
7481 && arm_pic_data_is_text_relative)
7482 insn = arm_pic_static_addr (orig, reg);
7483 else
7485 rtx pat;
7486 rtx mem;
7488 /* If this function doesn't have a pic register, create one now. */
7489 require_pic_register ();
7491 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7493 /* Make the MEM as close to a constant as possible. */
7494 mem = SET_SRC (pat);
7495 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7496 MEM_READONLY_P (mem) = 1;
7497 MEM_NOTRAP_P (mem) = 1;
7499 insn = emit_insn (pat);
7502 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7503 by loop. */
7504 set_unique_reg_note (insn, REG_EQUAL, orig);
7506 return reg;
7508 else if (GET_CODE (orig) == CONST)
7510 rtx base, offset;
7512 if (GET_CODE (XEXP (orig, 0)) == PLUS
7513 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7514 return orig;
7516 /* Handle the case where we have: const (UNSPEC_TLS). */
7517 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7518 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7519 return orig;
7521 /* Handle the case where we have:
7522 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7523 CONST_INT. */
7524 if (GET_CODE (XEXP (orig, 0)) == PLUS
7525 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7526 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7528 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7529 return orig;
7532 if (reg == 0)
7534 gcc_assert (can_create_pseudo_p ());
7535 reg = gen_reg_rtx (Pmode);
7538 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7540 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7541 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7542 base == reg ? 0 : reg);
7544 if (CONST_INT_P (offset))
7546 /* The base register doesn't really matter; we only want to
7547 test the index for the appropriate mode. */
7548 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7550 gcc_assert (can_create_pseudo_p ());
7551 offset = force_reg (Pmode, offset);
7554 if (CONST_INT_P (offset))
7555 return plus_constant (Pmode, base, INTVAL (offset));
7558 if (GET_MODE_SIZE (mode) > 4
7559 && (GET_MODE_CLASS (mode) == MODE_INT
7560 || TARGET_SOFT_FLOAT))
7562 emit_insn (gen_addsi3 (reg, base, offset));
7563 return reg;
7566 return gen_rtx_PLUS (Pmode, base, offset);
7569 return orig;
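/* A worked sketch of the CONST handling above (illustrative RTL, not an
   exhaustive description): for

     (const (plus (symbol_ref "foo") (const_int 8)))

   the symbol part is legitimized first (typically becoming a GOT load
   into a fresh pseudo), the CONST_INT offset is then checked with
   arm_legitimate_index_p, and the result is refolded as
   (plus (reg N) (const_int 8)) via plus_constant, or added with addsi3 /
   left as a PLUS when the offset cannot stay a constant.  */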
7573 /* Find a spare register to use during the prolog of a function. */
7575 static int
7576 thumb_find_work_register (unsigned long pushed_regs_mask)
7578 int reg;
7580 /* Check the argument registers first as these are call-used. The
7581 register allocation order means that sometimes r3 might be used
7582 but earlier argument registers might not, so check them all. */
7583 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7584 if (!df_regs_ever_live_p (reg))
7585 return reg;
7587 /* Before going on to check the call-saved registers we can try a couple
7588 more ways of deducing that r3 is available. The first is when we are
7589 pushing anonymous arguments onto the stack and we have less than 4
7590 registers worth of fixed arguments(*). In this case r3 will be part of
7591 the variable argument list and so we can be sure that it will be
7592 pushed right at the start of the function. Hence it will be available
7593 for the rest of the prologue.
7594 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7595 if (cfun->machine->uses_anonymous_args
7596 && crtl->args.pretend_args_size > 0)
7597 return LAST_ARG_REGNUM;
7599 /* The other case is when we have fixed arguments but less than 4 registers
7600 worth. In this case r3 might be used in the body of the function, but
7601 it is not being used to convey an argument into the function. In theory
7602 we could just check crtl->args.size to see how many bytes are
7603 being passed in argument registers, but it seems that it is unreliable.
7604 Sometimes it will have the value 0 when in fact arguments are being
7605 passed. (See testcase execute/20021111-1.c for an example). So we also
7606 check the args_info.nregs field as well. The problem with this field is
7607 that it makes no allowances for arguments that are passed to the
7608 function but which are not used. Hence we could miss an opportunity
7609 when a function has an unused argument in r3. But it is better to be
7610 safe than to be sorry. */
7611 if (! cfun->machine->uses_anonymous_args
7612 && crtl->args.size >= 0
7613 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7614 && (TARGET_AAPCS_BASED
7615 ? crtl->args.info.aapcs_ncrn < 4
7616 : crtl->args.info.nregs < 4))
7617 return LAST_ARG_REGNUM;
7619 /* Otherwise look for a call-saved register that is going to be pushed. */
7620 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7621 if (pushed_regs_mask & (1 << reg))
7622 return reg;
7624 if (TARGET_THUMB2)
7626 /* Thumb-2 can use high regs. */
7627 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7628 if (pushed_regs_mask & (1 << reg))
7629 return reg;
7631 /* Something went wrong - thumb_compute_save_reg_mask()
7632 should have arranged for a suitable register to be pushed. */
7633 gcc_unreachable ();
7636 static GTY(()) int pic_labelno;
7638 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7639 low register. */
7641 void
7642 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7644 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7646 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7647 return;
7649 gcc_assert (flag_pic);
7651 pic_reg = cfun->machine->pic_reg;
7652 if (TARGET_VXWORKS_RTP)
7654 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7655 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7656 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7658 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7660 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7661 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7663 else
7665 /* We use an UNSPEC rather than a LABEL_REF because this label
7666 never appears in the code stream. */
7668 labelno = GEN_INT (pic_labelno++);
7669 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7670 l1 = gen_rtx_CONST (VOIDmode, l1);
7672 /* On the ARM the PC register contains 'dot + 8' at the time of the
7673 addition, on the Thumb it is 'dot + 4'. */
7674 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7675 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7676 UNSPEC_GOTSYM_OFF);
7677 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7679 if (TARGET_32BIT)
7681 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7683 else /* TARGET_THUMB1 */
7685 if (arm_pic_register != INVALID_REGNUM
7686 && REGNO (pic_reg) > LAST_LO_REGNUM)
7688 /* We will have pushed the pic register, so we should always be
7689 able to find a work register. */
7690 pic_tmp = gen_rtx_REG (SImode,
7691 thumb_find_work_register (saved_regs));
7692 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7693 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7694 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7696 else if (arm_pic_register != INVALID_REGNUM
7697 && arm_pic_register > LAST_LO_REGNUM
7698 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7700 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7701 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7702 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7704 else
7705 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7709 /* Need to emit this whether or not we obey regdecls,
7710 since setjmp/longjmp can cause life info to screw up. */
7711 emit_use (pic_reg);
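/* A hedged sketch of the arithmetic in the non-VxWorks path above: the
   constant loaded into PIC_REG represents

     &_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)      (+ 4 in Thumb state)

   and the pic_load_addr/pic_add_dot_plus sequence then adds the PC, which
   reads as .LPICn + 8 (or + 4) at the point of the addition, so the two
   offsets cancel and PIC_REG ends up holding the GOT base.  */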
7714 /* Generate code to load the address of a static var when flag_pic is set. */
7715 static rtx_insn *
7716 arm_pic_static_addr (rtx orig, rtx reg)
7718 rtx l1, labelno, offset_rtx;
7720 gcc_assert (flag_pic);
7722 /* We use an UNSPEC rather than a LABEL_REF because this label
7723 never appears in the code stream. */
7724 labelno = GEN_INT (pic_labelno++);
7725 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7726 l1 = gen_rtx_CONST (VOIDmode, l1);
7728 /* On the ARM the PC register contains 'dot + 8' at the time of the
7729 addition, on the Thumb it is 'dot + 4'. */
7730 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7731 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7732 UNSPEC_SYMBOL_OFFSET);
7733 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7735 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7738 /* Return nonzero if X is valid as an ARM state addressing register. */
7739 static int
7740 arm_address_register_rtx_p (rtx x, int strict_p)
7742 int regno;
7744 if (!REG_P (x))
7745 return 0;
7747 regno = REGNO (x);
7749 if (strict_p)
7750 return ARM_REGNO_OK_FOR_BASE_P (regno);
7752 return (regno <= LAST_ARM_REGNUM
7753 || regno >= FIRST_PSEUDO_REGISTER
7754 || regno == FRAME_POINTER_REGNUM
7755 || regno == ARG_POINTER_REGNUM);
7758 /* Return TRUE if this rtx is the difference of a symbol and a label,
7759 and will reduce to a PC-relative relocation in the object file.
7760 Expressions like this can be left alone when generating PIC, rather
7761 than forced through the GOT. */
7762 static int
7763 pcrel_constant_p (rtx x)
7765 if (GET_CODE (x) == MINUS)
7766 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7768 return FALSE;
7771 /* Return true if X will surely end up in an index register after next
7772 splitting pass. */
7773 static bool
7774 will_be_in_index_register (const_rtx x)
7776 /* arm.md: calculate_pic_address will split this into a register. */
7777 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7780 /* Return nonzero if X is a valid ARM state address operand. */
7782 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7783 int strict_p)
7785 bool use_ldrd;
7786 enum rtx_code code = GET_CODE (x);
7788 if (arm_address_register_rtx_p (x, strict_p))
7789 return 1;
7791 use_ldrd = (TARGET_LDRD
7792 && (mode == DImode || mode == DFmode));
7794 if (code == POST_INC || code == PRE_DEC
7795 || ((code == PRE_INC || code == POST_DEC)
7796 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7797 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7799 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7800 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7801 && GET_CODE (XEXP (x, 1)) == PLUS
7802 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7804 rtx addend = XEXP (XEXP (x, 1), 1);
7806 /* Don't allow ldrd post-increment by register because it's hard
7807 to fix up invalid register choices. */
7808 if (use_ldrd
7809 && GET_CODE (x) == POST_MODIFY
7810 && REG_P (addend))
7811 return 0;
7813 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7814 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7817 /* After reload constants split into minipools will have addresses
7818 from a LABEL_REF. */
7819 else if (reload_completed
7820 && (code == LABEL_REF
7821 || (code == CONST
7822 && GET_CODE (XEXP (x, 0)) == PLUS
7823 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7824 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7825 return 1;
7827 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7828 return 0;
7830 else if (code == PLUS)
7832 rtx xop0 = XEXP (x, 0);
7833 rtx xop1 = XEXP (x, 1);
7835 return ((arm_address_register_rtx_p (xop0, strict_p)
7836 && ((CONST_INT_P (xop1)
7837 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7838 || (!strict_p && will_be_in_index_register (xop1))))
7839 || (arm_address_register_rtx_p (xop1, strict_p)
7840 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7843 #if 0
7844 /* Reload currently can't handle MINUS, so disable this for now */
7845 else if (GET_CODE (x) == MINUS)
7847 rtx xop0 = XEXP (x, 0);
7848 rtx xop1 = XEXP (x, 1);
7850 return (arm_address_register_rtx_p (xop0, strict_p)
7851 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7853 #endif
7855 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7856 && code == SYMBOL_REF
7857 && CONSTANT_POOL_ADDRESS_P (x)
7858 && ! (flag_pic
7859 && symbol_mentioned_p (get_pool_constant (x))
7860 && ! pcrel_constant_p (get_pool_constant (x))))
7861 return 1;
7863 return 0;
7866 /* Return true if we can avoid creating a constant pool entry for x. */
7867 static bool
7868 can_avoid_literal_pool_for_label_p (rtx x)
7870 /* Normally we can assign constant values to target registers without
7871 the help of constant pool. But there are cases we have to use constant
7872 pool like:
7873 1) assign a label to a register.
7874 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
7876 Constant pool access in format:
7877 (set (reg r0) (mem (symbol_ref (".LC0"))))
7878 will cause the use of literal pool (later in function arm_reorg).
7879 So here we mark such format as an invalid format, then the compiler
7880 will adjust it into:
7881 (set (reg r0) (symbol_ref (".LC0")))
7882 (set (reg r0) (mem (reg r0))).
7883 No extra register is required, and (mem (reg r0)) won't cause the use
7884 of literal pools. */
7885 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7886 && CONSTANT_POOL_ADDRESS_P (x))
7887 return 1;
7888 return 0;
7892 /* Return nonzero if X is a valid Thumb-2 address operand. */
7893 static int
7894 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7896 bool use_ldrd;
7897 enum rtx_code code = GET_CODE (x);
7899 if (arm_address_register_rtx_p (x, strict_p))
7900 return 1;
7902 use_ldrd = (TARGET_LDRD
7903 && (mode == DImode || mode == DFmode));
7905 if (code == POST_INC || code == PRE_DEC
7906 || ((code == PRE_INC || code == POST_DEC)
7907 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7908 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7910 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7911 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7912 && GET_CODE (XEXP (x, 1)) == PLUS
7913 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7915 /* Thumb-2 only has autoincrement by constant. */
7916 rtx addend = XEXP (XEXP (x, 1), 1);
7917 HOST_WIDE_INT offset;
7919 if (!CONST_INT_P (addend))
7920 return 0;
7922 offset = INTVAL(addend);
7923 if (GET_MODE_SIZE (mode) <= 4)
7924 return (offset > -256 && offset < 256);
7926 return (use_ldrd && offset > -1024 && offset < 1024
7927 && (offset & 3) == 0);
7930 /* After reload constants split into minipools will have addresses
7931 from a LABEL_REF. */
7932 else if (reload_completed
7933 && (code == LABEL_REF
7934 || (code == CONST
7935 && GET_CODE (XEXP (x, 0)) == PLUS
7936 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7937 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7938 return 1;
7940 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7941 return 0;
7943 else if (code == PLUS)
7945 rtx xop0 = XEXP (x, 0);
7946 rtx xop1 = XEXP (x, 1);
7948 return ((arm_address_register_rtx_p (xop0, strict_p)
7949 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7950 || (!strict_p && will_be_in_index_register (xop1))))
7951 || (arm_address_register_rtx_p (xop1, strict_p)
7952 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7955 else if (can_avoid_literal_pool_for_label_p (x))
7956 return 0;
7958 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7959 && code == SYMBOL_REF
7960 && CONSTANT_POOL_ADDRESS_P (x)
7961 && ! (flag_pic
7962 && symbol_mentioned_p (get_pool_constant (x))
7963 && ! pcrel_constant_p (get_pool_constant (x))))
7964 return 1;
7966 return 0;
7969 /* Return nonzero if INDEX is valid for an address index operand in
7970 ARM state. */
7971 static int
7972 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7973 int strict_p)
7975 HOST_WIDE_INT range;
7976 enum rtx_code code = GET_CODE (index);
7978 /* Standard coprocessor addressing modes. */
7979 if (TARGET_HARD_FLOAT
7980 && (mode == SFmode || mode == DFmode))
7981 return (code == CONST_INT && INTVAL (index) < 1024
7982 && INTVAL (index) > -1024
7983 && (INTVAL (index) & 3) == 0);
7985 /* For quad modes, we restrict the constant offset to be slightly less
7986 than what the instruction format permits. We do this because for
7987 quad mode moves, we will actually decompose them into two separate
7988 double-mode reads or writes. INDEX must therefore be a valid
7989 (double-mode) offset and so should INDEX+8. */
7990 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7991 return (code == CONST_INT
7992 && INTVAL (index) < 1016
7993 && INTVAL (index) > -1024
7994 && (INTVAL (index) & 3) == 0);
7996 /* We have no such constraint on double mode offsets, so we permit the
7997 full range of the instruction format. */
7998 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7999 return (code == CONST_INT
8000 && INTVAL (index) < 1024
8001 && INTVAL (index) > -1024
8002 && (INTVAL (index) & 3) == 0);
8004 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8005 return (code == CONST_INT
8006 && INTVAL (index) < 1024
8007 && INTVAL (index) > -1024
8008 && (INTVAL (index) & 3) == 0);
8010 if (arm_address_register_rtx_p (index, strict_p)
8011 && (GET_MODE_SIZE (mode) <= 4))
8012 return 1;
8014 if (mode == DImode || mode == DFmode)
8016 if (code == CONST_INT)
8018 HOST_WIDE_INT val = INTVAL (index);
8020 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8021 If vldr is selected it uses arm_coproc_mem_operand. */
8022 if (TARGET_LDRD)
8023 return val > -256 && val < 256;
8024 else
8025 return val > -4096 && val < 4092;
8028 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8031 if (GET_MODE_SIZE (mode) <= 4
8032 && ! (arm_arch4
8033 && (mode == HImode
8034 || mode == HFmode
8035 || (mode == QImode && outer == SIGN_EXTEND))))
8037 if (code == MULT)
8039 rtx xiop0 = XEXP (index, 0);
8040 rtx xiop1 = XEXP (index, 1);
8042 return ((arm_address_register_rtx_p (xiop0, strict_p)
8043 && power_of_two_operand (xiop1, SImode))
8044 || (arm_address_register_rtx_p (xiop1, strict_p)
8045 && power_of_two_operand (xiop0, SImode)));
8047 else if (code == LSHIFTRT || code == ASHIFTRT
8048 || code == ASHIFT || code == ROTATERT)
8050 rtx op = XEXP (index, 1);
8052 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8053 && CONST_INT_P (op)
8054 && INTVAL (op) > 0
8055 && INTVAL (op) <= 31);
8059 /* For ARM v4 we may be doing a sign-extend operation during the
8060 load. */
8061 if (arm_arch4)
8063 if (mode == HImode
8064 || mode == HFmode
8065 || (outer == SIGN_EXTEND && mode == QImode))
8066 range = 256;
8067 else
8068 range = 4096;
8070 else
8071 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8073 return (code == CONST_INT
8074 && INTVAL (index) < range
8075 && INTVAL (index) > -range);
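/* Some concrete consequences of the ranges above (illustrative only,
   assuming an arm_arch4 target): an SImode load accepts [rN, #4095]
   (range == 4096) while an HImode load is limited to [rN, #255]
   (range == 256) and rejects [rN, #256]; for SImode a scaled index such
   as [rN, rM, lsl #2] is also accepted via the MULT/shift cases, but no
   such scaled form is allowed for HImode here.  */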
8078 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8079 index operand. i.e. 1, 2, 4 or 8. */
8080 static bool
8081 thumb2_index_mul_operand (rtx op)
8083 HOST_WIDE_INT val;
8085 if (!CONST_INT_P (op))
8086 return false;
8088 val = INTVAL(op);
8089 return (val == 1 || val == 2 || val == 4 || val == 8);
8092 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8093 static int
8094 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8096 enum rtx_code code = GET_CODE (index);
8098 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8099 /* Standard coprocessor addressing modes. */
8100 if (TARGET_HARD_FLOAT
8101 && (mode == SFmode || mode == DFmode))
8102 return (code == CONST_INT && INTVAL (index) < 1024
8103 /* Thumb-2 allows only > -256 index range for its core register
8104 load/stores. Since we allow SF/DF in core registers, we have
8105 to use the intersection between -256~4096 (core) and -1024~1024
8106 (coprocessor). */
8107 && INTVAL (index) > -256
8108 && (INTVAL (index) & 3) == 0);
8110 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8112 /* For DImode assume values will usually live in core regs
8113 and only allow LDRD addressing modes. */
8114 if (!TARGET_LDRD || mode != DImode)
8115 return (code == CONST_INT
8116 && INTVAL (index) < 1024
8117 && INTVAL (index) > -1024
8118 && (INTVAL (index) & 3) == 0);
8121 /* For quad modes, we restrict the constant offset to be slightly less
8122 than what the instruction format permits. We do this because for
8123 quad mode moves, we will actually decompose them into two separate
8124 double-mode reads or writes. INDEX must therefore be a valid
8125 (double-mode) offset and so should INDEX+8. */
8126 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8127 return (code == CONST_INT
8128 && INTVAL (index) < 1016
8129 && INTVAL (index) > -1024
8130 && (INTVAL (index) & 3) == 0);
8132 /* We have no such constraint on double mode offsets, so we permit the
8133 full range of the instruction format. */
8134 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8135 return (code == CONST_INT
8136 && INTVAL (index) < 1024
8137 && INTVAL (index) > -1024
8138 && (INTVAL (index) & 3) == 0);
8140 if (arm_address_register_rtx_p (index, strict_p)
8141 && (GET_MODE_SIZE (mode) <= 4))
8142 return 1;
8144 if (mode == DImode || mode == DFmode)
8146 if (code == CONST_INT)
8148 HOST_WIDE_INT val = INTVAL (index);
8149 /* Thumb-2 ldrd only has reg+const addressing modes.
8150 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8151 If vldr is selected it uses arm_coproc_mem_operand. */
8152 if (TARGET_LDRD)
8153 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8154 else
8155 return IN_RANGE (val, -255, 4095 - 4);
8157 else
8158 return 0;
8161 if (code == MULT)
8163 rtx xiop0 = XEXP (index, 0);
8164 rtx xiop1 = XEXP (index, 1);
8166 return ((arm_address_register_rtx_p (xiop0, strict_p)
8167 && thumb2_index_mul_operand (xiop1))
8168 || (arm_address_register_rtx_p (xiop1, strict_p)
8169 && thumb2_index_mul_operand (xiop0)));
8171 else if (code == ASHIFT)
8173 rtx op = XEXP (index, 1);
8175 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8176 && CONST_INT_P (op)
8177 && INTVAL (op) > 0
8178 && INTVAL (op) <= 3);
8181 return (code == CONST_INT
8182 && INTVAL (index) < 4096
8183 && INTVAL (index) > -256);
8186 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8187 static int
8188 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8190 int regno;
8192 if (!REG_P (x))
8193 return 0;
8195 regno = REGNO (x);
8197 if (strict_p)
8198 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8200 return (regno <= LAST_LO_REGNUM
8201 || regno > LAST_VIRTUAL_REGISTER
8202 || regno == FRAME_POINTER_REGNUM
8203 || (GET_MODE_SIZE (mode) >= 4
8204 && (regno == STACK_POINTER_REGNUM
8205 || regno >= FIRST_PSEUDO_REGISTER
8206 || x == hard_frame_pointer_rtx
8207 || x == arg_pointer_rtx)));
8210 /* Return nonzero if x is a legitimate index register. This is the case
8211 for any base register that can access a QImode object. */
8212 inline static int
8213 thumb1_index_register_rtx_p (rtx x, int strict_p)
8215 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8218 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8220 The AP may be eliminated to either the SP or the FP, so we use the
8221 least common denominator, e.g. SImode, and offsets from 0 to 64.
8223 ??? Verify whether the above is the right approach.
8225 ??? Also, the FP may be eliminated to the SP, so perhaps that
8226 needs special handling also.
8228 ??? Look at how the mips16 port solves this problem. It probably uses
8229 better ways to solve some of these problems.
8231 Although it is not incorrect, we don't accept QImode and HImode
8232 addresses based on the frame pointer or arg pointer until the
8233 reload pass starts. This is so that eliminating such addresses
8234 into stack based ones won't produce impossible code. */
8236 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8238 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8239 return 0;
8241 /* ??? Not clear if this is right. Experiment. */
8242 if (GET_MODE_SIZE (mode) < 4
8243 && !(reload_in_progress || reload_completed)
8244 && (reg_mentioned_p (frame_pointer_rtx, x)
8245 || reg_mentioned_p (arg_pointer_rtx, x)
8246 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8247 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8248 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8249 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8250 return 0;
8252 /* Accept any base register. SP only in SImode or larger. */
8253 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8254 return 1;
8256 /* This is PC relative data before arm_reorg runs. */
8257 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8258 && GET_CODE (x) == SYMBOL_REF
8259 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8260 return 1;
8262 /* This is PC relative data after arm_reorg runs. */
8263 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8264 && reload_completed
8265 && (GET_CODE (x) == LABEL_REF
8266 || (GET_CODE (x) == CONST
8267 && GET_CODE (XEXP (x, 0)) == PLUS
8268 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8269 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8270 return 1;
8272 /* Post-inc indexing only supported for SImode and larger. */
8273 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8274 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8275 return 1;
8277 else if (GET_CODE (x) == PLUS)
8279 /* REG+REG address can be any two index registers. */
8280 /* We disallow FRAME+REG addressing since we know that FRAME
8281 will be replaced with STACK, and SP relative addressing only
8282 permits SP+OFFSET. */
8283 if (GET_MODE_SIZE (mode) <= 4
8284 && XEXP (x, 0) != frame_pointer_rtx
8285 && XEXP (x, 1) != frame_pointer_rtx
8286 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8287 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8288 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8289 return 1;
8291 /* REG+const has 5-7 bit offset for non-SP registers. */
8292 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8293 || XEXP (x, 0) == arg_pointer_rtx)
8294 && CONST_INT_P (XEXP (x, 1))
8295 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8296 return 1;
8298 /* REG+const has 10-bit offset for SP, but only SImode and
8299 larger is supported. */
8300 /* ??? Should probably check for DI/DFmode overflow here
8301 just like GO_IF_LEGITIMATE_OFFSET does. */
8302 else if (REG_P (XEXP (x, 0))
8303 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8304 && GET_MODE_SIZE (mode) >= 4
8305 && CONST_INT_P (XEXP (x, 1))
8306 && INTVAL (XEXP (x, 1)) >= 0
8307 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8308 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8309 return 1;
8311 else if (REG_P (XEXP (x, 0))
8312 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8313 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8314 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8315 && REGNO (XEXP (x, 0))
8316 <= LAST_VIRTUAL_POINTER_REGISTER))
8317 && GET_MODE_SIZE (mode) >= 4
8318 && CONST_INT_P (XEXP (x, 1))
8319 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8320 return 1;
8323 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8324 && GET_MODE_SIZE (mode) == 4
8325 && GET_CODE (x) == SYMBOL_REF
8326 && CONSTANT_POOL_ADDRESS_P (x)
8327 && ! (flag_pic
8328 && symbol_mentioned_p (get_pool_constant (x))
8329 && ! pcrel_constant_p (get_pool_constant (x))))
8330 return 1;
8332 return 0;
8335 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8336 instruction of mode MODE. */
8338 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8340 switch (GET_MODE_SIZE (mode))
8342 case 1:
8343 return val >= 0 && val < 32;
8345 case 2:
8346 return val >= 0 && val < 64 && (val & 1) == 0;
8348 default:
8349 return (val >= 0
8350 && (val + GET_MODE_SIZE (mode)) <= 128
8351 && (val & 3) == 0);
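/* Worked examples of the rule above (illustrative only): a QImode access
   accepts offsets 0..31, an HImode access 0..62 in steps of 2, and an
   SImode access 0..124 in steps of 4 (val + 4 <= 128), which lines up
   with the 5-bit scaled immediate fields of the Thumb-1 load/store
   encodings.  */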
8355 bool
8356 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8358 if (TARGET_ARM)
8359 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8360 else if (TARGET_THUMB2)
8361 return thumb2_legitimate_address_p (mode, x, strict_p);
8362 else /* if (TARGET_THUMB1) */
8363 return thumb1_legitimate_address_p (mode, x, strict_p);
8366 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8368 Given an rtx X being reloaded into a reg required to be
8369 in class CLASS, return the class of reg to actually use.
8370 In general this is just CLASS, but for the Thumb core registers and
8371 immediate constants we prefer a LO_REGS class or a subset. */
8373 static reg_class_t
8374 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8376 if (TARGET_32BIT)
8377 return rclass;
8378 else
8380 if (rclass == GENERAL_REGS)
8381 return LO_REGS;
8382 else
8383 return rclass;
8387 /* Build the SYMBOL_REF for __tls_get_addr. */
8389 static GTY(()) rtx tls_get_addr_libfunc;
8391 static rtx
8392 get_tls_get_addr (void)
8394 if (!tls_get_addr_libfunc)
8395 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8396 return tls_get_addr_libfunc;
8400 arm_load_tp (rtx target)
8402 if (!target)
8403 target = gen_reg_rtx (SImode);
8405 if (TARGET_HARD_TP)
8407 /* Can return in any reg. */
8408 emit_insn (gen_load_tp_hard (target));
8410 else
8412 /* Always returned in r0. Immediately copy the result into a pseudo;
8413 otherwise other uses of r0 (e.g. setting up function arguments) may
8414 clobber the value. */
8416 rtx tmp;
8418 emit_insn (gen_load_tp_soft ());
8420 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8421 emit_move_insn (target, tmp);
8423 return target;
8426 static rtx
8427 load_tls_operand (rtx x, rtx reg)
8429 rtx tmp;
8431 if (reg == NULL_RTX)
8432 reg = gen_reg_rtx (SImode);
8434 tmp = gen_rtx_CONST (SImode, x);
8436 emit_move_insn (reg, tmp);
8438 return reg;
8441 static rtx_insn *
8442 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8444 rtx label, labelno, sum;
8446 gcc_assert (reloc != TLS_DESCSEQ);
8447 start_sequence ();
8449 labelno = GEN_INT (pic_labelno++);
8450 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8451 label = gen_rtx_CONST (VOIDmode, label);
8453 sum = gen_rtx_UNSPEC (Pmode,
8454 gen_rtvec (4, x, GEN_INT (reloc), label,
8455 GEN_INT (TARGET_ARM ? 8 : 4)),
8456 UNSPEC_TLS);
8457 reg = load_tls_operand (sum, reg);
8459 if (TARGET_ARM)
8460 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8461 else
8462 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8464 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8465 LCT_PURE, /* LCT_CONST? */
8466 Pmode, reg, Pmode);
8468 rtx_insn *insns = get_insns ();
8469 end_sequence ();
8471 return insns;
8474 static rtx
8475 arm_tls_descseq_addr (rtx x, rtx reg)
8477 rtx labelno = GEN_INT (pic_labelno++);
8478 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8479 rtx sum = gen_rtx_UNSPEC (Pmode,
8480 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8481 gen_rtx_CONST (VOIDmode, label),
8482 GEN_INT (!TARGET_ARM)),
8483 UNSPEC_TLS);
8484 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8486 emit_insn (gen_tlscall (x, labelno));
8487 if (!reg)
8488 reg = gen_reg_rtx (SImode);
8489 else
8490 gcc_assert (REGNO (reg) != R0_REGNUM);
8492 emit_move_insn (reg, reg0);
8494 return reg;
8498 legitimize_tls_address (rtx x, rtx reg)
8500 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8501 rtx_insn *insns;
8502 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8504 switch (model)
8506 case TLS_MODEL_GLOBAL_DYNAMIC:
8507 if (TARGET_GNU2_TLS)
8509 reg = arm_tls_descseq_addr (x, reg);
8511 tp = arm_load_tp (NULL_RTX);
8513 dest = gen_rtx_PLUS (Pmode, tp, reg);
8515 else
8517 /* Original scheme */
8518 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8519 dest = gen_reg_rtx (Pmode);
8520 emit_libcall_block (insns, dest, ret, x);
8522 return dest;
8524 case TLS_MODEL_LOCAL_DYNAMIC:
8525 if (TARGET_GNU2_TLS)
8527 reg = arm_tls_descseq_addr (x, reg);
8529 tp = arm_load_tp (NULL_RTX);
8531 dest = gen_rtx_PLUS (Pmode, tp, reg);
8533 else
8535 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8537 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8538 share the LDM result with other LD model accesses. */
8539 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8540 UNSPEC_TLS);
8541 dest = gen_reg_rtx (Pmode);
8542 emit_libcall_block (insns, dest, ret, eqv);
8544 /* Load the addend. */
8545 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8546 GEN_INT (TLS_LDO32)),
8547 UNSPEC_TLS);
8548 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8549 dest = gen_rtx_PLUS (Pmode, dest, addend);
8551 return dest;
8553 case TLS_MODEL_INITIAL_EXEC:
8554 labelno = GEN_INT (pic_labelno++);
8555 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8556 label = gen_rtx_CONST (VOIDmode, label);
8557 sum = gen_rtx_UNSPEC (Pmode,
8558 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8559 GEN_INT (TARGET_ARM ? 8 : 4)),
8560 UNSPEC_TLS);
8561 reg = load_tls_operand (sum, reg);
8563 if (TARGET_ARM)
8564 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8565 else if (TARGET_THUMB2)
8566 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8567 else
8569 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8570 emit_move_insn (reg, gen_const_mem (SImode, reg));
8573 tp = arm_load_tp (NULL_RTX);
8575 return gen_rtx_PLUS (Pmode, tp, reg);
8577 case TLS_MODEL_LOCAL_EXEC:
8578 tp = arm_load_tp (NULL_RTX);
8580 reg = gen_rtx_UNSPEC (Pmode,
8581 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8582 UNSPEC_TLS);
8583 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8585 return gen_rtx_PLUS (Pmode, tp, reg);
8587 default:
8588 abort ();
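/* A worked sketch of the simplest model above (local exec, illustrative
   RTL): the address returned is

     (plus (reg TP) (const (unspec [x, TLS_LE32] UNSPEC_TLS)))

   i.e. the thread pointer obtained from arm_load_tp plus a link-time
   TPOFF constant; no call to __tls_get_addr is required.  */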
8592 /* Try machine-dependent ways of modifying an illegitimate address
8593 to be legitimate. If we find one, return the new, valid address. */
8595 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8597 if (arm_tls_referenced_p (x))
8599 rtx addend = NULL;
8601 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8603 addend = XEXP (XEXP (x, 0), 1);
8604 x = XEXP (XEXP (x, 0), 0);
8607 if (GET_CODE (x) != SYMBOL_REF)
8608 return x;
8610 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8612 x = legitimize_tls_address (x, NULL_RTX);
8614 if (addend)
8616 x = gen_rtx_PLUS (SImode, x, addend);
8617 orig_x = x;
8619 else
8620 return x;
8623 if (!TARGET_ARM)
8625 /* TODO: legitimize_address for Thumb2. */
8626 if (TARGET_THUMB2)
8627 return x;
8628 return thumb_legitimize_address (x, orig_x, mode);
8631 if (GET_CODE (x) == PLUS)
8633 rtx xop0 = XEXP (x, 0);
8634 rtx xop1 = XEXP (x, 1);
8636 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8637 xop0 = force_reg (SImode, xop0);
8639 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8640 && !symbol_mentioned_p (xop1))
8641 xop1 = force_reg (SImode, xop1);
8643 if (ARM_BASE_REGISTER_RTX_P (xop0)
8644 && CONST_INT_P (xop1))
8646 HOST_WIDE_INT n, low_n;
8647 rtx base_reg, val;
8648 n = INTVAL (xop1);
8650 /* VFP addressing modes actually allow greater offsets, but for
8651 now we just stick with the lowest common denominator. */
8652 if (mode == DImode || mode == DFmode)
8654 low_n = n & 0x0f;
8655 n &= ~0x0f;
8656 if (low_n > 4)
8658 n += 16;
8659 low_n -= 16;
8662 else
8664 low_n = ((mode) == TImode ? 0
8665 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8666 n -= low_n;
8669 base_reg = gen_reg_rtx (SImode);
8670 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8671 emit_move_insn (base_reg, val);
8672 x = plus_constant (Pmode, base_reg, low_n);
8674 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8675 x = gen_rtx_PLUS (SImode, xop0, xop1);
8678 /* XXX We don't allow MINUS any more -- see comment in
8679 arm_legitimate_address_outer_p (). */
8680 else if (GET_CODE (x) == MINUS)
8682 rtx xop0 = XEXP (x, 0);
8683 rtx xop1 = XEXP (x, 1);
8685 if (CONSTANT_P (xop0))
8686 xop0 = force_reg (SImode, xop0);
8688 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8689 xop1 = force_reg (SImode, xop1);
8691 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8692 x = gen_rtx_MINUS (SImode, xop0, xop1);
8695 /* Make sure to take full advantage of the pre-indexed addressing mode
8696 with absolute addresses, which often allows the base register to
8697 be factorized across multiple adjacent memory references, and may
8698 even allow the minipool to be avoided entirely. */
8699 else if (CONST_INT_P (x) && optimize > 0)
8701 unsigned int bits;
8702 HOST_WIDE_INT mask, base, index;
8703 rtx base_reg;
8705 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8706 use an 8-bit index. So let's use a 12-bit index for SImode only and
8707 hope that arm_gen_constant will enable ldrb to use more bits. */
8708 bits = (mode == SImode) ? 12 : 8;
8709 mask = (1 << bits) - 1;
8710 base = INTVAL (x) & ~mask;
8711 index = INTVAL (x) & mask;
8712 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8714 /* It'll most probably be more efficient to generate the base
8715 with more bits set and use a negative index instead. */
8716 base |= mask;
8717 index -= mask;
8719 base_reg = force_reg (SImode, GEN_INT (base));
8720 x = plus_constant (Pmode, base_reg, index);
8723 if (flag_pic)
8725 /* We need to find and carefully transform any SYMBOL and LABEL
8726 references; so go back to the original address expression. */
8727 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8729 if (new_x != orig_x)
8730 x = new_x;
8733 return x;
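/* A worked example of the CONST_INT case above (numbers only, assuming
   mode == SImode so bits == 12): for x == 0x12345 we get mask == 0xfff,
   base == 0x12000 and index == 0x345; bit_count (0x12000) == 2, which is
   not greater than (32 - 12) / 2, so no negative-index adjustment is made
   and the address becomes base_reg (holding 0x12000) plus 0x345.  */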
8737 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8738 to be legitimate. If we find one, return the new, valid address. */
8740 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8742 if (GET_CODE (x) == PLUS
8743 && CONST_INT_P (XEXP (x, 1))
8744 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8745 || INTVAL (XEXP (x, 1)) < 0))
8747 rtx xop0 = XEXP (x, 0);
8748 rtx xop1 = XEXP (x, 1);
8749 HOST_WIDE_INT offset = INTVAL (xop1);
8751 /* Try to fold the offset into a biasing of the base register and
8752 then offsetting that. Don't do this when optimizing for space
8753 since it can cause too many CSEs. */
8754 if (optimize_size && offset >= 0
8755 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8757 HOST_WIDE_INT delta;
8759 if (offset >= 256)
8760 delta = offset - (256 - GET_MODE_SIZE (mode));
8761 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8762 delta = 31 * GET_MODE_SIZE (mode);
8763 else
8764 delta = offset & (~31 * GET_MODE_SIZE (mode));
8766 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8767 NULL_RTX);
8768 x = plus_constant (Pmode, xop0, delta);
8770 else if (offset < 0 && offset > -256)
8771 /* Small negative offsets are best done with a subtract before the
8772 dereference; forcing these into a register normally takes two
8773 instructions. */
8774 x = force_operand (x, NULL_RTX);
8775 else
8777 /* For the remaining cases, force the constant into a register. */
8778 xop1 = force_reg (SImode, xop1);
8779 x = gen_rtx_PLUS (SImode, xop0, xop1);
8782 else if (GET_CODE (x) == PLUS
8783 && s_register_operand (XEXP (x, 1), SImode)
8784 && !s_register_operand (XEXP (x, 0), SImode))
8786 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8788 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8791 if (flag_pic)
8793 /* We need to find and carefully transform any SYMBOL and LABEL
8794 references; so go back to the original address expression. */
8795 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8797 if (new_x != orig_x)
8798 x = new_x;
8801 return x;
8804 /* Return TRUE if X contains any TLS symbol references. */
8806 bool
8807 arm_tls_referenced_p (rtx x)
8809 if (! TARGET_HAVE_TLS)
8810 return false;
8812 subrtx_iterator::array_type array;
8813 FOR_EACH_SUBRTX (iter, array, x, ALL)
8815 const_rtx x = *iter;
8816 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8818 /* ARM currently does not provide relocations to encode TLS variables
8819 into AArch32 instructions, only data, so there is currently no
8820 way to implement these if a literal pool is disabled. */
8821 if (arm_disable_literal_pool)
8822 sorry ("accessing thread-local storage is not currently supported "
8823 "with -mpure-code or -mslow-flash-data");
8825 return true;
8828 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8829 TLS offsets, not real symbol references. */
8830 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8831 iter.skip_subrtxes ();
8833 return false;
8836 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8838 On the ARM, allow any integer (invalid ones are removed later by insn
8839 patterns), nice doubles and symbol_refs which refer to the function's
8840 constant pool XXX.
8842 When generating pic allow anything. */
8844 static bool
8845 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8847 return flag_pic || !label_mentioned_p (x);
8850 static bool
8851 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8853 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8854 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8855 for ARMv8-M Baseline or later the result is valid. */
8856 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8857 x = XEXP (x, 0);
8859 return (CONST_INT_P (x)
8860 || CONST_DOUBLE_P (x)
8861 || CONSTANT_ADDRESS_P (x)
8862 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8863 || flag_pic);
8866 static bool
8867 arm_legitimate_constant_p (machine_mode mode, rtx x)
8869 return (!arm_cannot_force_const_mem (mode, x)
8870 && (TARGET_32BIT
8871 ? arm_legitimate_constant_p_1 (mode, x)
8872 : thumb_legitimate_constant_p (mode, x)));
8875 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8877 static bool
8878 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8880 rtx base, offset;
8882 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8884 split_const (x, &base, &offset);
8885 if (GET_CODE (base) == SYMBOL_REF
8886 && !offset_within_block_p (base, INTVAL (offset)))
8887 return true;
8889 return arm_tls_referenced_p (x);
8892 #define REG_OR_SUBREG_REG(X) \
8893 (REG_P (X) \
8894 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8896 #define REG_OR_SUBREG_RTX(X) \
8897 (REG_P (X) ? (X) : SUBREG_REG (X))
8899 static inline int
8900 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8902 machine_mode mode = GET_MODE (x);
8903 int total, words;
8905 switch (code)
8907 case ASHIFT:
8908 case ASHIFTRT:
8909 case LSHIFTRT:
8910 case ROTATERT:
8911 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8913 case PLUS:
8914 case MINUS:
8915 case COMPARE:
8916 case NEG:
8917 case NOT:
8918 return COSTS_N_INSNS (1);
8920 case MULT:
8921 if (arm_arch6m && arm_m_profile_small_mul)
8922 return COSTS_N_INSNS (32);
8924 if (CONST_INT_P (XEXP (x, 1)))
8926 int cycles = 0;
8927 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8929 while (i)
8931 i >>= 2;
8932 cycles++;
8934 return COSTS_N_INSNS (2) + cycles;
8936 return COSTS_N_INSNS (1) + 16;
8938 case SET:
8939 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8940 the mode. */
8941 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8942 return (COSTS_N_INSNS (words)
8943 + 4 * ((MEM_P (SET_SRC (x)))
8944 + MEM_P (SET_DEST (x))));
8946 case CONST_INT:
8947 if (outer == SET)
8949 if (UINTVAL (x) < 256
8950 /* 16-bit constant. */
8951 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8952 return 0;
8953 if (thumb_shiftable_const (INTVAL (x)))
8954 return COSTS_N_INSNS (2);
8955 return COSTS_N_INSNS (3);
8957 else if ((outer == PLUS || outer == COMPARE)
8958 && INTVAL (x) < 256 && INTVAL (x) > -256)
8959 return 0;
8960 else if ((outer == IOR || outer == XOR || outer == AND)
8961 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8962 return COSTS_N_INSNS (1);
8963 else if (outer == AND)
8965 int i;
8966 /* This duplicates the tests in the andsi3 expander. */
8967 for (i = 9; i <= 31; i++)
8968 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8969 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8970 return COSTS_N_INSNS (2);
8972 else if (outer == ASHIFT || outer == ASHIFTRT
8973 || outer == LSHIFTRT)
8974 return 0;
8975 return COSTS_N_INSNS (2);
8977 case CONST:
8978 case CONST_DOUBLE:
8979 case LABEL_REF:
8980 case SYMBOL_REF:
8981 return COSTS_N_INSNS (3);
8983 case UDIV:
8984 case UMOD:
8985 case DIV:
8986 case MOD:
8987 return 100;
8989 case TRUNCATE:
8990 return 99;
8992 case AND:
8993 case XOR:
8994 case IOR:
8995 /* XXX guess. */
8996 return 8;
8998 case MEM:
8999 /* XXX another guess. */
9000 /* Memory costs quite a lot for the first word, but subsequent words
9001 load at the equivalent of a single insn each. */
9002 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9003 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9004 ? 4 : 0));
9006 case IF_THEN_ELSE:
9007 /* XXX a guess. */
9008 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9009 return 14;
9010 return 2;
9012 case SIGN_EXTEND:
9013 case ZERO_EXTEND:
9014 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9015 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9017 if (mode == SImode)
9018 return total;
9020 if (arm_arch6)
9021 return total + COSTS_N_INSNS (1);
9023 /* Assume a two-shift sequence. Increase the cost slightly so
9024 we prefer actual shifts over an extend operation. */
9025 return total + 1 + COSTS_N_INSNS (2);
9027 default:
9028 return 99;
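/* A small worked example for the MULT case above (arithmetic only, on a
   target where the small-multiplier early return does not apply): with a
   constant multiplier of 0x55 the loop shifts right by two bits four
   times (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), so the reported cost is
   COSTS_N_INSNS (2) + 4.  */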
9032 /* Estimates the size cost of thumb1 instructions.
9033 For now most of the code is copied from thumb1_rtx_costs. We need more
9034 fine grain tuning when we have more related test cases. */
9035 static inline int
9036 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9038 machine_mode mode = GET_MODE (x);
9039 int words, cost;
9041 switch (code)
9043 case ASHIFT:
9044 case ASHIFTRT:
9045 case LSHIFTRT:
9046 case ROTATERT:
9047 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9049 case PLUS:
9050 case MINUS:
9051 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9052 defined by RTL expansion, especially for the expansion of
9053 multiplication. */
9054 if ((GET_CODE (XEXP (x, 0)) == MULT
9055 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9056 || (GET_CODE (XEXP (x, 1)) == MULT
9057 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9058 return COSTS_N_INSNS (2);
9059 /* Fall through. */
9060 case COMPARE:
9061 case NEG:
9062 case NOT:
9063 return COSTS_N_INSNS (1);
9065 case MULT:
9066 if (CONST_INT_P (XEXP (x, 1)))
9068 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
9069 into a register first. */
9070 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9071 /* For the targets which have a very small and high-latency multiply
9072 unit, we prefer to synthesize the mult with up to 5 instructions,
9073 giving a good balance between size and performance. */
9074 if (arm_arch6m && arm_m_profile_small_mul)
9075 return COSTS_N_INSNS (5);
9076 else
9077 return COSTS_N_INSNS (1) + const_size;
9079 return COSTS_N_INSNS (1);
9081 case SET:
9082 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9083 the mode. */
9084 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9085 cost = COSTS_N_INSNS (words);
9086 if (satisfies_constraint_J (SET_SRC (x))
9087 || satisfies_constraint_K (SET_SRC (x))
9088 /* Too big an immediate for a 2-byte mov, using MOVT. */
9089 || (CONST_INT_P (SET_SRC (x))
9090 && UINTVAL (SET_SRC (x)) >= 256
9091 && TARGET_HAVE_MOVT
9092 && satisfies_constraint_j (SET_SRC (x)))
9093 /* thumb1_movdi_insn. */
9094 || ((words > 1) && MEM_P (SET_SRC (x))))
9095 cost += COSTS_N_INSNS (1);
9096 return cost;
9098 case CONST_INT:
9099 if (outer == SET)
9101 if (UINTVAL (x) < 256)
9102 return COSTS_N_INSNS (1);
9103 /* movw is 4 bytes long. */
9104 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9105 return COSTS_N_INSNS (2);
9106 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9107 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9108 return COSTS_N_INSNS (2);
9109 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9110 if (thumb_shiftable_const (INTVAL (x)))
9111 return COSTS_N_INSNS (2);
9112 return COSTS_N_INSNS (3);
9114 else if ((outer == PLUS || outer == COMPARE)
9115 && INTVAL (x) < 256 && INTVAL (x) > -256)
9116 return 0;
9117 else if ((outer == IOR || outer == XOR || outer == AND)
9118 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9119 return COSTS_N_INSNS (1);
9120 else if (outer == AND)
9122 int i;
9123 /* This duplicates the tests in the andsi3 expander. */
9124 for (i = 9; i <= 31; i++)
9125 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9126 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9127 return COSTS_N_INSNS (2);
9129 else if (outer == ASHIFT || outer == ASHIFTRT
9130 || outer == LSHIFTRT)
9131 return 0;
9132 return COSTS_N_INSNS (2);
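      /* Illustrative sizes when the constant feeds a SET (derived from the
	 tests above, not normative): 0x80 costs 1 insn, 0xff00 costs 2
	 (a MOVW, or a MOVS plus a shift), and an arbitrary 32-bit value 3.  */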
9134 case CONST:
9135 case CONST_DOUBLE:
9136 case LABEL_REF:
9137 case SYMBOL_REF:
9138 return COSTS_N_INSNS (3);
9140 case UDIV:
9141 case UMOD:
9142 case DIV:
9143 case MOD:
9144 return 100;
9146 case TRUNCATE:
9147 return 99;
9149 case AND:
9150 case XOR:
9151 case IOR:
9152 return COSTS_N_INSNS (1);
9154 case MEM:
9155 return (COSTS_N_INSNS (1)
9156 + COSTS_N_INSNS (1)
9157 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9158 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9159 ? COSTS_N_INSNS (1) : 0));
9161 case IF_THEN_ELSE:
9162 /* XXX a guess. */
9163 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9164 return 14;
9165 return 2;
9167 case ZERO_EXTEND:
9168 /* XXX still guessing. */
9169 switch (GET_MODE (XEXP (x, 0)))
9171 case E_QImode:
9172 return (1 + (mode == DImode ? 4 : 0)
9173 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9175 case E_HImode:
9176 return (4 + (mode == DImode ? 4 : 0)
9177 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9179 case E_SImode:
9180 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9182 default:
9183 return 99;
9186 default:
9187 return 99;
9191 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9192 operand, then return the operand that is being shifted. If the shift
9193 is not by a constant, then set SHIFT_REG to point to the operand.
9194 Return NULL if OP is not a shifter operand. */
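/* For example (illustrative): (mult:SI rA (const_int 4)) is treated as rA
   shifted left by two and rA is returned, while (ashift:SI rA rB) returns rA
   and sets *SHIFT_REG to rB.  */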
9195 static rtx
9196 shifter_op_p (rtx op, rtx *shift_reg)
9198 enum rtx_code code = GET_CODE (op);
9200 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9201 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9202 return XEXP (op, 0);
9203 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9204 return XEXP (op, 0);
9205 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9206 || code == ASHIFTRT)
9208 if (!CONST_INT_P (XEXP (op, 1)))
9209 *shift_reg = XEXP (op, 1);
9210 return XEXP (op, 0);
9213 return NULL;
9216 static bool
9217 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9219 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9220 rtx_code code = GET_CODE (x);
9221 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9223 switch (XINT (x, 1))
9225 case UNSPEC_UNALIGNED_LOAD:
9226 /* We can only do unaligned loads into the integer unit, and we can't
9227 use LDM or LDRD. */
9228 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9229 if (speed_p)
9230 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9231 + extra_cost->ldst.load_unaligned);
9233 #ifdef NOT_YET
9234 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9235 ADDR_SPACE_GENERIC, speed_p);
9236 #endif
9237 return true;
9239 case UNSPEC_UNALIGNED_STORE:
9240 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9241 if (speed_p)
9242 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9243 + extra_cost->ldst.store_unaligned);
9245 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9246 #ifdef NOT_YET
9247 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9248 ADDR_SPACE_GENERIC, speed_p);
9249 #endif
9250 return true;
9252 case UNSPEC_VRINTZ:
9253 case UNSPEC_VRINTP:
9254 case UNSPEC_VRINTM:
9255 case UNSPEC_VRINTR:
9256 case UNSPEC_VRINTX:
9257 case UNSPEC_VRINTA:
9258 if (speed_p)
9259 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9261 return true;
9262 default:
9263 *cost = COSTS_N_INSNS (2);
9264 break;
9266 return true;
9269 /* Cost of a libcall. We assume one insn per argument, an amount for the
9270 call (one insn for -Os) and then one for processing the result. */
9271 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
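/* For instance, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for size.  */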
9273 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9274 do \
9276 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9277 if (shift_op != NULL \
9278 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9280 if (shift_reg) \
9282 if (speed_p) \
9283 *cost += extra_cost->alu.arith_shift_reg; \
9284 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9285 ASHIFT, 1, speed_p); \
9287 else if (speed_p) \
9288 *cost += extra_cost->alu.arith_shift; \
9290 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9291 ASHIFT, 0, speed_p) \
9292 + rtx_cost (XEXP (x, 1 - IDX), \
9293 GET_MODE (shift_op), \
9294 OP, 1, speed_p)); \
9295 return true; \
9298 while (0)
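/* A sketch of what the macro above does: HANDLE_NARROW_SHIFT_ARITH (OP, IDX)
   checks whether operand IDX of X is a left shift (by a constant or by a
   register) and, if so, adds the appropriate arith_shift[_reg] cost plus the
   costs of the shifted operand and of the other operand, then returns.  */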
9300 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9301 considering the costs of the addressing mode and memory access
9302 separately. */
9303 static bool
9304 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9305 int *cost, bool speed_p)
9307 machine_mode mode = GET_MODE (x);
9309 *cost = COSTS_N_INSNS (1);
9311 if (flag_pic
9312 && GET_CODE (XEXP (x, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9314 /* This will be split into two instructions. Add the cost of the
9315 additional instruction here. The cost of the memory access is computed
9316 below. See arm.md:calculate_pic_address. */
9317 *cost += COSTS_N_INSNS (1);
9319 /* Calculate cost of the addressing mode. */
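  /* The mapping below: a bare register base uses AMO_DEFAULT, PLUS/MINUS
     (base plus offset, no write-back) uses AMO_NO_WB, and the pre/post
     increment, decrement and modify forms use AMO_WB.  */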
9320 if (speed_p)
9322 arm_addr_mode_op op_type;
9323 switch (GET_CODE (XEXP (x, 0)))
9325 default:
9326 case REG:
9327 op_type = AMO_DEFAULT;
9328 break;
9329 case MINUS:
9330 /* MINUS does not appear in RTL, but the architecture supports it,
9331 so handle this case defensively. */
9332 /* fall through */
9333 case PLUS:
9334 op_type = AMO_NO_WB;
9335 break;
9336 case PRE_INC:
9337 case PRE_DEC:
9338 case POST_INC:
9339 case POST_DEC:
9340 case PRE_MODIFY:
9341 case POST_MODIFY:
9342 op_type = AMO_WB;
9343 break;
9346 if (VECTOR_MODE_P (mode))
9347 *cost += current_tune->addr_mode_costs->vector[op_type];
9348 else if (FLOAT_MODE_P (mode))
9349 *cost += current_tune->addr_mode_costs->fp[op_type];
9350 else
9351 *cost += current_tune->addr_mode_costs->integer[op_type];
9354 /* Calculate cost of memory access. */
9355 if (speed_p)
9357 if (FLOAT_MODE_P (mode))
9359 if (GET_MODE_SIZE (mode) == 8)
9360 *cost += extra_cost->ldst.loadd;
9361 else
9362 *cost += extra_cost->ldst.loadf;
9364 else if (VECTOR_MODE_P (mode))
9365 *cost += extra_cost->ldst.loadv;
9366 else
9368 /* Integer modes */
9369 if (GET_MODE_SIZE (mode) == 8)
9370 *cost += extra_cost->ldst.ldrd;
9371 else
9372 *cost += extra_cost->ldst.load;
9376 return true;
9379 /* RTX costs. Make an estimate of the cost of executing the operation
9380 X, which is contained within an operation with code OUTER_CODE.
9381 SPEED_P indicates whether the cost desired is the performance cost,
9382 or the size cost. The estimate is stored in COST and the return
9383 value is TRUE if the cost calculation is final, or FALSE if the
9384 caller should recurse through the operands of X to add additional
9385 costs.
9387 We currently make no attempt to model the size savings of Thumb-2
9388 16-bit instructions. At the normal points in compilation where
9389 this code is called we have no measure of whether the condition
9390 flags are live or not, and thus no realistic way to determine what
9391 the size will eventually be. */
9392 static bool
9393 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9394 const struct cpu_cost_table *extra_cost,
9395 int *cost, bool speed_p)
9397 machine_mode mode = GET_MODE (x);
9399 *cost = COSTS_N_INSNS (1);
9401 if (TARGET_THUMB1)
9403 if (speed_p)
9404 *cost = thumb1_rtx_costs (x, code, outer_code);
9405 else
9406 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9407 return true;
9410 switch (code)
9412 case SET:
9413 *cost = 0;
9414 /* SET RTXs don't have a mode so we get it from the destination. */
9415 mode = GET_MODE (SET_DEST (x));
9417 if (REG_P (SET_SRC (x))
9418 && REG_P (SET_DEST (x)))
9420 /* Assume that most copies can be done with a single insn,
9421 unless we don't have HW FP, in which case everything
9422 larger than word mode will require two insns. */
9423 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9424 && GET_MODE_SIZE (mode) > 4)
9425 || mode == DImode)
9426 ? 2 : 1);
9427 /* Conditional register moves can be encoded
9428 in 16 bits in Thumb mode. */
9429 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9430 *cost >>= 1;
9432 return true;
9435 if (CONST_INT_P (SET_SRC (x)))
9437 /* Handle CONST_INT here, since the value doesn't have a mode
9438 and we would otherwise be unable to work out the true cost. */
9439 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9440 0, speed_p);
9441 outer_code = SET;
9442 /* Slightly lower the cost of setting a core reg to a constant.
9443 This helps break up chains and allows for better scheduling. */
9444 if (REG_P (SET_DEST (x))
9445 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9446 *cost -= 1;
9447 x = SET_SRC (x);
9448 /* Immediate moves with an immediate in the range [0, 255] can be
9449 encoded in 16 bits in Thumb mode. */
9450 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9451	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9452 *cost >>= 1;
9453 goto const_int_cost;
9456 return false;
9458 case MEM:
9459 return arm_mem_costs (x, extra_cost, cost, speed_p);
9461 case PARALLEL:
9463 /* Calculations of LDM costs are complex. We assume an initial cost
9464 (ldm_1st) which will load the number of registers mentioned in
9465 ldm_regs_per_insn_1st registers; then each additional
9466 ldm_regs_per_insn_subsequent registers cost one more insn. The
9467 formula for N regs is thus:
9469 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9470 + ldm_regs_per_insn_subsequent - 1)
9471 / ldm_regs_per_insn_subsequent).
9473 Additional costs may also be added for addressing. A similar
9474 formula is used for STM. */
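      /* For example, with illustrative tuning values ldm_regs_per_insn_1st == 2
	 and ldm_regs_per_insn_subsequent == 2, an 8-register LDM would cost
	 ldm_1st + COSTS_N_INSNS (3).  */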
9476 bool is_ldm = load_multiple_operation (x, SImode);
9477 bool is_stm = store_multiple_operation (x, SImode);
9479 if (is_ldm || is_stm)
9481 if (speed_p)
9483 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9484 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9485 ? extra_cost->ldst.ldm_regs_per_insn_1st
9486 : extra_cost->ldst.stm_regs_per_insn_1st;
9487 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9488 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9489 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9491 *cost += regs_per_insn_1st
9492 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9493 + regs_per_insn_sub - 1)
9494 / regs_per_insn_sub);
9495 return true;
9499 return false;
9501 case DIV:
9502 case UDIV:
9503 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9504 && (mode == SFmode || !TARGET_VFP_SINGLE))
9505 *cost += COSTS_N_INSNS (speed_p
9506 ? extra_cost->fp[mode != SFmode].div : 0);
9507 else if (mode == SImode && TARGET_IDIV)
9508 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9509 else
9510 *cost = LIBCALL_COST (2);
9512      /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9513	 are possible, udiv is preferred.  */
9514 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9515 return false; /* All arguments must be in registers. */
9517 case MOD:
9518 /* MOD by a power of 2 can be expanded as:
9519 rsbs r1, r0, #0
9520 and r0, r0, #(n - 1)
9521 and r1, r1, #(n - 1)
9522 rsbpl r0, r1, #0. */
9523 if (CONST_INT_P (XEXP (x, 1))
9524 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9525 && mode == SImode)
9527 *cost += COSTS_N_INSNS (3);
9529 if (speed_p)
9530 *cost += 2 * extra_cost->alu.logical
9531 + extra_cost->alu.arith;
9532 return true;
9535 /* Fall-through. */
9536 case UMOD:
9537      /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9538	 are possible, udiv is preferred.  */
9539 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9540 return false; /* All arguments must be in registers. */
9542 case ROTATE:
9543 if (mode == SImode && REG_P (XEXP (x, 1)))
9545 *cost += (COSTS_N_INSNS (1)
9546 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9547 if (speed_p)
9548 *cost += extra_cost->alu.shift_reg;
9549 return true;
9551 /* Fall through */
9552 case ROTATERT:
9553 case ASHIFT:
9554 case LSHIFTRT:
9555 case ASHIFTRT:
9556 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9558 *cost += (COSTS_N_INSNS (2)
9559 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9560 if (speed_p)
9561 *cost += 2 * extra_cost->alu.shift;
9562	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
9563 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9564 *cost += 1;
9565 return true;
9567 else if (mode == SImode)
9569 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9570 /* Slightly disparage register shifts at -Os, but not by much. */
9571 if (!CONST_INT_P (XEXP (x, 1)))
9572 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9573 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9574 return true;
9576 else if (GET_MODE_CLASS (mode) == MODE_INT
9577 && GET_MODE_SIZE (mode) < 4)
9579 if (code == ASHIFT)
9581 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9582 /* Slightly disparage register shifts at -Os, but not by
9583 much. */
9584 if (!CONST_INT_P (XEXP (x, 1)))
9585 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9586 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9588 else if (code == LSHIFTRT || code == ASHIFTRT)
9590 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9592 /* Can use SBFX/UBFX. */
9593 if (speed_p)
9594 *cost += extra_cost->alu.bfx;
9595 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9597 else
9599 *cost += COSTS_N_INSNS (1);
9600 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9601 if (speed_p)
9603 if (CONST_INT_P (XEXP (x, 1)))
9604 *cost += 2 * extra_cost->alu.shift;
9605 else
9606 *cost += (extra_cost->alu.shift
9607 + extra_cost->alu.shift_reg);
9609 else
9610 /* Slightly disparage register shifts. */
9611 *cost += !CONST_INT_P (XEXP (x, 1));
9614 else /* Rotates. */
9616 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9617 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9618 if (speed_p)
9620 if (CONST_INT_P (XEXP (x, 1)))
9621 *cost += (2 * extra_cost->alu.shift
9622 + extra_cost->alu.log_shift);
9623 else
9624 *cost += (extra_cost->alu.shift
9625 + extra_cost->alu.shift_reg
9626 + extra_cost->alu.log_shift_reg);
9629 return true;
9632 *cost = LIBCALL_COST (2);
9633 return false;
9635 case BSWAP:
9636 if (arm_arch6)
9638 if (mode == SImode)
9640 if (speed_p)
9641 *cost += extra_cost->alu.rev;
9643 return false;
9646 else
9648 /* No rev instruction available. Look at arm_legacy_rev
9649 and thumb_legacy_rev for the form of RTL used then. */
9650 if (TARGET_THUMB)
9652 *cost += COSTS_N_INSNS (9);
9654 if (speed_p)
9656 *cost += 6 * extra_cost->alu.shift;
9657 *cost += 3 * extra_cost->alu.logical;
9660 else
9662 *cost += COSTS_N_INSNS (4);
9664 if (speed_p)
9666 *cost += 2 * extra_cost->alu.shift;
9667 *cost += extra_cost->alu.arith_shift;
9668 *cost += 2 * extra_cost->alu.logical;
9671 return true;
9673 return false;
9675 case MINUS:
9676 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9677 && (mode == SFmode || !TARGET_VFP_SINGLE))
9679 if (GET_CODE (XEXP (x, 0)) == MULT
9680 || GET_CODE (XEXP (x, 1)) == MULT)
9682 rtx mul_op0, mul_op1, sub_op;
9684 if (speed_p)
9685 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9687 if (GET_CODE (XEXP (x, 0)) == MULT)
9689 mul_op0 = XEXP (XEXP (x, 0), 0);
9690 mul_op1 = XEXP (XEXP (x, 0), 1);
9691 sub_op = XEXP (x, 1);
9693 else
9695 mul_op0 = XEXP (XEXP (x, 1), 0);
9696 mul_op1 = XEXP (XEXP (x, 1), 1);
9697 sub_op = XEXP (x, 0);
9700 /* The first operand of the multiply may be optionally
9701 negated. */
9702 if (GET_CODE (mul_op0) == NEG)
9703 mul_op0 = XEXP (mul_op0, 0);
9705 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9706 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9707 + rtx_cost (sub_op, mode, code, 0, speed_p));
9709 return true;
9712 if (speed_p)
9713 *cost += extra_cost->fp[mode != SFmode].addsub;
9714 return false;
9717 if (mode == SImode)
9719 rtx shift_by_reg = NULL;
9720 rtx shift_op;
9721 rtx non_shift_op;
9723 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9724 if (shift_op == NULL)
9726 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9727 non_shift_op = XEXP (x, 0);
9729 else
9730 non_shift_op = XEXP (x, 1);
9732 if (shift_op != NULL)
9734 if (shift_by_reg != NULL)
9736 if (speed_p)
9737 *cost += extra_cost->alu.arith_shift_reg;
9738 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9740 else if (speed_p)
9741 *cost += extra_cost->alu.arith_shift;
9743 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9744 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9745 return true;
9748 if (arm_arch_thumb2
9749 && GET_CODE (XEXP (x, 1)) == MULT)
9751 /* MLS. */
9752 if (speed_p)
9753 *cost += extra_cost->mult[0].add;
9754 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9755 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9756 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9757 return true;
9760 if (CONST_INT_P (XEXP (x, 0)))
9762 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9763 INTVAL (XEXP (x, 0)), NULL_RTX,
9764 NULL_RTX, 1, 0);
9765 *cost = COSTS_N_INSNS (insns);
9766 if (speed_p)
9767 *cost += insns * extra_cost->alu.arith;
9768 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9769 return true;
9771 else if (speed_p)
9772 *cost += extra_cost->alu.arith;
9774 return false;
9777 if (GET_MODE_CLASS (mode) == MODE_INT
9778 && GET_MODE_SIZE (mode) < 4)
9780 rtx shift_op, shift_reg;
9781 shift_reg = NULL;
9783 /* We check both sides of the MINUS for shifter operands since,
9784 unlike PLUS, it's not commutative. */
9786 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9787 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9789 /* Slightly disparage, as we might need to widen the result. */
9790 *cost += 1;
9791 if (speed_p)
9792 *cost += extra_cost->alu.arith;
9794 if (CONST_INT_P (XEXP (x, 0)))
9796 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9797 return true;
9800 return false;
9803 if (mode == DImode)
9805 *cost += COSTS_N_INSNS (1);
9807 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9809 rtx op1 = XEXP (x, 1);
9811 if (speed_p)
9812 *cost += 2 * extra_cost->alu.arith;
9814 if (GET_CODE (op1) == ZERO_EXTEND)
9815 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9816 0, speed_p);
9817 else
9818 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9819 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9820 0, speed_p);
9821 return true;
9823 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9825 if (speed_p)
9826 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9827 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9828 0, speed_p)
9829 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9830 return true;
9832 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9833 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9835 if (speed_p)
9836 *cost += (extra_cost->alu.arith
9837 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9838 ? extra_cost->alu.arith
9839 : extra_cost->alu.arith_shift));
9840 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9841 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9842 GET_CODE (XEXP (x, 1)), 0, speed_p));
9843 return true;
9846 if (speed_p)
9847 *cost += 2 * extra_cost->alu.arith;
9848 return false;
9851 /* Vector mode? */
9853 *cost = LIBCALL_COST (2);
9854 return false;
9856 case PLUS:
9857 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9858 && (mode == SFmode || !TARGET_VFP_SINGLE))
9860 if (GET_CODE (XEXP (x, 0)) == MULT)
9862 rtx mul_op0, mul_op1, add_op;
9864 if (speed_p)
9865 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9867 mul_op0 = XEXP (XEXP (x, 0), 0);
9868 mul_op1 = XEXP (XEXP (x, 0), 1);
9869 add_op = XEXP (x, 1);
9871 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9872 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9873 + rtx_cost (add_op, mode, code, 0, speed_p));
9875 return true;
9878 if (speed_p)
9879 *cost += extra_cost->fp[mode != SFmode].addsub;
9880 return false;
9882 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9884 *cost = LIBCALL_COST (2);
9885 return false;
9888 /* Narrow modes can be synthesized in SImode, but the range
9889 of useful sub-operations is limited. Check for shift operations
9890 on one of the operands. Only left shifts can be used in the
9891 narrow modes. */
9892 if (GET_MODE_CLASS (mode) == MODE_INT
9893 && GET_MODE_SIZE (mode) < 4)
9895 rtx shift_op, shift_reg;
9896 shift_reg = NULL;
9898 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9900 if (CONST_INT_P (XEXP (x, 1)))
9902 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9903 INTVAL (XEXP (x, 1)), NULL_RTX,
9904 NULL_RTX, 1, 0);
9905 *cost = COSTS_N_INSNS (insns);
9906 if (speed_p)
9907 *cost += insns * extra_cost->alu.arith;
9908 /* Slightly penalize a narrow operation as the result may
9909 need widening. */
9910 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9911 return true;
9914 /* Slightly penalize a narrow operation as the result may
9915 need widening. */
9916 *cost += 1;
9917 if (speed_p)
9918 *cost += extra_cost->alu.arith;
9920 return false;
9923 if (mode == SImode)
9925 rtx shift_op, shift_reg;
9927 if (TARGET_INT_SIMD
9928 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9929 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9931 /* UXTA[BH] or SXTA[BH]. */
9932 if (speed_p)
9933 *cost += extra_cost->alu.extend_arith;
9934 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9935 0, speed_p)
9936 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9937 return true;
9940 shift_reg = NULL;
9941 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9942 if (shift_op != NULL)
9944 if (shift_reg)
9946 if (speed_p)
9947 *cost += extra_cost->alu.arith_shift_reg;
9948 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9950 else if (speed_p)
9951 *cost += extra_cost->alu.arith_shift;
9953 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9954 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9955 return true;
9957 if (GET_CODE (XEXP (x, 0)) == MULT)
9959 rtx mul_op = XEXP (x, 0);
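	  /* Summarizing the test below: both multiply operands must be 16-bit
	     values, each either a SIGN_EXTEND or the top half extracted with
	     an ASHIFTRT by 16, so that a 16x16 multiply-accumulate can be
	     used.  */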
9961 if (TARGET_DSP_MULTIPLY
9962 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9963 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9964 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9965 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9966 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9967 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9968 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9969 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9970 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9971 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9972 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9973 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9974 == 16))))))
9976 /* SMLA[BT][BT]. */
9977 if (speed_p)
9978 *cost += extra_cost->mult[0].extend_add;
9979 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9980 SIGN_EXTEND, 0, speed_p)
9981 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9982 SIGN_EXTEND, 0, speed_p)
9983 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9984 return true;
9987 if (speed_p)
9988 *cost += extra_cost->mult[0].add;
9989 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9990 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9991 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9992 return true;
9994 if (CONST_INT_P (XEXP (x, 1)))
9996 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9997 INTVAL (XEXP (x, 1)), NULL_RTX,
9998 NULL_RTX, 1, 0);
9999 *cost = COSTS_N_INSNS (insns);
10000 if (speed_p)
10001 *cost += insns * extra_cost->alu.arith;
10002 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10003 return true;
10005 else if (speed_p)
10006 *cost += extra_cost->alu.arith;
10008 return false;
10011 if (mode == DImode)
10013 if (arm_arch3m
10014 && GET_CODE (XEXP (x, 0)) == MULT
10015 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10016 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10017 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10020 if (speed_p)
10021 *cost += extra_cost->mult[1].extend_add;
10022 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10023 ZERO_EXTEND, 0, speed_p)
10024 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10025 ZERO_EXTEND, 0, speed_p)
10026 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10027 return true;
10030 *cost += COSTS_N_INSNS (1);
10032 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10033 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10035 if (speed_p)
10036 *cost += (extra_cost->alu.arith
10037 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10038 ? extra_cost->alu.arith
10039 : extra_cost->alu.arith_shift));
10041 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10042 0, speed_p)
10043 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10044 return true;
10047 if (speed_p)
10048 *cost += 2 * extra_cost->alu.arith;
10049 return false;
10052 /* Vector mode? */
10053 *cost = LIBCALL_COST (2);
10054 return false;
10055 case IOR:
10056 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10058 if (speed_p)
10059 *cost += extra_cost->alu.rev;
10061 return true;
10063 /* Fall through. */
10064 case AND: case XOR:
10065 if (mode == SImode)
10067 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10068 rtx op0 = XEXP (x, 0);
10069 rtx shift_op, shift_reg;
10071 if (subcode == NOT
10072 && (code == AND
10073 || (code == IOR && TARGET_THUMB2)))
10074 op0 = XEXP (op0, 0);
10076 shift_reg = NULL;
10077 shift_op = shifter_op_p (op0, &shift_reg);
10078 if (shift_op != NULL)
10080 if (shift_reg)
10082 if (speed_p)
10083 *cost += extra_cost->alu.log_shift_reg;
10084 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10086 else if (speed_p)
10087 *cost += extra_cost->alu.log_shift;
10089 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10090 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10091 return true;
10094 if (CONST_INT_P (XEXP (x, 1)))
10096 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10097 INTVAL (XEXP (x, 1)), NULL_RTX,
10098 NULL_RTX, 1, 0);
10100 *cost = COSTS_N_INSNS (insns);
10101 if (speed_p)
10102 *cost += insns * extra_cost->alu.logical;
10103 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10104 return true;
10107 if (speed_p)
10108 *cost += extra_cost->alu.logical;
10109 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10110 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10111 return true;
10114 if (mode == DImode)
10116 rtx op0 = XEXP (x, 0);
10117 enum rtx_code subcode = GET_CODE (op0);
10119 *cost += COSTS_N_INSNS (1);
10121 if (subcode == NOT
10122 && (code == AND
10123 || (code == IOR && TARGET_THUMB2)))
10124 op0 = XEXP (op0, 0);
10126 if (GET_CODE (op0) == ZERO_EXTEND)
10128 if (speed_p)
10129 *cost += 2 * extra_cost->alu.logical;
10131 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10132 0, speed_p)
10133 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10134 return true;
10136 else if (GET_CODE (op0) == SIGN_EXTEND)
10138 if (speed_p)
10139 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10141 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10142 0, speed_p)
10143 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10144 return true;
10147 if (speed_p)
10148 *cost += 2 * extra_cost->alu.logical;
10150 return true;
10152 /* Vector mode? */
10154 *cost = LIBCALL_COST (2);
10155 return false;
10157 case MULT:
10158 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10159 && (mode == SFmode || !TARGET_VFP_SINGLE))
10161 rtx op0 = XEXP (x, 0);
10163 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10164 op0 = XEXP (op0, 0);
10166 if (speed_p)
10167 *cost += extra_cost->fp[mode != SFmode].mult;
10169 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10170 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10171 return true;
10173 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10175 *cost = LIBCALL_COST (2);
10176 return false;
10179 if (mode == SImode)
10181 if (TARGET_DSP_MULTIPLY
10182 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10183 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10184 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10186 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10187 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10189 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10190 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10191 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10192 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10193 && (INTVAL (XEXP (XEXP (x, 1), 1))
10194 == 16))))))
10196 /* SMUL[TB][TB]. */
10197 if (speed_p)
10198 *cost += extra_cost->mult[0].extend;
10199 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10200 SIGN_EXTEND, 0, speed_p);
10201 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10202 SIGN_EXTEND, 1, speed_p);
10203 return true;
10205 if (speed_p)
10206 *cost += extra_cost->mult[0].simple;
10207 return false;
10210 if (mode == DImode)
10212 if (arm_arch3m
10213 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10214 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10215 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10216 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10218 if (speed_p)
10219 *cost += extra_cost->mult[1].extend;
10220 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10221 ZERO_EXTEND, 0, speed_p)
10222 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10223 ZERO_EXTEND, 0, speed_p));
10224 return true;
10227 *cost = LIBCALL_COST (2);
10228 return false;
10231 /* Vector mode? */
10232 *cost = LIBCALL_COST (2);
10233 return false;
10235 case NEG:
10236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10237 && (mode == SFmode || !TARGET_VFP_SINGLE))
10239 if (GET_CODE (XEXP (x, 0)) == MULT)
10241 /* VNMUL. */
10242 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10243 return true;
10246 if (speed_p)
10247 *cost += extra_cost->fp[mode != SFmode].neg;
10249 return false;
10251 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10253 *cost = LIBCALL_COST (1);
10254 return false;
10257 if (mode == SImode)
10259 if (GET_CODE (XEXP (x, 0)) == ABS)
10261 *cost += COSTS_N_INSNS (1);
10262 /* Assume the non-flag-changing variant. */
10263 if (speed_p)
10264 *cost += (extra_cost->alu.log_shift
10265 + extra_cost->alu.arith_shift);
10266 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10267 return true;
10270 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10271 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10273 *cost += COSTS_N_INSNS (1);
10274 /* No extra cost for MOV imm and MVN imm. */
10275 /* If the comparison op is using the flags, there's no further
10276 cost, otherwise we need to add the cost of the comparison. */
10277 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10278 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10279 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10281 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10282 *cost += (COSTS_N_INSNS (1)
10283 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10284 0, speed_p)
10285 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10286 1, speed_p));
10287 if (speed_p)
10288 *cost += extra_cost->alu.arith;
10290 return true;
10293 if (speed_p)
10294 *cost += extra_cost->alu.arith;
10295 return false;
10298 if (GET_MODE_CLASS (mode) == MODE_INT
10299 && GET_MODE_SIZE (mode) < 4)
10301 /* Slightly disparage, as we might need an extend operation. */
10302 *cost += 1;
10303 if (speed_p)
10304 *cost += extra_cost->alu.arith;
10305 return false;
10308 if (mode == DImode)
10310 *cost += COSTS_N_INSNS (1);
10311 if (speed_p)
10312 *cost += 2 * extra_cost->alu.arith;
10313 return false;
10316 /* Vector mode? */
10317 *cost = LIBCALL_COST (1);
10318 return false;
10320 case NOT:
10321 if (mode == SImode)
10323 rtx shift_op;
10324 rtx shift_reg = NULL;
10326 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10328 if (shift_op)
10330 if (shift_reg != NULL)
10332 if (speed_p)
10333 *cost += extra_cost->alu.log_shift_reg;
10334 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10336 else if (speed_p)
10337 *cost += extra_cost->alu.log_shift;
10338 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10339 return true;
10342 if (speed_p)
10343 *cost += extra_cost->alu.logical;
10344 return false;
10346 if (mode == DImode)
10348 *cost += COSTS_N_INSNS (1);
10349 return false;
10352 /* Vector mode? */
10354 *cost += LIBCALL_COST (1);
10355 return false;
10357 case IF_THEN_ELSE:
10359 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10361 *cost += COSTS_N_INSNS (3);
10362 return true;
10364 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10365 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10367 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10368	/* Assume that if one arm of the if_then_else is a register,
10369	   it will be tied with the result and the conditional insn
10370	   will be eliminated.  */
10371 if (REG_P (XEXP (x, 1)))
10372 *cost += op2cost;
10373 else if (REG_P (XEXP (x, 2)))
10374 *cost += op1cost;
10375 else
10377 if (speed_p)
10379 if (extra_cost->alu.non_exec_costs_exec)
10380 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10381 else
10382 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10384 else
10385 *cost += op1cost + op2cost;
10388 return true;
10390 case COMPARE:
10391 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10392 *cost = 0;
10393 else
10395 machine_mode op0mode;
10396 /* We'll mostly assume that the cost of a compare is the cost of the
10397 LHS. However, there are some notable exceptions. */
10399 /* Floating point compares are never done as side-effects. */
10400 op0mode = GET_MODE (XEXP (x, 0));
10401 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10402 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10404 if (speed_p)
10405 *cost += extra_cost->fp[op0mode != SFmode].compare;
10407 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10409 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10410 return true;
10413 return false;
10415 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10417 *cost = LIBCALL_COST (2);
10418 return false;
10421 /* DImode compares normally take two insns. */
10422 if (op0mode == DImode)
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += 2 * extra_cost->alu.arith;
10427 return false;
10430 if (op0mode == SImode)
10432 rtx shift_op;
10433 rtx shift_reg;
10435 if (XEXP (x, 1) == const0_rtx
10436 && !(REG_P (XEXP (x, 0))
10437 || (GET_CODE (XEXP (x, 0)) == SUBREG
10438 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10440 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10442 /* Multiply operations that set the flags are often
10443 significantly more expensive. */
10444 if (speed_p
10445 && GET_CODE (XEXP (x, 0)) == MULT
10446 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10447 *cost += extra_cost->mult[0].flag_setting;
10449 if (speed_p
10450 && GET_CODE (XEXP (x, 0)) == PLUS
10451 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10452 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10453 0), 1), mode))
10454 *cost += extra_cost->mult[0].flag_setting;
10455 return true;
10458 shift_reg = NULL;
10459 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10460 if (shift_op != NULL)
10462 if (shift_reg != NULL)
10464 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10465 1, speed_p);
10466 if (speed_p)
10467 *cost += extra_cost->alu.arith_shift_reg;
10469 else if (speed_p)
10470 *cost += extra_cost->alu.arith_shift;
10471 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10472 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10473 return true;
10476 if (speed_p)
10477 *cost += extra_cost->alu.arith;
10478 if (CONST_INT_P (XEXP (x, 1))
10479 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10481 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10482 return true;
10484 return false;
10487 /* Vector mode? */
10489 *cost = LIBCALL_COST (2);
10490 return false;
10492 return true;
10494 case EQ:
10495 case NE:
10496 case LT:
10497 case LE:
10498 case GT:
10499 case GE:
10500 case LTU:
10501 case LEU:
10502 case GEU:
10503 case GTU:
10504 case ORDERED:
10505 case UNORDERED:
10506 case UNEQ:
10507 case UNLE:
10508 case UNLT:
10509 case UNGE:
10510 case UNGT:
10511 case LTGT:
10512 if (outer_code == SET)
10514 /* Is it a store-flag operation? */
10515 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10516 && XEXP (x, 1) == const0_rtx)
10518 /* Thumb also needs an IT insn. */
10519 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10520 return true;
10522 if (XEXP (x, 1) == const0_rtx)
10524 switch (code)
10526 case LT:
10527 /* LSR Rd, Rn, #31. */
10528 if (speed_p)
10529 *cost += extra_cost->alu.shift;
10530 break;
10532 case EQ:
10533 /* RSBS T1, Rn, #0
10534 ADC Rd, Rn, T1. */
10536 case NE:
10537 /* SUBS T1, Rn, #1
10538 SBC Rd, Rn, T1. */
10539 *cost += COSTS_N_INSNS (1);
10540 break;
10542 case LE:
10543 /* RSBS T1, Rn, Rn, LSR #31
10544 ADC Rd, Rn, T1. */
10545 *cost += COSTS_N_INSNS (1);
10546 if (speed_p)
10547 *cost += extra_cost->alu.arith_shift;
10548 break;
10550 case GT:
10551 /* RSB Rd, Rn, Rn, ASR #1
10552 LSR Rd, Rd, #31. */
10553 *cost += COSTS_N_INSNS (1);
10554 if (speed_p)
10555 *cost += (extra_cost->alu.arith_shift
10556 + extra_cost->alu.shift);
10557 break;
10559 case GE:
10560 /* ASR Rd, Rn, #31
10561 ADD Rd, Rn, #1. */
10562 *cost += COSTS_N_INSNS (1);
10563 if (speed_p)
10564 *cost += extra_cost->alu.shift;
10565 break;
10567 default:
10568 /* Remaining cases are either meaningless or would take
10569 three insns anyway. */
10570 *cost = COSTS_N_INSNS (3);
10571 break;
10573 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10574 return true;
10576 else
10578 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10579 if (CONST_INT_P (XEXP (x, 1))
10580 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10582 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10583 return true;
10586 return false;
10589 /* Not directly inside a set. If it involves the condition code
10590 register it must be the condition for a branch, cond_exec or
10591 I_T_E operation. Since the comparison is performed elsewhere
10592 this is just the control part which has no additional
10593 cost. */
10594 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10595 && XEXP (x, 1) == const0_rtx)
10597 *cost = 0;
10598 return true;
10600 return false;
10602 case ABS:
10603 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10604 && (mode == SFmode || !TARGET_VFP_SINGLE))
10606 if (speed_p)
10607 *cost += extra_cost->fp[mode != SFmode].neg;
10609 return false;
10611 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10613 *cost = LIBCALL_COST (1);
10614 return false;
10617 if (mode == SImode)
10619 if (speed_p)
10620 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10621 return false;
10623 /* Vector mode? */
10624 *cost = LIBCALL_COST (1);
10625 return false;
10627 case SIGN_EXTEND:
10628 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10629 && MEM_P (XEXP (x, 0)))
10631 if (mode == DImode)
10632 *cost += COSTS_N_INSNS (1);
10634 if (!speed_p)
10635 return true;
10637 if (GET_MODE (XEXP (x, 0)) == SImode)
10638 *cost += extra_cost->ldst.load;
10639 else
10640 *cost += extra_cost->ldst.load_sign_extend;
10642 if (mode == DImode)
10643 *cost += extra_cost->alu.shift;
10645 return true;
10648      /* Widening from less than 32 bits requires an extend operation.  */
10649 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10651 /* We have SXTB/SXTH. */
10652 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10653 if (speed_p)
10654 *cost += extra_cost->alu.extend;
10656 else if (GET_MODE (XEXP (x, 0)) != SImode)
10658 /* Needs two shifts. */
10659 *cost += COSTS_N_INSNS (1);
10660 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10661 if (speed_p)
10662 *cost += 2 * extra_cost->alu.shift;
10665      /* Widening beyond 32 bits requires one more insn.  */
10666 if (mode == DImode)
10668 *cost += COSTS_N_INSNS (1);
10669 if (speed_p)
10670 *cost += extra_cost->alu.shift;
10673 return true;
10675 case ZERO_EXTEND:
10676 if ((arm_arch4
10677 || GET_MODE (XEXP (x, 0)) == SImode
10678 || GET_MODE (XEXP (x, 0)) == QImode)
10679 && MEM_P (XEXP (x, 0)))
10681 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10683 if (mode == DImode)
10684 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10686 return true;
10689      /* Widening from less than 32 bits requires an extend operation.  */
10690 if (GET_MODE (XEXP (x, 0)) == QImode)
10692 /* UXTB can be a shorter instruction in Thumb2, but it might
10693 be slower than the AND Rd, Rn, #255 alternative. When
10694 optimizing for speed it should never be slower to use
10695 AND, and we don't really model 16-bit vs 32-bit insns
10696 here. */
10697 if (speed_p)
10698 *cost += extra_cost->alu.logical;
10700 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10702 /* We have UXTB/UXTH. */
10703 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10704 if (speed_p)
10705 *cost += extra_cost->alu.extend;
10707 else if (GET_MODE (XEXP (x, 0)) != SImode)
10709 /* Needs two shifts. It's marginally preferable to use
10710 shifts rather than two BIC instructions as the second
10711 shift may merge with a subsequent insn as a shifter
10712 op. */
10713 *cost = COSTS_N_INSNS (2);
10714 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10715 if (speed_p)
10716 *cost += 2 * extra_cost->alu.shift;
10719      /* Widening beyond 32 bits requires one more insn.  */
10720 if (mode == DImode)
10722 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10725 return true;
10727 case CONST_INT:
10728 *cost = 0;
10729 /* CONST_INT has no mode, so we cannot tell for sure how many
10730 insns are really going to be needed. The best we can do is
10731 look at the value passed. If it fits in SImode, then assume
10732 that's the mode it will be used for. Otherwise assume it
10733 will be used in DImode. */
10734 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10735 mode = SImode;
10736 else
10737 mode = DImode;
10739 /* Avoid blowing up in arm_gen_constant (). */
10740 if (!(outer_code == PLUS
10741 || outer_code == AND
10742 || outer_code == IOR
10743 || outer_code == XOR
10744 || outer_code == MINUS))
10745 outer_code = SET;
10747 const_int_cost:
10748 if (mode == SImode)
10750 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10751 INTVAL (x), NULL, NULL,
10752 0, 0));
10753 /* Extra costs? */
10755 else
10757 *cost += COSTS_N_INSNS (arm_gen_constant
10758 (outer_code, SImode, NULL,
10759 trunc_int_for_mode (INTVAL (x), SImode),
10760 NULL, NULL, 0, 0)
10761 + arm_gen_constant (outer_code, SImode, NULL,
10762 INTVAL (x) >> 32, NULL,
10763 NULL, 0, 0));
10764 /* Extra costs? */
10767 return true;
10769 case CONST:
10770 case LABEL_REF:
10771 case SYMBOL_REF:
10772 if (speed_p)
10774 if (arm_arch_thumb2 && !flag_pic)
10775 *cost += COSTS_N_INSNS (1);
10776 else
10777 *cost += extra_cost->ldst.load;
10779 else
10780 *cost += COSTS_N_INSNS (1);
10782 if (flag_pic)
10784 *cost += COSTS_N_INSNS (1);
10785 if (speed_p)
10786 *cost += extra_cost->alu.arith;
10789 return true;
10791 case CONST_FIXED:
10792 *cost = COSTS_N_INSNS (4);
10793 /* Fixme. */
10794 return true;
10796 case CONST_DOUBLE:
10797 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10798 && (mode == SFmode || !TARGET_VFP_SINGLE))
10800 if (vfp3_const_double_rtx (x))
10802 if (speed_p)
10803 *cost += extra_cost->fp[mode == DFmode].fpconst;
10804 return true;
10807 if (speed_p)
10809 if (mode == DFmode)
10810 *cost += extra_cost->ldst.loadd;
10811 else
10812 *cost += extra_cost->ldst.loadf;
10814 else
10815 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10817 return true;
10819 *cost = COSTS_N_INSNS (4);
10820 return true;
10822 case CONST_VECTOR:
10823 /* Fixme. */
10824 if (TARGET_NEON
10825 && TARGET_HARD_FLOAT
10826 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10827 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10828 *cost = COSTS_N_INSNS (1);
10829 else
10830 *cost = COSTS_N_INSNS (4);
10831 return true;
10833 case HIGH:
10834 case LO_SUM:
10835 /* When optimizing for size, we prefer constant pool entries to
10836 MOVW/MOVT pairs, so bump the cost of these slightly. */
10837 if (!speed_p)
10838 *cost += 1;
10839 return true;
10841 case CLZ:
10842 if (speed_p)
10843 *cost += extra_cost->alu.clz;
10844 return false;
10846 case SMIN:
10847 if (XEXP (x, 1) == const0_rtx)
10849 if (speed_p)
10850 *cost += extra_cost->alu.log_shift;
10851 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10852 return true;
10854 /* Fall through. */
10855 case SMAX:
10856 case UMIN:
10857 case UMAX:
10858 *cost += COSTS_N_INSNS (1);
10859 return false;
10861 case TRUNCATE:
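      /* The pattern matched below is (truncate:SI (ashiftrt:DI (mult:DI
	 (extend) (extend)) (const_int 32))), i.e. the high half of a widening
	 64-bit multiply, which the long multiply instructions provide
	 directly.  */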
10862 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10863 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10864 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10865 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10866 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10867 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10868 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10869 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10870 == ZERO_EXTEND))))
10872 if (speed_p)
10873 *cost += extra_cost->mult[1].extend;
10874 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10875 ZERO_EXTEND, 0, speed_p)
10876 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10877 ZERO_EXTEND, 0, speed_p));
10878 return true;
10880 *cost = LIBCALL_COST (1);
10881 return false;
10883 case UNSPEC_VOLATILE:
10884 case UNSPEC:
10885 return arm_unspec_cost (x, outer_code, speed_p, cost);
10887 case PC:
10888 /* Reading the PC is like reading any other register. Writing it
10889 is more expensive, but we take that into account elsewhere. */
10890 *cost = 0;
10891 return true;
10893 case ZERO_EXTRACT:
10894 /* TODO: Simple zero_extract of bottom bits using AND. */
10895 /* Fall through. */
10896 case SIGN_EXTRACT:
10897 if (arm_arch6
10898 && mode == SImode
10899 && CONST_INT_P (XEXP (x, 1))
10900 && CONST_INT_P (XEXP (x, 2)))
10902 if (speed_p)
10903 *cost += extra_cost->alu.bfx;
10904 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10905 return true;
10907 /* Without UBFX/SBFX, need to resort to shift operations. */
10908 *cost += COSTS_N_INSNS (1);
10909 if (speed_p)
10910 *cost += 2 * extra_cost->alu.shift;
10911 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10912 return true;
10914 case FLOAT_EXTEND:
10915 if (TARGET_HARD_FLOAT)
10917 if (speed_p)
10918 *cost += extra_cost->fp[mode == DFmode].widen;
10919 if (!TARGET_VFP5
10920 && GET_MODE (XEXP (x, 0)) == HFmode)
10922 /* Pre v8, widening HF->DF is a two-step process, first
10923 widening to SFmode. */
10924 *cost += COSTS_N_INSNS (1);
10925 if (speed_p)
10926 *cost += extra_cost->fp[0].widen;
10928 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10929 return true;
10932 *cost = LIBCALL_COST (1);
10933 return false;
10935 case FLOAT_TRUNCATE:
10936 if (TARGET_HARD_FLOAT)
10938 if (speed_p)
10939 *cost += extra_cost->fp[mode == DFmode].narrow;
10940 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10941 return true;
10942 /* Vector modes? */
10944 *cost = LIBCALL_COST (1);
10945 return false;
10947 case FMA:
10948 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10950 rtx op0 = XEXP (x, 0);
10951 rtx op1 = XEXP (x, 1);
10952 rtx op2 = XEXP (x, 2);
10955 /* vfms or vfnma. */
10956 if (GET_CODE (op0) == NEG)
10957 op0 = XEXP (op0, 0);
10959 /* vfnms or vfnma. */
10960 if (GET_CODE (op2) == NEG)
10961 op2 = XEXP (op2, 0);
10963 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10964 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10965 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10967 if (speed_p)
10968	      *cost += extra_cost->fp[mode == DFmode].fma;
10970 return true;
10973 *cost = LIBCALL_COST (3);
10974 return false;
10976 case FIX:
10977 case UNSIGNED_FIX:
10978 if (TARGET_HARD_FLOAT)
10980 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10981 a vcvt fixed-point conversion. */
10982 if (code == FIX && mode == SImode
10983 && GET_CODE (XEXP (x, 0)) == FIX
10984 && GET_MODE (XEXP (x, 0)) == SFmode
10985 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10986 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10987 > 0)
10989 if (speed_p)
10990 *cost += extra_cost->fp[0].toint;
10992 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10993 code, 0, speed_p);
10994 return true;
10997 if (GET_MODE_CLASS (mode) == MODE_INT)
10999 mode = GET_MODE (XEXP (x, 0));
11000 if (speed_p)
11001 *cost += extra_cost->fp[mode == DFmode].toint;
11002	  /* Strip off the 'cost' of rounding towards zero.  */
11003 if (GET_CODE (XEXP (x, 0)) == FIX)
11004 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11005 0, speed_p);
11006 else
11007 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11008 /* ??? Increase the cost to deal with transferring from
11009 FP -> CORE registers? */
11010 return true;
11012 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11013 && TARGET_VFP5)
11015 if (speed_p)
11016 *cost += extra_cost->fp[mode == DFmode].roundint;
11017 return false;
11019 /* Vector costs? */
11021 *cost = LIBCALL_COST (1);
11022 return false;
11024 case FLOAT:
11025 case UNSIGNED_FLOAT:
11026 if (TARGET_HARD_FLOAT)
11028 /* ??? Increase the cost to deal with transferring from CORE
11029 -> FP registers? */
11030 if (speed_p)
11031 *cost += extra_cost->fp[mode == DFmode].fromint;
11032 return false;
11034 *cost = LIBCALL_COST (1);
11035 return false;
11037 case CALL:
11038 return true;
11040 case ASM_OPERANDS:
11042      /* Just a guess: assume the number of instructions in the asm template,
11043	 plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
11044 though (see PR60663). */
11045 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11046 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11048 *cost = COSTS_N_INSNS (asm_length + num_operands);
11049 return true;
11051 default:
11052 if (mode != VOIDmode)
11053 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11054 else
11055 *cost = COSTS_N_INSNS (4); /* Who knows? */
11056 return false;
11060 #undef HANDLE_NARROW_SHIFT_ARITH
11062 /* RTX costs entry point. */
11064 static bool
11065 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11066 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11068 bool result;
11069 int code = GET_CODE (x);
11070 gcc_assert (current_tune->insn_extra_cost);
11072 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code,
11074 current_tune->insn_extra_cost,
11075 total, speed);
11077 if (dump_file && arm_verbose_cost)
11079 print_rtl_single (dump_file, x);
11080 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11081 *total, result ? "final" : "partial");
11083 return result;
11086 /* All address computations that can be done are free, but rtx cost returns
11087 the same for practically all of them. So we weight the different types
11088 of address here in the order (most pref first):
11089 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
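/* For illustration, with the weights used below: a pre/post increment or
   decrement address returns 0, [reg, #imm] returns 2, [reg, reg, <shift>]
   returns 3, any other register-plus-register sum 4, a plain register 6, and
   a MEM, LABEL_REF or SYMBOL_REF used as the address 10.  */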
11090 static inline int
11091 arm_arm_address_cost (rtx x)
11093 enum rtx_code c = GET_CODE (x);
11095 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11096 return 0;
11097 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11098 return 10;
11100 if (c == PLUS)
11102 if (CONST_INT_P (XEXP (x, 1)))
11103 return 2;
11105 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11106 return 3;
11108 return 4;
11111 return 6;
11114 static inline int
11115 arm_thumb_address_cost (rtx x)
11117 enum rtx_code c = GET_CODE (x);
11119 if (c == REG)
11120 return 1;
11121 if (c == PLUS
11122 && REG_P (XEXP (x, 0))
11123 && CONST_INT_P (XEXP (x, 1)))
11124 return 1;
11126 return 2;
11129 static int
11130 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11131 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11133 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11136 /* Adjust cost hook for XScale. */
11137 static bool
11138 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11139 int * cost)
11141 /* Some true dependencies can have a higher cost depending
11142 on precisely how certain input operands are used. */
11143 if (dep_type == 0
11144 && recog_memoized (insn) >= 0
11145 && recog_memoized (dep) >= 0)
11147 int shift_opnum = get_attr_shift (insn);
11148 enum attr_type attr_type = get_attr_type (dep);
11150 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11151 operand for INSN. If we have a shifted input operand and the
11152 instruction we depend on is another ALU instruction, then we may
11153 have to account for an additional stall. */
11154 if (shift_opnum != 0
11155 && (attr_type == TYPE_ALU_SHIFT_IMM
11156 || attr_type == TYPE_ALUS_SHIFT_IMM
11157 || attr_type == TYPE_LOGIC_SHIFT_IMM
11158 || attr_type == TYPE_LOGICS_SHIFT_IMM
11159 || attr_type == TYPE_ALU_SHIFT_REG
11160 || attr_type == TYPE_ALUS_SHIFT_REG
11161 || attr_type == TYPE_LOGIC_SHIFT_REG
11162 || attr_type == TYPE_LOGICS_SHIFT_REG
11163 || attr_type == TYPE_MOV_SHIFT
11164 || attr_type == TYPE_MVN_SHIFT
11165 || attr_type == TYPE_MOV_SHIFT_REG
11166 || attr_type == TYPE_MVN_SHIFT_REG))
11168 rtx shifted_operand;
11169 int opno;
11171 /* Get the shifted operand. */
11172 extract_insn (insn);
11173 shifted_operand = recog_data.operand[shift_opnum];
11175 /* Iterate over all the operands in DEP. If we write an operand
11176	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11177 cost of this dependency. */
11178 extract_insn (dep);
11179 preprocess_constraints (dep);
11180 for (opno = 0; opno < recog_data.n_operands; opno++)
11182 /* We can ignore strict inputs. */
11183 if (recog_data.operand_type[opno] == OP_IN)
11184 continue;
11186 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11187 shifted_operand))
11189 *cost = 2;
11190 return false;
11195 return true;
11198 /* Adjust cost hook for Cortex A9. */
11199 static bool
11200 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11201 int * cost)
11203 switch (dep_type)
11205 case REG_DEP_ANTI:
11206 *cost = 0;
11207 return false;
11209 case REG_DEP_TRUE:
11210 case REG_DEP_OUTPUT:
11211 if (recog_memoized (insn) >= 0
11212 && recog_memoized (dep) >= 0)
11214 if (GET_CODE (PATTERN (insn)) == SET)
11216 if (GET_MODE_CLASS
11217 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11218 || GET_MODE_CLASS
11219 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11221 enum attr_type attr_type_insn = get_attr_type (insn);
11222 enum attr_type attr_type_dep = get_attr_type (dep);
11224 /* By default all dependencies of the form
11225 s0 = s0 <op> s1
11226 s0 = s0 <op> s2
11227 have an extra latency of 1 cycle because
11228 of the input and output dependency in this
11229		     case.  However this gets modeled as a true
11230 dependency and hence all these checks. */
11231 if (REG_P (SET_DEST (PATTERN (insn)))
11232 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11234 /* FMACS is a special case where the dependent
11235 instruction can be issued 3 cycles before
11236 the normal latency in case of an output
11237 dependency. */
11238 if ((attr_type_insn == TYPE_FMACS
11239 || attr_type_insn == TYPE_FMACD)
11240 && (attr_type_dep == TYPE_FMACS
11241 || attr_type_dep == TYPE_FMACD))
11243 if (dep_type == REG_DEP_OUTPUT)
11244 *cost = insn_default_latency (dep) - 3;
11245 else
11246 *cost = insn_default_latency (dep);
11247 return false;
11249 else
11251 if (dep_type == REG_DEP_OUTPUT)
11252 *cost = insn_default_latency (dep) + 1;
11253 else
11254 *cost = insn_default_latency (dep);
11256 return false;
11261 break;
11263 default:
11264 gcc_unreachable ();
11267 return true;
11270 /* Adjust cost hook for FA726TE. */
11271 static bool
11272 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11273 int * cost)
11275   /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting
11276      instruction followed by a predicated one) has a penalty of 3.  */
11277 if (dep_type == REG_DEP_TRUE
11278 && recog_memoized (insn) >= 0
11279 && recog_memoized (dep) >= 0
11280 && get_attr_conds (dep) == CONDS_SET)
11282 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11283 if (get_attr_conds (insn) == CONDS_USE
11284 && get_attr_type (insn) != TYPE_BRANCH)
11286 *cost = 3;
11287 return false;
11290 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11291 || get_attr_conds (insn) == CONDS_USE)
11293 *cost = 0;
11294 return false;
11298 return true;
11301 /* Implement TARGET_REGISTER_MOVE_COST.
11303 Moves between VFP_REGS and GENERAL_REGS take a single insn, but
11304 such a move is typically more expensive than a single memory access. We set
11305 the cost to less than two memory accesses so that floating
11306 point to integer conversion does not go through memory. */
11309 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11310 reg_class_t from, reg_class_t to)
11312 if (TARGET_32BIT)
11314 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11315 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11316 return 15;
11317 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11318 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11319 return 4;
11320 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11321 return 20;
11322 else
11323 return 2;
11325 else
11327 if (from == HI_REGS || to == HI_REGS)
11328 return 4;
11329 else
11330 return 2;
11334 /* Implement TARGET_MEMORY_MOVE_COST. */
11337 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11338 bool in ATTRIBUTE_UNUSED)
11340 if (TARGET_32BIT)
11341 return 10;
11342 else
11344 if (GET_MODE_SIZE (mode) < 4)
11345 return 8;
11346 else
11347 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
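/* Illustration only: with the Thumb-1 formula above, an 8-byte (DImode) move
   costs 2 * 8 * 1 = 16 when RCLASS is LO_REGS and 2 * 8 * 2 = 32 for any
   other class, while modes narrower than 4 bytes cost a flat 8.  */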
11351 /* Vectorizer cost model implementation. */
11353 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11354 static int
11355 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11356 tree vectype,
11357 int misalign ATTRIBUTE_UNUSED)
11359 unsigned elements;
11361 switch (type_of_cost)
11363 case scalar_stmt:
11364 return current_tune->vec_costs->scalar_stmt_cost;
11366 case scalar_load:
11367 return current_tune->vec_costs->scalar_load_cost;
11369 case scalar_store:
11370 return current_tune->vec_costs->scalar_store_cost;
11372 case vector_stmt:
11373 return current_tune->vec_costs->vec_stmt_cost;
11375 case vector_load:
11376 return current_tune->vec_costs->vec_align_load_cost;
11378 case vector_store:
11379 return current_tune->vec_costs->vec_store_cost;
11381 case vec_to_scalar:
11382 return current_tune->vec_costs->vec_to_scalar_cost;
11384 case scalar_to_vec:
11385 return current_tune->vec_costs->scalar_to_vec_cost;
11387 case unaligned_load:
11388 case vector_gather_load:
11389 return current_tune->vec_costs->vec_unalign_load_cost;
11391 case unaligned_store:
11392 case vector_scatter_store:
11393 return current_tune->vec_costs->vec_unalign_store_cost;
11395 case cond_branch_taken:
11396 return current_tune->vec_costs->cond_taken_branch_cost;
11398 case cond_branch_not_taken:
11399 return current_tune->vec_costs->cond_not_taken_branch_cost;
11401 case vec_perm:
11402 case vec_promote_demote:
11403 return current_tune->vec_costs->vec_stmt_cost;
11405 case vec_construct:
11406 elements = TYPE_VECTOR_SUBPARTS (vectype);
11407 return elements / 2 + 1;
11409 default:
11410 gcc_unreachable ();
11414 /* Implement targetm.vectorize.add_stmt_cost. */
11416 static unsigned
11417 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11418 struct _stmt_vec_info *stmt_info, int misalign,
11419 enum vect_cost_model_location where)
11421 unsigned *cost = (unsigned *) data;
11422 unsigned retval = 0;
11424 if (flag_vect_cost_model)
11426 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11427 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11429 /* Statements in an inner loop relative to the loop being
11430 vectorized are weighted more heavily. The value here is
11431 arbitrary and could potentially be improved with analysis. */
11432 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11433 count *= 50; /* FIXME. */
11435 retval = (unsigned) (count * stmt_cost);
11436 cost[where] += retval;
11439 return retval;
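/* Illustration only: with the weighting above, one vector_stmt whose
   per-statement cost is 1 adds 1 to the vect_body bucket normally, but 50
   when it lies in an inner loop relative to the loop being vectorized
   (COUNT of 1 scaled by the FIXME factor of 50).  */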
11442 /* Return true if and only if this insn can dual-issue only as older. */
11443 static bool
11444 cortexa7_older_only (rtx_insn *insn)
11446 if (recog_memoized (insn) < 0)
11447 return false;
11449 switch (get_attr_type (insn))
11451 case TYPE_ALU_DSP_REG:
11452 case TYPE_ALU_SREG:
11453 case TYPE_ALUS_SREG:
11454 case TYPE_LOGIC_REG:
11455 case TYPE_LOGICS_REG:
11456 case TYPE_ADC_REG:
11457 case TYPE_ADCS_REG:
11458 case TYPE_ADR:
11459 case TYPE_BFM:
11460 case TYPE_REV:
11461 case TYPE_MVN_REG:
11462 case TYPE_SHIFT_IMM:
11463 case TYPE_SHIFT_REG:
11464 case TYPE_LOAD_BYTE:
11465 case TYPE_LOAD_4:
11466 case TYPE_STORE_4:
11467 case TYPE_FFARITHS:
11468 case TYPE_FADDS:
11469 case TYPE_FFARITHD:
11470 case TYPE_FADDD:
11471 case TYPE_FMOV:
11472 case TYPE_F_CVT:
11473 case TYPE_FCMPS:
11474 case TYPE_FCMPD:
11475 case TYPE_FCONSTS:
11476 case TYPE_FCONSTD:
11477 case TYPE_FMULS:
11478 case TYPE_FMACS:
11479 case TYPE_FMULD:
11480 case TYPE_FMACD:
11481 case TYPE_FDIVS:
11482 case TYPE_FDIVD:
11483 case TYPE_F_MRC:
11484 case TYPE_F_MRRC:
11485 case TYPE_F_FLAG:
11486 case TYPE_F_LOADS:
11487 case TYPE_F_STORES:
11488 return true;
11489 default:
11490 return false;
11494 /* Return true if and only if this insn can dual-issue as younger. */
11495 static bool
11496 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11498 if (recog_memoized (insn) < 0)
11500 if (verbose > 5)
11501 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11502 return false;
11505 switch (get_attr_type (insn))
11507 case TYPE_ALU_IMM:
11508 case TYPE_ALUS_IMM:
11509 case TYPE_LOGIC_IMM:
11510 case TYPE_LOGICS_IMM:
11511 case TYPE_EXTEND:
11512 case TYPE_MVN_IMM:
11513 case TYPE_MOV_IMM:
11514 case TYPE_MOV_REG:
11515 case TYPE_MOV_SHIFT:
11516 case TYPE_MOV_SHIFT_REG:
11517 case TYPE_BRANCH:
11518 case TYPE_CALL:
11519 return true;
11520 default:
11521 return false;
11526 /* Look for an instruction that can dual issue only as an older
11527 instruction, and move it in front of any instructions that can
11528 dual-issue as younger, while preserving the relative order of all
11529 other instructions in the ready list. This is a heuristic to help
11530 dual-issue in later cycles, by postponing issue of more flexible
11531 instructions. This heuristic may affect dual issue opportunities
11532 in the current cycle. */
11533 static void
11534 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11535 int *n_readyp, int clock)
11537 int i;
11538 int first_older_only = -1, first_younger = -1;
11540 if (verbose > 5)
11541 fprintf (file,
11542 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11543 clock,
11544 *n_readyp);
11546 /* Traverse the ready list from the head (the instruction to issue
11547 first), looking for the first instruction that can issue as
11548 younger and the first instruction that can dual-issue only as
11549 older. */
11550 for (i = *n_readyp - 1; i >= 0; i--)
11552 rtx_insn *insn = ready[i];
11553 if (cortexa7_older_only (insn))
11555 first_older_only = i;
11556 if (verbose > 5)
11557 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11558 break;
11560 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11561 first_younger = i;
11564 /* Nothing to reorder because either no younger insn found or insn
11565 that can dual-issue only as older appears before any insn that
11566 can dual-issue as younger. */
11567 if (first_younger == -1)
11569 if (verbose > 5)
11570 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11571 return;
11574 /* Nothing to reorder because no older-only insn in the ready list. */
11575 if (first_older_only == -1)
11577 if (verbose > 5)
11578 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11579 return;
11582 /* Move first_older_only insn before first_younger. */
11583 if (verbose > 5)
11584 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11585 INSN_UID(ready [first_older_only]),
11586 INSN_UID(ready [first_younger]));
11587 rtx_insn *first_older_only_insn = ready [first_older_only];
11588 for (i = first_older_only; i < first_younger; i++)
11590 ready[i] = ready[i+1];
11593 ready[i] = first_older_only_insn;
11594 return;
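/* Illustration only (standalone sketch with a hypothetical name, not used by
   the compiler): the array move performed by the loop above.  The entry at
   index FROM is moved up to index TO (FROM < TO) and everything in between
   shifts down one slot, preserving relative order; this mirrors how the
   older-only insn is moved in front of the younger-capable ones in the
   ready list.  */
static void
rotate_slot_up (int *slots, int from, int to)
{
  int saved = slots[from];

  /* Shift the intervening entries down by one.  */
  for (int i = from; i < to; i++)
    slots[i] = slots[i + 1];

  /* Drop the saved entry into the vacated slot.  */
  slots[to] = saved;
}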
11597 /* Implement TARGET_SCHED_REORDER. */
11598 static int
11599 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11600 int clock)
11602 switch (arm_tune)
11604 case TARGET_CPU_cortexa7:
11605 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11606 break;
11607 default:
11608 /* Do nothing for other cores. */
11609 break;
11612 return arm_issue_rate ();
11615 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11616 It corrects the value of COST based on the relationship between
11617 INSN and DEP through the dependence kind DEP_TYPE. It returns the new
11618 value. There is a per-core adjust_cost hook to adjust scheduler costs
11619 and the per-core hook can choose to completely override the generic
11620 adjust_cost function. Only put bits of code into arm_adjust_cost that
11621 are common across all cores. */
11622 static int
11623 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11624 unsigned int)
11626 rtx i_pat, d_pat;
11628 /* When generating Thumb-1 code, we want to place flag-setting operations
11629 close to a conditional branch which depends on them, so that we can
11630 omit the comparison. */
11631 if (TARGET_THUMB1
11632 && dep_type == 0
11633 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11634 && recog_memoized (dep) >= 0
11635 && get_attr_conds (dep) == CONDS_SET)
11636 return 0;
11638 if (current_tune->sched_adjust_cost != NULL)
11640 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11641 return cost;
11644 /* XXX Is this strictly true? */
11645 if (dep_type == REG_DEP_ANTI
11646 || dep_type == REG_DEP_OUTPUT)
11647 return 0;
11649 /* Call insns don't incur a stall, even if they follow a load. */
11650 if (dep_type == 0
11651 && CALL_P (insn))
11652 return 1;
11654 if ((i_pat = single_set (insn)) != NULL
11655 && MEM_P (SET_SRC (i_pat))
11656 && (d_pat = single_set (dep)) != NULL
11657 && MEM_P (SET_DEST (d_pat)))
11659 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11660 /* This is a load after a store; there is no conflict if the load reads
11661 from a cached area. Assume that loads from the stack, and from the
11662 constant pool are cached, and that others will miss. This is a
11663 hack. */
11665 if ((GET_CODE (src_mem) == SYMBOL_REF
11666 && CONSTANT_POOL_ADDRESS_P (src_mem))
11667 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11668 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11669 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11670 return 1;
11673 return cost;
11677 arm_max_conditional_execute (void)
11679 return max_insns_skipped;
11682 static int
11683 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11685 if (TARGET_32BIT)
11686 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11687 else
11688 return (optimize > 0) ? 2 : 0;
11691 static int
11692 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11694 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11697 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11698 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11699 sequences of non-executed instructions in IT blocks probably take the same
11700 amount of time as executed instructions (and the IT instruction itself takes
11701 space in icache). This function was experimentally determined to give good
11702 results on a popular embedded benchmark. */
11704 static int
11705 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11707 return (TARGET_32BIT && speed_p) ? 1
11708 : arm_default_branch_cost (speed_p, predictable_p);
11711 static int
11712 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11714 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11717 static bool fp_consts_inited = false;
11719 static REAL_VALUE_TYPE value_fp0;
11721 static void
11722 init_fp_table (void)
11724 REAL_VALUE_TYPE r;
11726 r = REAL_VALUE_ATOF ("0", DFmode);
11727 value_fp0 = r;
11728 fp_consts_inited = true;
11731 /* Return TRUE if rtx X is a valid immediate FP constant. */
11733 arm_const_double_rtx (rtx x)
11735 const REAL_VALUE_TYPE *r;
11737 if (!fp_consts_inited)
11738 init_fp_table ();
11740 r = CONST_DOUBLE_REAL_VALUE (x);
11741 if (REAL_VALUE_MINUS_ZERO (*r))
11742 return 0;
11744 if (real_equal (r, &value_fp0))
11745 return 1;
11747 return 0;
11750 /* VFPv3 has a fairly wide range of representable immediates, formed from
11751 "quarter-precision" floating-point values. These can be evaluated using this
11752 formula (with ^ for exponentiation):
11754 -1^s * n * 2^-r
11756 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11757 16 <= n <= 31 and 0 <= r <= 7.
11759 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11761 - A (most-significant) is the sign bit.
11762 - BCD are the exponent (encoded as r XOR 3).
11763 - EFGH are the mantissa (encoded as n - 16).
11766 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11767 fconst[sd] instruction, or -1 if X isn't suitable. */
11768 static int
11769 vfp3_const_double_index (rtx x)
11771 REAL_VALUE_TYPE r, m;
11772 int sign, exponent;
11773 unsigned HOST_WIDE_INT mantissa, mant_hi;
11774 unsigned HOST_WIDE_INT mask;
11775 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11776 bool fail;
11778 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11779 return -1;
11781 r = *CONST_DOUBLE_REAL_VALUE (x);
11783 /* We can't represent these things, so detect them first. */
11784 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11785 return -1;
11787 /* Extract sign, exponent and mantissa. */
11788 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11789 r = real_value_abs (&r);
11790 exponent = REAL_EXP (&r);
11791 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11792 highest (sign) bit, with a fixed binary point at bit point_pos.
11793 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11794 bits for the mantissa, this may fail (low bits would be lost). */
11795 real_ldexp (&m, &r, point_pos - exponent);
11796 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11797 mantissa = w.elt (0);
11798 mant_hi = w.elt (1);
11800 /* If there are bits set in the low part of the mantissa, we can't
11801 represent this value. */
11802 if (mantissa != 0)
11803 return -1;
11805 /* Now make it so that mantissa contains the most-significant bits, and move
11806 the point_pos to indicate that the least-significant bits have been
11807 discarded. */
11808 point_pos -= HOST_BITS_PER_WIDE_INT;
11809 mantissa = mant_hi;
11811 /* We can permit four significant bits of mantissa only, plus a high bit
11812 which is always 1. */
11813 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11814 if ((mantissa & mask) != 0)
11815 return -1;
11817 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11818 mantissa >>= point_pos - 5;
11820 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11821 floating-point immediate zero with Neon using an integer-zero load, but
11822 that case is handled elsewhere.) */
11823 if (mantissa == 0)
11824 return -1;
11826 gcc_assert (mantissa >= 16 && mantissa <= 31);
11828 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11829 normalized significands are in the range [1, 2). (Our mantissa is shifted
11830 left 4 places at this point relative to normalized IEEE754 values). GCC
11831 internally uses [0.5, 1) (see real.c), so the exponent returned from
11832 REAL_EXP must be altered. */
11833 exponent = 5 - exponent;
11835 if (exponent < 0 || exponent > 7)
11836 return -1;
11838 /* Sign, mantissa and exponent are now in the correct form to plug into the
11839 formula described in the comment above. */
11840 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
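/* Illustration only (hypothetical standalone helper, not part of this file):
   decoding the 8-bit ABCDEFGH form described before vfp3_const_double_index
   back into its value, using the stated mapping A = s, BCD = r XOR 3,
   EFGH = n - 16.  For example, an index of 0 decodes to 16 * 2^-3 = 2.0.  */
static double
vfp3_decode_quarter_precision (unsigned char abcdefgh)
{
  int s = (abcdefgh >> 7) & 1;         /* A: the sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;   /* BCD: exponent, stored as r XOR 3.  */
  int n = 16 + (abcdefgh & 0xf);       /* EFGH: mantissa, stored as n - 16.  */
  double value = (double) n / (double) (1 << r);   /* n * 2^-r.  */
  return s ? -value : value;
}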
11843 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11845 vfp3_const_double_rtx (rtx x)
11847 if (!TARGET_VFP3)
11848 return 0;
11850 return vfp3_const_double_index (x) != -1;
11853 /* Recognize immediates which can be used in various Neon instructions. Legal
11854 immediates are described by the following table (for VMVN variants, the
11855 bitwise inverse of the constant shown is recognized. In either case, VMOV
11856 is output and the correct instruction to use for a given constant is chosen
11857 by the assembler). The constant shown is replicated across all elements of
11858 the destination vector.
11860 insn elems variant constant (binary)
11861 ---- ----- ------- -----------------
11862 vmov i32 0 00000000 00000000 00000000 abcdefgh
11863 vmov i32 1 00000000 00000000 abcdefgh 00000000
11864 vmov i32 2 00000000 abcdefgh 00000000 00000000
11865 vmov i32 3 abcdefgh 00000000 00000000 00000000
11866 vmov i16 4 00000000 abcdefgh
11867 vmov i16 5 abcdefgh 00000000
11868 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11869 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11870 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11871 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11872 vmvn i16 10 00000000 abcdefgh
11873 vmvn i16 11 abcdefgh 00000000
11874 vmov i32 12 00000000 00000000 abcdefgh 11111111
11875 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11876 vmov i32 14 00000000 abcdefgh 11111111 11111111
11877 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11878 vmov i8 16 abcdefgh
11879 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11880 eeeeeeee ffffffff gggggggg hhhhhhhh
11881 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11882 vmov f32 19 00000000 00000000 00000000 00000000
11884 For case 18, B = !b. Representable values are exactly those accepted by
11885 vfp3_const_double_index, but are output as floating-point numbers rather
11886 than indices.
11888 For case 19, we will change it to vmov.i32 when assembling.
11890 Variants 0-5 (inclusive) may also be used as immediates for the second
11891 operand of VORR/VBIC instructions.
11893 The INVERSE argument causes the bitwise inverse of the given operand to be
11894 recognized instead (used for recognizing legal immediates for the VAND/VORN
11895 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11896 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11897 output, rather than the real insns vbic/vorr).
11899 INVERSE makes no difference to the recognition of float vectors.
11901 The return value is the variant of immediate as shown in the above table, or
11902 -1 if the given value doesn't match any of the listed patterns.
11904 static int
11905 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11906 rtx *modconst, int *elementwidth)
11908 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11909 matches = 1; \
11910 for (i = 0; i < idx; i += (STRIDE)) \
11911 if (!(TEST)) \
11912 matches = 0; \
11913 if (matches) \
11915 immtype = (CLASS); \
11916 elsize = (ELSIZE); \
11917 break; \
11920 unsigned int i, elsize = 0, idx = 0, n_elts;
11921 unsigned int innersize;
11922 unsigned char bytes[16];
11923 int immtype = -1, matches;
11924 unsigned int invmask = inverse ? 0xff : 0;
11925 bool vector = GET_CODE (op) == CONST_VECTOR;
11927 if (vector)
11928 n_elts = CONST_VECTOR_NUNITS (op);
11929 else
11931 n_elts = 1;
11932 if (mode == VOIDmode)
11933 mode = DImode;
11936 innersize = GET_MODE_UNIT_SIZE (mode);
11938 /* Vectors of float constants. */
11939 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11941 rtx el0 = CONST_VECTOR_ELT (op, 0);
11943 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11944 return -1;
11946 /* FP16 vectors cannot be represented. */
11947 if (GET_MODE_INNER (mode) == HFmode)
11948 return -1;
11950 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11951 are distinct in this context. */
11952 if (!const_vec_duplicate_p (op))
11953 return -1;
11955 if (modconst)
11956 *modconst = CONST_VECTOR_ELT (op, 0);
11958 if (elementwidth)
11959 *elementwidth = 0;
11961 if (el0 == CONST0_RTX (GET_MODE (el0)))
11962 return 19;
11963 else
11964 return 18;
11967 /* The tricks done in the code below apply for little-endian vector layout.
11968 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11969 FIXME: Implement logic for big-endian vectors. */
11970 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11971 return -1;
11973 /* Splat vector constant out into a byte vector. */
11974 for (i = 0; i < n_elts; i++)
11976 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11977 unsigned HOST_WIDE_INT elpart;
11979 gcc_assert (CONST_INT_P (el));
11980 elpart = INTVAL (el);
11982 for (unsigned int byte = 0; byte < innersize; byte++)
11984 bytes[idx++] = (elpart & 0xff) ^ invmask;
11985 elpart >>= BITS_PER_UNIT;
11989 /* Sanity check. */
11990 gcc_assert (idx == GET_MODE_SIZE (mode));
11994 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11995 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11997 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11998 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12000 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12001 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12003 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12004 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12006 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12008 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12010 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12011 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12013 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12014 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12016 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12017 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12019 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12020 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12022 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12024 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12026 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12027 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12029 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12030 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12032 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12033 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12035 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12036 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12038 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12040 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12041 && bytes[i] == bytes[(i + 8) % idx]);
12043 while (0);
12045 if (immtype == -1)
12046 return -1;
12048 if (elementwidth)
12049 *elementwidth = elsize;
12051 if (modconst)
12053 unsigned HOST_WIDE_INT imm = 0;
12055 /* Un-invert bytes of recognized vector, if necessary. */
12056 if (invmask != 0)
12057 for (i = 0; i < idx; i++)
12058 bytes[i] ^= invmask;
12060 if (immtype == 17)
12062 /* FIXME: Broken on 32-bit H_W_I hosts. */
12063 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12065 for (i = 0; i < 8; i++)
12066 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12067 << (i * BITS_PER_UNIT);
12069 *modconst = GEN_INT (imm);
12071 else
12073 unsigned HOST_WIDE_INT imm = 0;
12075 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12076 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12078 *modconst = GEN_INT (imm);
12082 return immtype;
12083 #undef CHECK
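/* Illustration only: a V4SI constant whose elements all equal 0x0000ab00
   splats to the little-endian byte pattern 00 ab 00 00 in each group of
   four, so neon_valid_immediate classifies it as variant 1 of the table
   above ("vmov i32", payload in byte 1), giving *ELEMENTWIDTH = 32 and
   *MODCONST = 0xab00.  */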
12086 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12087 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12088 float elements), and a modified constant (whatever should be output for a
12089 VMOV) in *MODCONST. */
12092 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12093 rtx *modconst, int *elementwidth)
12095 rtx tmpconst;
12096 int tmpwidth;
12097 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12099 if (retval == -1)
12100 return 0;
12102 if (modconst)
12103 *modconst = tmpconst;
12105 if (elementwidth)
12106 *elementwidth = tmpwidth;
12108 return 1;
12111 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12112 the immediate is valid, write a constant suitable for using as an operand
12113 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12114 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12117 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12118 rtx *modconst, int *elementwidth)
12120 rtx tmpconst;
12121 int tmpwidth;
12122 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12124 if (retval < 0 || retval > 5)
12125 return 0;
12127 if (modconst)
12128 *modconst = tmpconst;
12130 if (elementwidth)
12131 *elementwidth = tmpwidth;
12133 return 1;
12136 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12137 the immediate is valid, write a constant suitable for using as an operand
12138 to VSHR/VSHL to *MODCONST and the corresponding element width to
12139 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12140 which have different immediate ranges. */
12143 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12144 rtx *modconst, int *elementwidth,
12145 bool isleftshift)
12147 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12148 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12149 unsigned HOST_WIDE_INT last_elt = 0;
12150 unsigned HOST_WIDE_INT maxshift;
12152 /* All elements of the vector constant must hold the same shift amount. */
12153 for (i = 0; i < n_elts; i++)
12155 rtx el = CONST_VECTOR_ELT (op, i);
12156 unsigned HOST_WIDE_INT elpart;
12158 if (CONST_INT_P (el))
12159 elpart = INTVAL (el);
12160 else if (CONST_DOUBLE_P (el))
12161 return 0;
12162 else
12163 gcc_unreachable ();
12165 if (i != 0 && elpart != last_elt)
12166 return 0;
12168 last_elt = elpart;
12171 /* Shift less than element size. */
12172 maxshift = innersize * 8;
12174 if (isleftshift)
12176 /* Left shift immediate value can be from 0 to <size>-1. */
12177 if (last_elt >= maxshift)
12178 return 0;
12180 else
12182 /* Right shift immediate value can be from 1 to <size>. */
12183 if (last_elt == 0 || last_elt > maxshift)
12184 return 0;
12187 if (elementwidth)
12188 *elementwidth = innersize * 8;
12190 if (modconst)
12191 *modconst = CONST_VECTOR_ELT (op, 0);
12193 return 1;
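/* Illustration only: for a V8QI shift (8-bit elements) the immediate must be
   identical in every lane and lie in 0..7 for a left shift (VSHL) but in
   1..8 for a right shift (VSHR), matching the two range checks above.  */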
12196 /* Return a string suitable for output of Neon immediate logic operation
12197 MNEM. */
12199 char *
12200 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12201 int inverse, int quad)
12203 int width, is_valid;
12204 static char templ[40];
12206 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12208 gcc_assert (is_valid != 0);
12210 if (quad)
12211 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12212 else
12213 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12215 return templ;
12218 /* Return a string suitable for output of Neon immediate shift operation
12219 (VSHR or VSHL) MNEM. */
12221 char *
12222 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12223 machine_mode mode, int quad,
12224 bool isleftshift)
12226 int width, is_valid;
12227 static char templ[40];
12229 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12230 gcc_assert (is_valid != 0);
12232 if (quad)
12233 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12234 else
12235 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12237 return templ;
12240 /* Output a sequence of pairwise operations to implement a reduction.
12241 NOTE: We do "too much work" here, because pairwise operations work on two
12242 registers-worth of operands in one go. Unfortunately we don't think we can
12243 exploit those extra calculations to do the full operation in fewer steps.
12244 Although all vector elements of the result but the first are ignored, we
12245 actually calculate the same result in each of the elements. An alternative
12246 such as initially loading a vector with zero to use as each of the second
12247 operands would use up an additional register and take an extra instruction,
12248 for no particular gain. */
12250 void
12251 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12252 rtx (*reduc) (rtx, rtx, rtx))
12254 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12255 rtx tmpsum = op1;
12257 for (i = parts / 2; i >= 1; i /= 2)
12259 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12260 emit_insn (reduc (dest, tmpsum, tmpsum));
12261 tmpsum = dest;
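/* Illustration only (standalone sketch with a hypothetical name): the shape
   of the reduction emitted above, shown for addition on a plain array.  For
   eight elements it takes three pairwise passes (4, 2, then 1 pair) and the
   full result ends up in element 0, matching the note that only the first
   element of the final vector is used.  */
static int
pairwise_reduce_sketch (int *v, unsigned int nelts)
{
  for (unsigned int i = nelts / 2; i >= 1; i /= 2)
    for (unsigned int j = 0; j < i; j++)
      v[j] = v[2 * j] + v[2 * j + 1];   /* one pairwise op per remaining pair */
  return v[0];
}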
12265 /* If VALS is a vector constant that can be loaded into a register
12266 using VDUP, generate instructions to do so and return an RTX to
12267 assign to the register. Otherwise return NULL_RTX. */
12269 static rtx
12270 neon_vdup_constant (rtx vals)
12272 machine_mode mode = GET_MODE (vals);
12273 machine_mode inner_mode = GET_MODE_INNER (mode);
12274 rtx x;
12276 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12277 return NULL_RTX;
12279 if (!const_vec_duplicate_p (vals, &x))
12280 /* The elements are not all the same. We could handle repeating
12281 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12282 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12283 vdup.i16). */
12284 return NULL_RTX;
12286 /* We can load this constant by using VDUP and a constant in a
12287 single ARM register. This will be cheaper than a vector
12288 load. */
12290 x = copy_to_mode_reg (inner_mode, x);
12291 return gen_vec_duplicate (mode, x);
12294 /* Generate code to load VALS, which is a PARALLEL containing only
12295 constants (for vec_init) or CONST_VECTOR, efficiently into a
12296 register. Returns an RTX to copy into the register, or NULL_RTX
12297 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12300 neon_make_constant (rtx vals)
12302 machine_mode mode = GET_MODE (vals);
12303 rtx target;
12304 rtx const_vec = NULL_RTX;
12305 int n_elts = GET_MODE_NUNITS (mode);
12306 int n_const = 0;
12307 int i;
12309 if (GET_CODE (vals) == CONST_VECTOR)
12310 const_vec = vals;
12311 else if (GET_CODE (vals) == PARALLEL)
12313 /* A CONST_VECTOR must contain only CONST_INTs and
12314 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12315 Only store valid constants in a CONST_VECTOR. */
12316 for (i = 0; i < n_elts; ++i)
12318 rtx x = XVECEXP (vals, 0, i);
12319 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12320 n_const++;
12322 if (n_const == n_elts)
12323 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12325 else
12326 gcc_unreachable ();
12328 if (const_vec != NULL
12329 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12330 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12331 return const_vec;
12332 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12333 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12334 pipeline cycle; creating the constant takes one or two ARM
12335 pipeline cycles. */
12336 return target;
12337 else if (const_vec != NULL_RTX)
12338 /* Load from constant pool. On Cortex-A8 this takes two cycles
12339 (for either double or quad vectors). We can not take advantage
12340 of single-cycle VLD1 because we need a PC-relative addressing
12341 mode. */
12342 return const_vec;
12343 else
12344 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12345 We can not construct an initializer. */
12346 return NULL_RTX;
12349 /* Initialize vector TARGET to VALS. */
12351 void
12352 neon_expand_vector_init (rtx target, rtx vals)
12354 machine_mode mode = GET_MODE (target);
12355 machine_mode inner_mode = GET_MODE_INNER (mode);
12356 int n_elts = GET_MODE_NUNITS (mode);
12357 int n_var = 0, one_var = -1;
12358 bool all_same = true;
12359 rtx x, mem;
12360 int i;
12362 for (i = 0; i < n_elts; ++i)
12364 x = XVECEXP (vals, 0, i);
12365 if (!CONSTANT_P (x))
12366 ++n_var, one_var = i;
12368 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12369 all_same = false;
12372 if (n_var == 0)
12374 rtx constant = neon_make_constant (vals);
12375 if (constant != NULL_RTX)
12377 emit_move_insn (target, constant);
12378 return;
12382 /* Splat a single non-constant element if we can. */
12383 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12385 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12386 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12387 return;
12390 /* One field is non-constant. Load constant then overwrite varying
12391 field. This is more efficient than using the stack. */
12392 if (n_var == 1)
12394 rtx copy = copy_rtx (vals);
12395 rtx index = GEN_INT (one_var);
12397 /* Load constant part of vector, substitute neighboring value for
12398 varying element. */
12399 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12400 neon_expand_vector_init (target, copy);
12402 /* Insert variable. */
12403 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12404 switch (mode)
12406 case E_V8QImode:
12407 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12408 break;
12409 case E_V16QImode:
12410 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12411 break;
12412 case E_V4HImode:
12413 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12414 break;
12415 case E_V8HImode:
12416 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12417 break;
12418 case E_V2SImode:
12419 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12420 break;
12421 case E_V4SImode:
12422 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12423 break;
12424 case E_V2SFmode:
12425 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12426 break;
12427 case E_V4SFmode:
12428 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12429 break;
12430 case E_V2DImode:
12431 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12432 break;
12433 default:
12434 gcc_unreachable ();
12436 return;
12439 /* Construct the vector in memory one field at a time
12440 and load the whole vector. */
12441 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12442 for (i = 0; i < n_elts; i++)
12443 emit_move_insn (adjust_address_nv (mem, inner_mode,
12444 i * GET_MODE_SIZE (inner_mode)),
12445 XVECEXP (vals, 0, i));
12446 emit_move_insn (target, mem);
12449 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12450 an error if it doesn't. EXP indicates the source location, which includes the
12451 inlining history for intrinsics. */
12453 static void
12454 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12455 const_tree exp, const char *desc)
12457 HOST_WIDE_INT lane;
12459 gcc_assert (CONST_INT_P (operand));
12461 lane = INTVAL (operand);
12463 if (lane < low || lane >= high)
12465 if (exp)
12466 error ("%K%s %wd out of range %wd - %wd",
12467 exp, desc, lane, low, high - 1);
12468 else
12469 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12473 /* Bounds-check lanes. */
12475 void
12476 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12477 const_tree exp)
12479 bounds_check (operand, low, high, exp, "lane");
12482 /* Bounds-check constants. */
12484 void
12485 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12487 bounds_check (operand, low, high, NULL_TREE, "constant");
12490 HOST_WIDE_INT
12491 neon_element_bits (machine_mode mode)
12493 return GET_MODE_UNIT_BITSIZE (mode);
12497 /* Predicates for `match_operand' and `match_operator'. */
12499 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12500 WB is true if full writeback address modes are allowed and is false
12501 if limited writeback address modes (POST_INC and PRE_DEC) are
12502 allowed. */
12505 arm_coproc_mem_operand (rtx op, bool wb)
12507 rtx ind;
12509 /* Reject eliminable registers. */
12510 if (! (reload_in_progress || reload_completed || lra_in_progress)
12511 && ( reg_mentioned_p (frame_pointer_rtx, op)
12512 || reg_mentioned_p (arg_pointer_rtx, op)
12513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12517 return FALSE;
12519 /* Constants are converted into offsets from labels. */
12520 if (!MEM_P (op))
12521 return FALSE;
12523 ind = XEXP (op, 0);
12525 if (reload_completed
12526 && (GET_CODE (ind) == LABEL_REF
12527 || (GET_CODE (ind) == CONST
12528 && GET_CODE (XEXP (ind, 0)) == PLUS
12529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12530 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12531 return TRUE;
12533 /* Match: (mem (reg)). */
12534 if (REG_P (ind))
12535 return arm_address_register_rtx_p (ind, 0);
12537 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12538 acceptable in any case (subject to verification by
12539 arm_address_register_rtx_p). We need WB to be true to accept
12540 PRE_INC and POST_DEC. */
12541 if (GET_CODE (ind) == POST_INC
12542 || GET_CODE (ind) == PRE_DEC
12543 || (wb
12544 && (GET_CODE (ind) == PRE_INC
12545 || GET_CODE (ind) == POST_DEC)))
12546 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12548 if (wb
12549 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12550 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12551 && GET_CODE (XEXP (ind, 1)) == PLUS
12552 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12553 ind = XEXP (ind, 1);
12555 /* Match:
12556 (plus (reg)
12557 (const)). */
12558 if (GET_CODE (ind) == PLUS
12559 && REG_P (XEXP (ind, 0))
12560 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12561 && CONST_INT_P (XEXP (ind, 1))
12562 && INTVAL (XEXP (ind, 1)) > -1024
12563 && INTVAL (XEXP (ind, 1)) < 1024
12564 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12565 return TRUE;
12567 return FALSE;
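/* Illustration only: under the checks above, a plain register address, a
   POST_INC/PRE_DEC form, and a register plus a constant offset that is a
   multiple of 4 with magnitude below 1024 (e.g. #1020) are all accepted,
   while offsets such as #1024 (out of range) or #2 (not a multiple of 4)
   are rejected.  */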
12570 /* Return TRUE if OP is a memory operand which we can load or store a vector
12571 to/from. TYPE is one of the following values:
12572 0 - Vector load/store (vldr)
12573 1 - Core registers (ldm)
12574 2 - Element/structure loads (vld1)
12577 neon_vector_mem_operand (rtx op, int type, bool strict)
12579 rtx ind;
12581 /* Reject eliminable registers. */
12582 if (strict && ! (reload_in_progress || reload_completed)
12583 && (reg_mentioned_p (frame_pointer_rtx, op)
12584 || reg_mentioned_p (arg_pointer_rtx, op)
12585 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12586 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12587 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12588 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12589 return FALSE;
12591 /* Constants are converted into offsets from labels. */
12592 if (!MEM_P (op))
12593 return FALSE;
12595 ind = XEXP (op, 0);
12597 if (reload_completed
12598 && (GET_CODE (ind) == LABEL_REF
12599 || (GET_CODE (ind) == CONST
12600 && GET_CODE (XEXP (ind, 0)) == PLUS
12601 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12602 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12603 return TRUE;
12605 /* Match: (mem (reg)). */
12606 if (REG_P (ind))
12607 return arm_address_register_rtx_p (ind, 0);
12609 /* Allow post-increment with Neon registers. */
12610 if ((type != 1 && GET_CODE (ind) == POST_INC)
12611 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12612 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12614 /* Allow post-increment by register for VLDn. */
12615 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12616 && GET_CODE (XEXP (ind, 1)) == PLUS
12617 && REG_P (XEXP (XEXP (ind, 1), 1)))
12618 return true;
12620 /* Match:
12621 (plus (reg)
12622 (const)). */
12623 if (type == 0
12624 && GET_CODE (ind) == PLUS
12625 && REG_P (XEXP (ind, 0))
12626 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12627 && CONST_INT_P (XEXP (ind, 1))
12628 && INTVAL (XEXP (ind, 1)) > -1024
12629 /* For quad modes, we restrict the constant offset to be slightly less
12630 than what the instruction format permits. We have no such constraint
12631 on double mode offsets. (This must match arm_legitimate_index_p.) */
12632 && (INTVAL (XEXP (ind, 1))
12633 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12634 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12635 return TRUE;
12637 return FALSE;
12640 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12641 type. */
12643 neon_struct_mem_operand (rtx op)
12645 rtx ind;
12647 /* Reject eliminable registers. */
12648 if (! (reload_in_progress || reload_completed)
12649 && ( reg_mentioned_p (frame_pointer_rtx, op)
12650 || reg_mentioned_p (arg_pointer_rtx, op)
12651 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12652 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12653 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12654 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12655 return FALSE;
12657 /* Constants are converted into offsets from labels. */
12658 if (!MEM_P (op))
12659 return FALSE;
12661 ind = XEXP (op, 0);
12663 if (reload_completed
12664 && (GET_CODE (ind) == LABEL_REF
12665 || (GET_CODE (ind) == CONST
12666 && GET_CODE (XEXP (ind, 0)) == PLUS
12667 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12668 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12669 return TRUE;
12671 /* Match: (mem (reg)). */
12672 if (REG_P (ind))
12673 return arm_address_register_rtx_p (ind, 0);
12675 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12676 if (GET_CODE (ind) == POST_INC
12677 || GET_CODE (ind) == PRE_DEC)
12678 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12680 return FALSE;
12683 /* Return true if X is a register that will be eliminated later on. */
12685 arm_eliminable_register (rtx x)
12687 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12688 || REGNO (x) == ARG_POINTER_REGNUM
12689 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12690 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12693 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12694 coprocessor registers. Otherwise return NO_REGS. */
12696 enum reg_class
12697 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12699 if (mode == HFmode)
12701 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12702 return GENERAL_REGS;
12703 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12704 return NO_REGS;
12705 return GENERAL_REGS;
12708 /* The neon move patterns handle all legitimate vector and struct
12709 addresses. */
12710 if (TARGET_NEON
12711 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12712 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12713 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12714 || VALID_NEON_STRUCT_MODE (mode)))
12715 return NO_REGS;
12717 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12718 return NO_REGS;
12720 return GENERAL_REGS;
12723 /* Values which must be returned in the most-significant end of the return
12724 register. */
12726 static bool
12727 arm_return_in_msb (const_tree valtype)
12729 return (TARGET_AAPCS_BASED
12730 && BYTES_BIG_ENDIAN
12731 && (AGGREGATE_TYPE_P (valtype)
12732 || TREE_CODE (valtype) == COMPLEX_TYPE
12733 || FIXED_POINT_TYPE_P (valtype)));
12736 /* Return TRUE if X references a SYMBOL_REF. */
12738 symbol_mentioned_p (rtx x)
12740 const char * fmt;
12741 int i;
12743 if (GET_CODE (x) == SYMBOL_REF)
12744 return 1;
12746 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12747 are constant offsets, not symbols. */
12748 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12749 return 0;
12751 fmt = GET_RTX_FORMAT (GET_CODE (x));
12753 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12755 if (fmt[i] == 'E')
12757 int j;
12759 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12760 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12761 return 1;
12763 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12764 return 1;
12767 return 0;
12770 /* Return TRUE if X references a LABEL_REF. */
12772 label_mentioned_p (rtx x)
12774 const char * fmt;
12775 int i;
12777 if (GET_CODE (x) == LABEL_REF)
12778 return 1;
12780 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12781 instruction, but they are constant offsets, not symbols. */
12782 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12783 return 0;
12785 fmt = GET_RTX_FORMAT (GET_CODE (x));
12786 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12788 if (fmt[i] == 'E')
12790 int j;
12792 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12793 if (label_mentioned_p (XVECEXP (x, i, j)))
12794 return 1;
12796 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12797 return 1;
12800 return 0;
12804 tls_mentioned_p (rtx x)
12806 switch (GET_CODE (x))
12808 case CONST:
12809 return tls_mentioned_p (XEXP (x, 0));
12811 case UNSPEC:
12812 if (XINT (x, 1) == UNSPEC_TLS)
12813 return 1;
12815 /* Fall through. */
12816 default:
12817 return 0;
12821 /* Must not copy any rtx that uses a pc-relative address.
12822 Also, disallow copying of load-exclusive instructions that
12823 may appear after splitting of compare-and-swap-style operations
12824 so as to prevent those loops from being transformed away from their
12825 canonical forms (see PR 69904). */
12827 static bool
12828 arm_cannot_copy_insn_p (rtx_insn *insn)
12830 /* The tls call insn cannot be copied, as it is paired with a data
12831 word. */
12832 if (recog_memoized (insn) == CODE_FOR_tlscall)
12833 return true;
12835 subrtx_iterator::array_type array;
12836 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12838 const_rtx x = *iter;
12839 if (GET_CODE (x) == UNSPEC
12840 && (XINT (x, 1) == UNSPEC_PIC_BASE
12841 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12842 return true;
12845 rtx set = single_set (insn);
12846 if (set)
12848 rtx src = SET_SRC (set);
12849 if (GET_CODE (src) == ZERO_EXTEND)
12850 src = XEXP (src, 0);
12852 /* Catch the load-exclusive and load-acquire operations. */
12853 if (GET_CODE (src) == UNSPEC_VOLATILE
12854 && (XINT (src, 1) == VUNSPEC_LL
12855 || XINT (src, 1) == VUNSPEC_LAX))
12856 return true;
12858 return false;
12861 enum rtx_code
12862 minmax_code (rtx x)
12864 enum rtx_code code = GET_CODE (x);
12866 switch (code)
12868 case SMAX:
12869 return GE;
12870 case SMIN:
12871 return LE;
12872 case UMIN:
12873 return LEU;
12874 case UMAX:
12875 return GEU;
12876 default:
12877 gcc_unreachable ();
12881 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12883 bool
12884 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12885 int *mask, bool *signed_sat)
12887 /* The high bound must be a power of two minus one. */
12888 int log = exact_log2 (INTVAL (hi_bound) + 1);
12889 if (log == -1)
12890 return false;
12892 /* The low bound is either zero (for usat) or one less than the
12893 negation of the high bound (for ssat). */
12894 if (INTVAL (lo_bound) == 0)
12896 if (mask)
12897 *mask = log;
12898 if (signed_sat)
12899 *signed_sat = false;
12901 return true;
12904 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12906 if (mask)
12907 *mask = log + 1;
12908 if (signed_sat)
12909 *signed_sat = true;
12911 return true;
12914 return false;
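/* Illustration only: clamping to [0, 255] has HI_BOUND + 1 = 256 = 2^8 and
   LO_BOUND = 0, so the function reports *MASK = 8 and *SIGNED_SAT = false
   (a usat #8 range); clamping to [-128, 127] has 2^7 with
   LO_BOUND = -HI_BOUND - 1, so it reports *MASK = 8 and *SIGNED_SAT = true
   (an ssat #8 range).  */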
12917 /* Return 1 if memory locations are adjacent. */
12919 adjacent_mem_locations (rtx a, rtx b)
12921 /* We don't guarantee to preserve the order of these memory refs. */
12922 if (volatile_refs_p (a) || volatile_refs_p (b))
12923 return 0;
12925 if ((REG_P (XEXP (a, 0))
12926 || (GET_CODE (XEXP (a, 0)) == PLUS
12927 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12928 && (REG_P (XEXP (b, 0))
12929 || (GET_CODE (XEXP (b, 0)) == PLUS
12930 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12932 HOST_WIDE_INT val0 = 0, val1 = 0;
12933 rtx reg0, reg1;
12934 int val_diff;
12936 if (GET_CODE (XEXP (a, 0)) == PLUS)
12938 reg0 = XEXP (XEXP (a, 0), 0);
12939 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12941 else
12942 reg0 = XEXP (a, 0);
12944 if (GET_CODE (XEXP (b, 0)) == PLUS)
12946 reg1 = XEXP (XEXP (b, 0), 0);
12947 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12949 else
12950 reg1 = XEXP (b, 0);
12952 /* Don't accept any offset that will require multiple
12953 instructions to handle, since this would cause the
12954 arith_adjacentmem pattern to output an overlong sequence. */
12955 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12956 return 0;
12958 /* Don't allow an eliminable register: register elimination can make
12959 the offset too large. */
12960 if (arm_eliminable_register (reg0))
12961 return 0;
12963 val_diff = val1 - val0;
12965 if (arm_ld_sched)
12967 /* If the target has load delay slots, then there's no benefit
12968 to using an ldm instruction unless the offset is zero and
12969 we are optimizing for size. */
12970 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12971 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12972 && (val_diff == 4 || val_diff == -4));
12975 return ((REGNO (reg0) == REGNO (reg1))
12976 && (val_diff == 4 || val_diff == -4));
12979 return 0;
12982 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12983 for load operations, false for store operations. CONSECUTIVE is true
12984 if the register numbers in the operation must be consecutive in the register
12985 bank. RETURN_PC is true if the value is to be loaded into the PC.
12986 The pattern we are trying to match for load is:
12987 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12988 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12991 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12993 where
12994 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12995 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12996 3. If consecutive is TRUE, then for kth register being loaded,
12997 REGNO (R_dk) = REGNO (R_d0) + k.
12998 The pattern for store is similar. */
12999 bool
13000 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13001 bool consecutive, bool return_pc)
13003 HOST_WIDE_INT count = XVECLEN (op, 0);
13004 rtx reg, mem, addr;
13005 unsigned regno;
13006 unsigned first_regno;
13007 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13008 rtx elt;
13009 bool addr_reg_in_reglist = false;
13010 bool update = false;
13011 int reg_increment;
13012 int offset_adj;
13013 int regs_per_val;
13015 /* If not in SImode, then registers must be consecutive
13016 (e.g., VLDM instructions for DFmode). */
13017 gcc_assert ((mode == SImode) || consecutive);
13018 /* Setting return_pc for stores is illegal. */
13019 gcc_assert (!return_pc || load);
13021 /* Set up the increments and the regs per val based on the mode. */
13022 reg_increment = GET_MODE_SIZE (mode);
13023 regs_per_val = reg_increment / 4;
13024 offset_adj = return_pc ? 1 : 0;
13026 if (count <= 1
13027 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13028 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13029 return false;
13031 /* Check if this is a write-back. */
13032 elt = XVECEXP (op, 0, offset_adj);
13033 if (GET_CODE (SET_SRC (elt)) == PLUS)
13035 i++;
13036 base = 1;
13037 update = true;
13039 /* The offset adjustment must be the number of registers being
13040 popped times the size of a single register. */
13041 if (!REG_P (SET_DEST (elt))
13042 || !REG_P (XEXP (SET_SRC (elt), 0))
13043 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13044 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13045 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13046 ((count - 1 - offset_adj) * reg_increment))
13047 return false;
13050 i = i + offset_adj;
13051 base = base + offset_adj;
13052 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13053 success depends on the type: VLDM can do just one reg,
13054 LDM must do at least two. */
13055 if ((count <= i) && (mode == SImode))
13056 return false;
13058 elt = XVECEXP (op, 0, i - 1);
13059 if (GET_CODE (elt) != SET)
13060 return false;
13062 if (load)
13064 reg = SET_DEST (elt);
13065 mem = SET_SRC (elt);
13067 else
13069 reg = SET_SRC (elt);
13070 mem = SET_DEST (elt);
13073 if (!REG_P (reg) || !MEM_P (mem))
13074 return false;
13076 regno = REGNO (reg);
13077 first_regno = regno;
13078 addr = XEXP (mem, 0);
13079 if (GET_CODE (addr) == PLUS)
13081 if (!CONST_INT_P (XEXP (addr, 1)))
13082 return false;
13084 offset = INTVAL (XEXP (addr, 1));
13085 addr = XEXP (addr, 0);
13088 if (!REG_P (addr))
13089 return false;
13091 /* Don't allow SP to be loaded unless it is also the base register. It
13092 guarantees that SP is reset correctly when an LDM instruction
13093 is interrupted. Otherwise, we might end up with a corrupt stack. */
13094 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13095 return false;
13097 for (; i < count; i++)
13099 elt = XVECEXP (op, 0, i);
13100 if (GET_CODE (elt) != SET)
13101 return false;
13103 if (load)
13105 reg = SET_DEST (elt);
13106 mem = SET_SRC (elt);
13108 else
13110 reg = SET_SRC (elt);
13111 mem = SET_DEST (elt);
13114 if (!REG_P (reg)
13115 || GET_MODE (reg) != mode
13116 || REGNO (reg) <= regno
13117 || (consecutive
13118 && (REGNO (reg) !=
13119 (unsigned int) (first_regno + regs_per_val * (i - base))))
13120 /* Don't allow SP to be loaded unless it is also the base register. It
13121 guarantees that SP is reset correctly when an LDM instruction
13122 is interrupted. Otherwise, we might end up with a corrupt stack. */
13123 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13124 || !MEM_P (mem)
13125 || GET_MODE (mem) != mode
13126 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13127 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13128 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13129 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13130 offset + (i - base) * reg_increment))
13131 && (!REG_P (XEXP (mem, 0))
13132 || offset + (i - base) * reg_increment != 0)))
13133 return false;
13135 regno = REGNO (reg);
13136 if (regno == REGNO (addr))
13137 addr_reg_in_reglist = true;
13140 if (load)
13142 if (update && addr_reg_in_reglist)
13143 return false;
13145 /* For Thumb-1, the address register is always modified, either by write-back
13146 or by explicit load. If the pattern does not describe an update,
13147 then the address register must be in the list of loaded registers. */
13148 if (TARGET_THUMB1)
13149 return update || addr_reg_in_reglist;
13152 return true;
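/* Illustration only: a two-register SImode load at offset 0 accepted by
   ldm_stm_operation_p has the shape

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))])

   with ascending destination register numbers and offsets stepping by the
   register size, as described in the comment before the function.  */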
13155 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13156 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13157 instruction. ADD_OFFSET is nonzero if the base address register needs
13158 to be modified with an add instruction before we can use it. */
13160 static bool
13161 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13162 int nops, HOST_WIDE_INT add_offset)
13164 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13165 if the offset isn't small enough. The reason 2 ldrs are faster
13166 is because these ARMs are able to do more than one cache access
13167 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13168 whilst the ARM8 has a double bandwidth cache. This means that
13169 these cores can do both an instruction fetch and a data fetch in
13170 a single cycle, so the trick of calculating the address into a
13171 scratch register (one of the result regs) and then doing a load
13172 multiple actually becomes slower (and no smaller in code size).
13173 That is the transformation
13175 ldr rd1, [rbase + offset]
13176 ldr rd2, [rbase + offset + 4]
13180 add rd1, rbase, offset
13181 ldmia rd1, {rd1, rd2}
13183 produces worse code -- '3 cycles + any stalls on rd2' instead of
13184 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13185 access per cycle, the first sequence could never complete in less
13186 than 6 cycles, whereas the ldm sequence would only take 5 and
13187 would make better use of sequential accesses if not hitting the
13188 cache.
13190 We cheat here and test 'arm_ld_sched' which we currently know to
13191 only be true for the ARM8, ARM9 and StrongARM. If this ever
13192 changes, then the test below needs to be reworked. */
13193 if (nops == 2 && arm_ld_sched && add_offset != 0)
13194 return false;
13196 /* XScale has load-store double instructions, but they have stricter
13197 alignment requirements than load-store multiple, so we cannot
13198 use them.
13200 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13201 the pipeline until completion.
13203 NREGS CYCLES
13204 1 3
13205 2 4
13206 3 5
13207 4 6
13209 An ldr instruction takes 1-3 cycles, but does not block the
13210 pipeline.
13212 NREGS CYCLES
13213 1 1-3
13214 2 2-6
13215 3 3-9
13216 4 4-12
13218 Best case ldr will always win. However, the more ldr instructions
13219 we issue, the less likely we are to be able to schedule them well.
13220 Using ldr instructions also increases code size.
13222 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13223 for counts of 3 or 4 regs. */
13224 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13225 return false;
13226 return true;
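/* For illustration: when tuning for XScale and not optimizing for size, the
   second test above keeps a candidate pair of loads as two ldr instructions,
   while a run of three or four loads may still become an ldm; the first test
   likewise rejects a two-load ldm on ARM8/ARM9/StrongARM whenever an extra
   add would be needed to form the base address.  */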
13229 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13230 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13231 an array ORDER which describes the sequence to use when accessing the
13232 offsets that produces an ascending order. In this sequence, each
13233 offset must be larger by exactly 4 than the previous one. ORDER[0]
13234 must have been filled in with the lowest offset by the caller.
13235 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13236 we use to verify that ORDER produces an ascending order of registers.
13237 Return true if it was possible to construct such an order, false if
13238 not. */
13240 static bool
13241 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13242 int *unsorted_regs)
13244 int i;
13245 for (i = 1; i < nops; i++)
13247 int j;
13249 order[i] = order[i - 1];
13250 for (j = 0; j < nops; j++)
13251 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13253 /* We must find exactly one offset that is higher than the
13254 previous one by 4. */
13255 if (order[i] != order[i - 1])
13256 return false;
13257 order[i] = j;
13259 if (order[i] == order[i - 1])
13260 return false;
13261 /* The register numbers must be ascending. */
13262 if (unsorted_regs != NULL
13263 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13264 return false;
13266 return true;
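/* Worked example (illustrative): with NOPS == 4, UNSORTED_OFFSETS ==
   {4, 12, 0, 8} and ORDER[0] == 2 (offset 0 is the lowest), the loop above
   fills ORDER == {2, 0, 3, 1}, visiting offsets 0, 4, 8, 12.  With offsets
   {0, 4, 12, 16} there is no offset exactly 4 above 4, so the scan for the
   third element fails and the function returns false.  */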
13269 /* Used to determine in a peephole whether a sequence of load
13270 instructions can be changed into a load-multiple instruction.
13271 NOPS is the number of separate load instructions we are examining. The
13272 first NOPS entries in OPERANDS are the destination registers, the
13273 next NOPS entries are memory operands. If this function is
13274 successful, *BASE is set to the common base register of the memory
13275 accesses; *LOAD_OFFSET is set to the first memory location's offset
13276 from that base register.
13277 REGS is an array filled in with the destination register numbers.
13278 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13279 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13280 the sequence of registers in REGS matches the loads from ascending memory
13281 locations, and the function verifies that the register numbers are
13282 themselves ascending. If CHECK_REGS is false, the register numbers
13283 are stored in the order they are found in the operands. */
13284 static int
13285 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13286 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13288 int unsorted_regs[MAX_LDM_STM_OPS];
13289 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13290 int order[MAX_LDM_STM_OPS];
13291 rtx base_reg_rtx = NULL;
13292 int base_reg = -1;
13293 int i, ldm_case;
13295 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13296 easily extended if required. */
13297 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13299 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13301 /* Loop over the operands and check that the memory references are
13302 suitable (i.e. immediate offsets from the same base register). At
13303 the same time, extract the target register, and the memory
13304 offsets. */
13305 for (i = 0; i < nops; i++)
13307 rtx reg;
13308 rtx offset;
13310 /* Convert a subreg of a mem into the mem itself. */
13311 if (GET_CODE (operands[nops + i]) == SUBREG)
13312 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13314 gcc_assert (MEM_P (operands[nops + i]));
13316 /* Don't reorder volatile memory references; it doesn't seem worth
13317 looking for the case where the order is ok anyway. */
13318 if (MEM_VOLATILE_P (operands[nops + i]))
13319 return 0;
13321 offset = const0_rtx;
13323 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13324 || (GET_CODE (reg) == SUBREG
13325 && REG_P (reg = SUBREG_REG (reg))))
13326 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13327 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13328 || (GET_CODE (reg) == SUBREG
13329 && REG_P (reg = SUBREG_REG (reg))))
13330 && (CONST_INT_P (offset
13331 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13333 if (i == 0)
13335 base_reg = REGNO (reg);
13336 base_reg_rtx = reg;
13337 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13338 return 0;
13340 else if (base_reg != (int) REGNO (reg))
13341 /* Not addressed from the same base register. */
13342 return 0;
13344 unsorted_regs[i] = (REG_P (operands[i])
13345 ? REGNO (operands[i])
13346 : REGNO (SUBREG_REG (operands[i])));
13348 /* If it isn't an integer register, or if it overwrites the
13349 base register but isn't the last insn in the list, then
13350 we can't do this. */
13351 if (unsorted_regs[i] < 0
13352 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13353 || unsorted_regs[i] > 14
13354 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13355 return 0;
13357 /* Don't allow SP to be loaded unless it is also the base
13358 register. It guarantees that SP is reset correctly when
13359 an LDM instruction is interrupted. Otherwise, we might
13360 end up with a corrupt stack. */
13361 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13362 return 0;
13364 unsorted_offsets[i] = INTVAL (offset);
13365 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13366 order[0] = i;
13368 else
13369 /* Not a suitable memory address. */
13370 return 0;
13373 /* All the useful information has now been extracted from the
13374 operands into unsorted_regs and unsorted_offsets; additionally,
13375 order[0] has been set to the lowest offset in the list. Sort
13376 the offsets into order, verifying that they are adjacent, and
13377 check that the register numbers are ascending. */
13378 if (!compute_offset_order (nops, unsorted_offsets, order,
13379 check_regs ? unsorted_regs : NULL))
13380 return 0;
13382 if (saved_order)
13383 memcpy (saved_order, order, sizeof order);
13385 if (base)
13387 *base = base_reg;
13389 for (i = 0; i < nops; i++)
13390 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13392 *load_offset = unsorted_offsets[order[0]];
13395 if (TARGET_THUMB1
13396 && !peep2_reg_dead_p (nops, base_reg_rtx))
13397 return 0;
13399 if (unsorted_offsets[order[0]] == 0)
13400 ldm_case = 1; /* ldmia */
13401 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13402 ldm_case = 2; /* ldmib */
13403 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13404 ldm_case = 3; /* ldmda */
13405 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13406 ldm_case = 4; /* ldmdb */
13407 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13408 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13409 ldm_case = 5;
13410 else
13411 return 0;
13413 if (!multiple_operation_profitable_p (false, nops,
13414 ldm_case == 5
13415 ? unsorted_offsets[order[0]] : 0))
13416 return 0;
13418 return ldm_case;
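/* For illustration: the two loads r4 := [r3] and r5 := [r3, #4] yield
   ldm_case 1 (ldmia) with *BASE == 3, *LOAD_OFFSET == 0 and REGS == {4, 5},
   provided the tuning in multiple_operation_profitable_p accepts a
   two-register transfer; with CHECK_REGS true the reversed pair
   r5 := [r3], r4 := [r3, #4] is rejected because the register numbers do
   not ascend with the addresses.  */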
13421 /* Used to determine in a peephole whether a sequence of store instructions can
13422 be changed into a store-multiple instruction.
13423 NOPS is the number of separate store instructions we are examining.
13424 NOPS_TOTAL is the total number of instructions recognized by the peephole
13425 pattern.
13426 The first NOPS entries in OPERANDS are the source registers, the next
13427 NOPS entries are memory operands. If this function is successful, *BASE is
13428 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13429 to the first memory location's offset from that base register. REGS is an
13430 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13431 likewise filled with the corresponding rtx's.
13432 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13433 numbers to an ascending order of stores.
13434 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13435 from ascending memory locations, and the function verifies that the register
13436 numbers are themselves ascending. If CHECK_REGS is false, the register
13437 numbers are stored in the order they are found in the operands. */
13438 static int
13439 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13440 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13441 HOST_WIDE_INT *load_offset, bool check_regs)
13443 int unsorted_regs[MAX_LDM_STM_OPS];
13444 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13445 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13446 int order[MAX_LDM_STM_OPS];
13447 int base_reg = -1;
13448 rtx base_reg_rtx = NULL;
13449 int i, stm_case;
13451 /* Write back of base register is currently only supported for Thumb 1. */
13452 int base_writeback = TARGET_THUMB1;
13454 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13455 easily extended if required. */
13456 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13458 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13460 /* Loop over the operands and check that the memory references are
13461 suitable (i.e. immediate offsets from the same base register). At
13462 the same time, extract the target register, and the memory
13463 offsets. */
13464 for (i = 0; i < nops; i++)
13466 rtx reg;
13467 rtx offset;
13469 /* Convert a subreg of a mem into the mem itself. */
13470 if (GET_CODE (operands[nops + i]) == SUBREG)
13471 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13473 gcc_assert (MEM_P (operands[nops + i]));
13475 /* Don't reorder volatile memory references; it doesn't seem worth
13476 looking for the case where the order is ok anyway. */
13477 if (MEM_VOLATILE_P (operands[nops + i]))
13478 return 0;
13480 offset = const0_rtx;
13482 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13483 || (GET_CODE (reg) == SUBREG
13484 && REG_P (reg = SUBREG_REG (reg))))
13485 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13486 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13487 || (GET_CODE (reg) == SUBREG
13488 && REG_P (reg = SUBREG_REG (reg))))
13489 && (CONST_INT_P (offset
13490 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13492 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13493 ? operands[i] : SUBREG_REG (operands[i]));
13494 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13496 if (i == 0)
13498 base_reg = REGNO (reg);
13499 base_reg_rtx = reg;
13500 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13501 return 0;
13503 else if (base_reg != (int) REGNO (reg))
13504 /* Not addressed from the same base register. */
13505 return 0;
13507 /* If it isn't an integer register, then we can't do this. */
13508 if (unsorted_regs[i] < 0
13509 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13510 /* The effects are unpredictable if the base register is
13511 both updated and stored. */
13512 || (base_writeback && unsorted_regs[i] == base_reg)
13513 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13514 || unsorted_regs[i] > 14)
13515 return 0;
13517 unsorted_offsets[i] = INTVAL (offset);
13518 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13519 order[0] = i;
13521 else
13522 /* Not a suitable memory address. */
13523 return 0;
13526 /* All the useful information has now been extracted from the
13527 operands into unsorted_regs and unsorted_offsets; additionally,
13528 order[0] has been set to the lowest offset in the list. Sort
13529 the offsets into order, verifying that they are adjacent, and
13530 check that the register numbers are ascending. */
13531 if (!compute_offset_order (nops, unsorted_offsets, order,
13532 check_regs ? unsorted_regs : NULL))
13533 return 0;
13535 if (saved_order)
13536 memcpy (saved_order, order, sizeof order);
13538 if (base)
13540 *base = base_reg;
13542 for (i = 0; i < nops; i++)
13544 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13545 if (reg_rtxs)
13546 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13549 *load_offset = unsorted_offsets[order[0]];
13552 if (TARGET_THUMB1
13553 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13554 return 0;
13556 if (unsorted_offsets[order[0]] == 0)
13557 stm_case = 1; /* stmia */
13558 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13559 stm_case = 2; /* stmib */
13560 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13561 stm_case = 3; /* stmda */
13562 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13563 stm_case = 4; /* stmdb */
13564 else
13565 return 0;
13567 if (!multiple_operation_profitable_p (false, nops, 0))
13568 return 0;
13570 return stm_case;
13573 /* Routines for use in generating RTL. */
13575 /* Generate a load-multiple instruction. COUNT is the number of loads in
13576 the instruction; REGS and MEMS are arrays containing the operands.
13577 BASEREG is the base register to be used in addressing the memory operands.
13578 WBACK_OFFSET is nonzero if the instruction should update the base
13579 register. */
13581 static rtx
13582 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13583 HOST_WIDE_INT wback_offset)
13585 int i = 0, j;
13586 rtx result;
13588 if (!multiple_operation_profitable_p (false, count, 0))
13590 rtx seq;
13592 start_sequence ();
13594 for (i = 0; i < count; i++)
13595 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13597 if (wback_offset != 0)
13598 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13600 seq = get_insns ();
13601 end_sequence ();
13603 return seq;
13606 result = gen_rtx_PARALLEL (VOIDmode,
13607 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13608 if (wback_offset != 0)
13610 XVECEXP (result, 0, 0)
13611 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13612 i = 1;
13613 count++;
13616 for (j = 0; i < count; i++, j++)
13617 XVECEXP (result, 0, i)
13618 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13620 return result;
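/* For illustration: a call with COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 builds roughly
     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])
   i.e. the base-register update is the first element of the PARALLEL;
   with WBACK_OFFSET == 0 that element is simply omitted.  */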
13623 /* Generate a store-multiple instruction. COUNT is the number of stores in
13624 the instruction; REGS and MEMS are arrays containing the operands.
13625 BASEREG is the base register to be used in addressing the memory operands.
13626 WBACK_OFFSET is nonzero if the instruction should update the base
13627 register. */
13629 static rtx
13630 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13631 HOST_WIDE_INT wback_offset)
13633 int i = 0, j;
13634 rtx result;
13636 if (GET_CODE (basereg) == PLUS)
13637 basereg = XEXP (basereg, 0);
13639 if (!multiple_operation_profitable_p (false, count, 0))
13641 rtx seq;
13643 start_sequence ();
13645 for (i = 0; i < count; i++)
13646 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13648 if (wback_offset != 0)
13649 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13651 seq = get_insns ();
13652 end_sequence ();
13654 return seq;
13657 result = gen_rtx_PARALLEL (VOIDmode,
13658 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13659 if (wback_offset != 0)
13661 XVECEXP (result, 0, 0)
13662 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13663 i = 1;
13664 count++;
13667 for (j = 0; i < count; i++, j++)
13668 XVECEXP (result, 0, i)
13669 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13671 return result;
13674 /* Generate either a load-multiple or a store-multiple instruction. This
13675 function can be used in situations where we can start with a single MEM
13676 rtx and adjust its address upwards.
13677 COUNT is the number of operations in the instruction, not counting a
13678 possible update of the base register. REGS is an array containing the
13679 register operands.
13680 BASEREG is the base register to be used in addressing the memory operands,
13681 which are constructed from BASEMEM.
13682 WRITE_BACK specifies whether the generated instruction should include an
13683 update of the base register.
13684 OFFSETP is used to pass an offset to and from this function; this offset
13685 is not used when constructing the address (instead BASEMEM should have an
13686 appropriate offset in its address); it is used only for setting
13687 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13689 static rtx
13690 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13691 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13693 rtx mems[MAX_LDM_STM_OPS];
13694 HOST_WIDE_INT offset = *offsetp;
13695 int i;
13697 gcc_assert (count <= MAX_LDM_STM_OPS);
13699 if (GET_CODE (basereg) == PLUS)
13700 basereg = XEXP (basereg, 0);
13702 for (i = 0; i < count; i++)
13704 rtx addr = plus_constant (Pmode, basereg, i * 4);
13705 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13706 offset += 4;
13709 if (write_back)
13710 *offsetp = offset;
13712 if (is_load)
13713 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13714 write_back ? 4 * count : 0);
13715 else
13716 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13717 write_back ? 4 * count : 0);
13721 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13722 rtx basemem, HOST_WIDE_INT *offsetp)
13724 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13725 offsetp);
13729 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13730 rtx basemem, HOST_WIDE_INT *offsetp)
13732 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13733 offsetp);
13736 /* Called from a peephole2 expander to turn a sequence of loads into an
13737 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13738 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13739 is true if we can reorder the registers because their subsequent uses
13740 are commutative.
13741 Returns true iff we could generate a new instruction. */
13743 bool
13744 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13746 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13747 rtx mems[MAX_LDM_STM_OPS];
13748 int i, j, base_reg;
13749 rtx base_reg_rtx;
13750 HOST_WIDE_INT offset;
13751 int write_back = FALSE;
13752 int ldm_case;
13753 rtx addr;
13755 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13756 &base_reg, &offset, !sort_regs);
13758 if (ldm_case == 0)
13759 return false;
13761 if (sort_regs)
13762 for (i = 0; i < nops - 1; i++)
13763 for (j = i + 1; j < nops; j++)
13764 if (regs[i] > regs[j])
13766 int t = regs[i];
13767 regs[i] = regs[j];
13768 regs[j] = t;
13770 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13772 if (TARGET_THUMB1)
13774 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13775 gcc_assert (ldm_case == 1 || ldm_case == 5);
13776 write_back = TRUE;
13779 if (ldm_case == 5)
13781 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13782 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13783 offset = 0;
13784 if (!TARGET_THUMB1)
13785 base_reg_rtx = newbase;
13788 for (i = 0; i < nops; i++)
13790 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13791 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13792 SImode, addr, 0);
13794 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13795 write_back ? offset + i * 4 : 0));
13796 return true;
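/* For illustration: a peephole window of
     ldr r0, [r4]   ldr r1, [r4, #4]   ldr r2, [r4, #8]   ldr r3, [r4, #12]
   is turned by the code above into a single ldmia r4, {r0-r3} (ldm_case 1);
   had the common offset not been directly usable, ldm_case 5 would first
   emit an add to form the new base register.  */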
13799 /* Called from a peephole2 expander to turn a sequence of stores into an
13800 STM instruction. OPERANDS are the operands found by the peephole matcher;
13801 NOPS indicates how many separate stores we are trying to combine.
13802 Returns true iff we could generate a new instruction. */
13804 bool
13805 gen_stm_seq (rtx *operands, int nops)
13807 int i;
13808 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13809 rtx mems[MAX_LDM_STM_OPS];
13810 int base_reg;
13811 rtx base_reg_rtx;
13812 HOST_WIDE_INT offset;
13813 int write_back = FALSE;
13814 int stm_case;
13815 rtx addr;
13816 bool base_reg_dies;
13818 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13819 mem_order, &base_reg, &offset, true);
13821 if (stm_case == 0)
13822 return false;
13824 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13826 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13827 if (TARGET_THUMB1)
13829 gcc_assert (base_reg_dies);
13830 write_back = TRUE;
13833 if (stm_case == 5)
13835 gcc_assert (base_reg_dies);
13836 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13837 offset = 0;
13840 addr = plus_constant (Pmode, base_reg_rtx, offset);
13842 for (i = 0; i < nops; i++)
13844 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13845 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13846 SImode, addr, 0);
13848 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13849 write_back ? offset + i * 4 : 0));
13850 return true;
13853 /* Called from a peephole2 expander to turn a sequence of stores that are
13854 preceded by constant loads into an STM instruction. OPERANDS are the
13855 operands found by the peephole matcher; NOPS indicates how many
13856 separate stores we are trying to combine; there are 2 * NOPS
13857 instructions in the peephole.
13858 Returns true iff we could generate a new instruction. */
13860 bool
13861 gen_const_stm_seq (rtx *operands, int nops)
13863 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13864 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13865 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13866 rtx mems[MAX_LDM_STM_OPS];
13867 int base_reg;
13868 rtx base_reg_rtx;
13869 HOST_WIDE_INT offset;
13870 int write_back = FALSE;
13871 int stm_case;
13872 rtx addr;
13873 bool base_reg_dies;
13874 int i, j;
13875 HARD_REG_SET allocated;
13877 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13878 mem_order, &base_reg, &offset, false);
13880 if (stm_case == 0)
13881 return false;
13883 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13885 /* If the same register is used more than once, try to find a free
13886 register. */
13887 CLEAR_HARD_REG_SET (allocated);
13888 for (i = 0; i < nops; i++)
13890 for (j = i + 1; j < nops; j++)
13891 if (regs[i] == regs[j])
13893 rtx t = peep2_find_free_register (0, nops * 2,
13894 TARGET_THUMB1 ? "l" : "r",
13895 SImode, &allocated);
13896 if (t == NULL_RTX)
13897 return false;
13898 reg_rtxs[i] = t;
13899 regs[i] = REGNO (t);
13903 /* Compute an ordering that maps the register numbers to an ascending
13904 sequence. */
13905 reg_order[0] = 0;
13906 for (i = 0; i < nops; i++)
13907 if (regs[i] < regs[reg_order[0]])
13908 reg_order[0] = i;
13910 for (i = 1; i < nops; i++)
13912 int this_order = reg_order[i - 1];
13913 for (j = 0; j < nops; j++)
13914 if (regs[j] > regs[reg_order[i - 1]]
13915 && (this_order == reg_order[i - 1]
13916 || regs[j] < regs[this_order]))
13917 this_order = j;
13918 reg_order[i] = this_order;
13921 /* Ensure that registers that must be live after the instruction end
13922 up with the correct value. */
13923 for (i = 0; i < nops; i++)
13925 int this_order = reg_order[i];
13926 if ((this_order != mem_order[i]
13927 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13928 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13929 return false;
13932 /* Load the constants. */
13933 for (i = 0; i < nops; i++)
13935 rtx op = operands[2 * nops + mem_order[i]];
13936 sorted_regs[i] = regs[reg_order[i]];
13937 emit_move_insn (reg_rtxs[reg_order[i]], op);
13940 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13942 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13943 if (TARGET_THUMB1)
13945 gcc_assert (base_reg_dies);
13946 write_back = TRUE;
13949 if (stm_case == 5)
13951 gcc_assert (base_reg_dies);
13952 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13953 offset = 0;
13956 addr = plus_constant (Pmode, base_reg_rtx, offset);
13958 for (i = 0; i < nops; i++)
13960 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13961 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13962 SImode, addr, 0);
13964 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13965 write_back ? offset + i * 4 : 0));
13966 return true;
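/* For illustration: a window such as
     mov r3, #1   str r3, [r0]   mov r3, #2   str r3, [r0, #4]
   reuses r3 for both constants, so the code above allocates a free register
   for one of them and then emits the two constant moves followed by a single
   stm of the pair, provided the registers that stay live afterwards keep
   their final values.  */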
13969 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13970 unaligned copies on processors which support unaligned semantics for those
13971 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13972 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13973 An interleave factor of 1 (the minimum) will perform no interleaving.
13974 Load/store multiple are used for aligned addresses where possible. */
13976 static void
13977 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13978 HOST_WIDE_INT length,
13979 unsigned int interleave_factor)
13981 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13982 int *regnos = XALLOCAVEC (int, interleave_factor);
13983 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13984 HOST_WIDE_INT i, j;
13985 HOST_WIDE_INT remaining = length, words;
13986 rtx halfword_tmp = NULL, byte_tmp = NULL;
13987 rtx dst, src;
13988 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13989 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13990 HOST_WIDE_INT srcoffset, dstoffset;
13991 HOST_WIDE_INT src_autoinc, dst_autoinc;
13992 rtx mem, addr;
13994 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13996 /* Use hard registers if we have aligned source or destination so we can use
13997 load/store multiple with contiguous registers. */
13998 if (dst_aligned || src_aligned)
13999 for (i = 0; i < interleave_factor; i++)
14000 regs[i] = gen_rtx_REG (SImode, i);
14001 else
14002 for (i = 0; i < interleave_factor; i++)
14003 regs[i] = gen_reg_rtx (SImode);
14005 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14006 src = copy_addr_to_reg (XEXP (srcbase, 0));
14008 srcoffset = dstoffset = 0;
14010 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14011 For copying the last bytes we want to subtract this offset again. */
14012 src_autoinc = dst_autoinc = 0;
14014 for (i = 0; i < interleave_factor; i++)
14015 regnos[i] = i;
14017 /* Copy BLOCK_SIZE_BYTES chunks. */
14019 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14021 /* Load words. */
14022 if (src_aligned && interleave_factor > 1)
14024 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14025 TRUE, srcbase, &srcoffset));
14026 src_autoinc += UNITS_PER_WORD * interleave_factor;
14028 else
14030 for (j = 0; j < interleave_factor; j++)
14032 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14033 - src_autoinc));
14034 mem = adjust_automodify_address (srcbase, SImode, addr,
14035 srcoffset + j * UNITS_PER_WORD);
14036 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14038 srcoffset += block_size_bytes;
14041 /* Store words. */
14042 if (dst_aligned && interleave_factor > 1)
14044 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14045 TRUE, dstbase, &dstoffset));
14046 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14048 else
14050 for (j = 0; j < interleave_factor; j++)
14052 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14053 - dst_autoinc));
14054 mem = adjust_automodify_address (dstbase, SImode, addr,
14055 dstoffset + j * UNITS_PER_WORD);
14056 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14058 dstoffset += block_size_bytes;
14061 remaining -= block_size_bytes;
14064 /* Copy any whole words left (note these aren't interleaved with any
14065 subsequent halfword/byte load/stores in the interests of simplicity). */
14067 words = remaining / UNITS_PER_WORD;
14069 gcc_assert (words < interleave_factor);
14071 if (src_aligned && words > 1)
14073 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14074 &srcoffset));
14075 src_autoinc += UNITS_PER_WORD * words;
14077 else
14079 for (j = 0; j < words; j++)
14081 addr = plus_constant (Pmode, src,
14082 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14083 mem = adjust_automodify_address (srcbase, SImode, addr,
14084 srcoffset + j * UNITS_PER_WORD);
14085 if (src_aligned)
14086 emit_move_insn (regs[j], mem);
14087 else
14088 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14090 srcoffset += words * UNITS_PER_WORD;
14093 if (dst_aligned && words > 1)
14095 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14096 &dstoffset));
14097 dst_autoinc += words * UNITS_PER_WORD;
14099 else
14101 for (j = 0; j < words; j++)
14103 addr = plus_constant (Pmode, dst,
14104 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14105 mem = adjust_automodify_address (dstbase, SImode, addr,
14106 dstoffset + j * UNITS_PER_WORD);
14107 if (dst_aligned)
14108 emit_move_insn (mem, regs[j]);
14109 else
14110 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14112 dstoffset += words * UNITS_PER_WORD;
14115 remaining -= words * UNITS_PER_WORD;
14117 gcc_assert (remaining < 4);
14119 /* Copy a halfword if necessary. */
14121 if (remaining >= 2)
14123 halfword_tmp = gen_reg_rtx (SImode);
14125 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14126 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14127 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14129 /* Either write out immediately, or delay until we've loaded the last
14130 byte, depending on interleave factor. */
14131 if (interleave_factor == 1)
14133 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14134 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14135 emit_insn (gen_unaligned_storehi (mem,
14136 gen_lowpart (HImode, halfword_tmp)));
14137 halfword_tmp = NULL;
14138 dstoffset += 2;
14141 remaining -= 2;
14142 srcoffset += 2;
14145 gcc_assert (remaining < 2);
14147 /* Copy last byte. */
14149 if ((remaining & 1) != 0)
14151 byte_tmp = gen_reg_rtx (SImode);
14153 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14154 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14155 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14157 if (interleave_factor == 1)
14159 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14160 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14161 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14162 byte_tmp = NULL;
14163 dstoffset++;
14166 remaining--;
14167 srcoffset++;
14170 /* Store last halfword if we haven't done so already. */
14172 if (halfword_tmp)
14174 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14175 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14176 emit_insn (gen_unaligned_storehi (mem,
14177 gen_lowpart (HImode, halfword_tmp)));
14178 dstoffset += 2;
14181 /* Likewise for last byte. */
14183 if (byte_tmp)
14185 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14186 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14187 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14188 dstoffset++;
14191 gcc_assert (remaining == 0 && srcoffset == dstoffset);
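/* For illustration: a 7-byte copy with INTERLEAVE_FACTOR == 1 and unaligned
   operands expands to one unaligned word load/store, then one unaligned
   halfword load/store, then a byte load/store, leaving SRCOFFSET and
   DSTOFFSET both equal to 7 as the assertion above requires.  */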
14194 /* From mips_adjust_block_mem:
14196 Helper function for doing a loop-based block operation on memory
14197 reference MEM. Each iteration of the loop will operate on LENGTH
14198 bytes of MEM.
14200 Create a new base register for use within the loop and point it to
14201 the start of MEM. Create a new memory reference that uses this
14202 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14204 static void
14205 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14206 rtx *loop_mem)
14208 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14210 /* Although the new mem does not refer to a known location,
14211 it does keep up to LENGTH bytes of alignment. */
14212 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14213 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14216 /* From mips_block_move_loop:
14218 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14219 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14220 the memory regions do not overlap. */
14222 static void
14223 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14224 unsigned int interleave_factor,
14225 HOST_WIDE_INT bytes_per_iter)
14227 rtx src_reg, dest_reg, final_src, test;
14228 HOST_WIDE_INT leftover;
14230 leftover = length % bytes_per_iter;
14231 length -= leftover;
14233 /* Create registers and memory references for use within the loop. */
14234 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14235 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14237 /* Calculate the value that SRC_REG should have after the last iteration of
14238 the loop. */
14239 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14240 0, 0, OPTAB_WIDEN);
14242 /* Emit the start of the loop. */
14243 rtx_code_label *label = gen_label_rtx ();
14244 emit_label (label);
14246 /* Emit the loop body. */
14247 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14248 interleave_factor);
14250 /* Move on to the next block. */
14251 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14252 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14254 /* Emit the loop condition. */
14255 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14256 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14258 /* Mop up any left-over bytes. */
14259 if (leftover)
14260 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
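/* For illustration: LENGTH == 100 with BYTES_PER_ITER == 16 gives
   LEFTOVER == 4, so the loop above runs six times (96 bytes) and the final
   straight-line call copies the remaining 4 bytes.  */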
14263 /* Emit a block move when either the source or destination is unaligned (not
14264 aligned to a four-byte boundary). This may need further tuning depending on
14265 core type, optimize_size setting, etc. */
14267 static int
14268 arm_movmemqi_unaligned (rtx *operands)
14270 HOST_WIDE_INT length = INTVAL (operands[2]);
14272 if (optimize_size)
14274 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14275 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14276 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14277 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14278 or dst_aligned though: allow more interleaving in those cases since the
14279 resulting code can be smaller. */
14280 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14281 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14283 if (length > 12)
14284 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14285 interleave_factor, bytes_per_iter);
14286 else
14287 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14288 interleave_factor);
14290 else
14292 /* Note that the loop created by arm_block_move_unaligned_loop may be
14293 subject to loop unrolling, which makes tuning this condition a little
14294 redundant. */
14295 if (length > 32)
14296 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14297 else
14298 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14301 return 1;
14305 arm_gen_movmemqi (rtx *operands)
14307 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14308 HOST_WIDE_INT srcoffset, dstoffset;
14309 rtx src, dst, srcbase, dstbase;
14310 rtx part_bytes_reg = NULL;
14311 rtx mem;
14313 if (!CONST_INT_P (operands[2])
14314 || !CONST_INT_P (operands[3])
14315 || INTVAL (operands[2]) > 64)
14316 return 0;
14318 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14319 return arm_movmemqi_unaligned (operands);
14321 if (INTVAL (operands[3]) & 3)
14322 return 0;
14324 dstbase = operands[0];
14325 srcbase = operands[1];
14327 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14328 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14330 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14331 out_words_to_go = INTVAL (operands[2]) / 4;
14332 last_bytes = INTVAL (operands[2]) & 3;
14333 dstoffset = srcoffset = 0;
14335 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14336 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14338 while (in_words_to_go >= 2)
14340 if (in_words_to_go > 4)
14341 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14342 TRUE, srcbase, &srcoffset));
14343 else
14344 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14345 src, FALSE, srcbase,
14346 &srcoffset));
14348 if (out_words_to_go)
14350 if (out_words_to_go > 4)
14351 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14352 TRUE, dstbase, &dstoffset));
14353 else if (out_words_to_go != 1)
14354 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14355 out_words_to_go, dst,
14356 (last_bytes == 0
14357 ? FALSE : TRUE),
14358 dstbase, &dstoffset));
14359 else
14361 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14362 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14363 if (last_bytes != 0)
14365 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14366 dstoffset += 4;
14371 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14372 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14375 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14376 if (out_words_to_go)
14378 rtx sreg;
14380 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14381 sreg = copy_to_reg (mem);
14383 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14384 emit_move_insn (mem, sreg);
14385 in_words_to_go--;
14387 gcc_assert (!in_words_to_go); /* Sanity check */
14390 if (in_words_to_go)
14392 gcc_assert (in_words_to_go > 0);
14394 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14395 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14398 gcc_assert (!last_bytes || part_bytes_reg);
14400 if (BYTES_BIG_ENDIAN && last_bytes)
14402 rtx tmp = gen_reg_rtx (SImode);
14404 /* The bytes we want are in the top end of the word. */
14405 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14406 GEN_INT (8 * (4 - last_bytes))));
14407 part_bytes_reg = tmp;
14409 while (last_bytes)
14411 mem = adjust_automodify_address (dstbase, QImode,
14412 plus_constant (Pmode, dst,
14413 last_bytes - 1),
14414 dstoffset + last_bytes - 1);
14415 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14417 if (--last_bytes)
14419 tmp = gen_reg_rtx (SImode);
14420 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14421 part_bytes_reg = tmp;
14426 else
14428 if (last_bytes > 1)
14430 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14431 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14432 last_bytes -= 2;
14433 if (last_bytes)
14435 rtx tmp = gen_reg_rtx (SImode);
14436 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14437 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14438 part_bytes_reg = tmp;
14439 dstoffset += 2;
14443 if (last_bytes)
14445 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14446 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14450 return 1;
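/* For illustration: a 16-byte word-aligned copy (operands[2] == 16,
   alignment >= 4) goes through the path above as roughly
     ldmia rs, {r0-r3}
     stmia rd, {r0-r3}
   whereas a 6-byte copy finishes with a halfword store taken from
   PART_BYTES_REG.  */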
14453 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14454 by mode size. */
14455 inline static rtx
14456 next_consecutive_mem (rtx mem)
14458 machine_mode mode = GET_MODE (mem);
14459 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14460 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14462 return adjust_automodify_address (mem, mode, addr, offset);
14465 /* Copy using LDRD/STRD instructions whenever possible.
14466 Returns true upon success. */
14467 bool
14468 gen_movmem_ldrd_strd (rtx *operands)
14470 unsigned HOST_WIDE_INT len;
14471 HOST_WIDE_INT align;
14472 rtx src, dst, base;
14473 rtx reg0;
14474 bool src_aligned, dst_aligned;
14475 bool src_volatile, dst_volatile;
14477 gcc_assert (CONST_INT_P (operands[2]));
14478 gcc_assert (CONST_INT_P (operands[3]));
14480 len = UINTVAL (operands[2]);
14481 if (len > 64)
14482 return false;
14484 /* Maximum alignment we can assume for both src and dst buffers. */
14485 align = INTVAL (operands[3]);
14487 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14488 return false;
14490 /* Place src and dst addresses in registers
14491 and update the corresponding mem rtx. */
14492 dst = operands[0];
14493 dst_volatile = MEM_VOLATILE_P (dst);
14494 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14495 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14496 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14498 src = operands[1];
14499 src_volatile = MEM_VOLATILE_P (src);
14500 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14501 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14502 src = adjust_automodify_address (src, VOIDmode, base, 0);
14504 if (!unaligned_access && !(src_aligned && dst_aligned))
14505 return false;
14507 if (src_volatile || dst_volatile)
14508 return false;
14510 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14511 if (!(dst_aligned || src_aligned))
14512 return arm_gen_movmemqi (operands);
14514 /* If either src or dst is unaligned, we'll be accessing it as pairs
14515 of unaligned SImode accesses. Otherwise we can generate DImode
14516 ldrd/strd instructions. */
14517 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14518 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14520 while (len >= 8)
14522 len -= 8;
14523 reg0 = gen_reg_rtx (DImode);
14524 rtx low_reg = NULL_RTX;
14525 rtx hi_reg = NULL_RTX;
14527 if (!src_aligned || !dst_aligned)
14529 low_reg = gen_lowpart (SImode, reg0);
14530 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14532 if (src_aligned)
14533 emit_move_insn (reg0, src);
14534 else
14536 emit_insn (gen_unaligned_loadsi (low_reg, src));
14537 src = next_consecutive_mem (src);
14538 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14541 if (dst_aligned)
14542 emit_move_insn (dst, reg0);
14543 else
14545 emit_insn (gen_unaligned_storesi (dst, low_reg));
14546 dst = next_consecutive_mem (dst);
14547 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14550 src = next_consecutive_mem (src);
14551 dst = next_consecutive_mem (dst);
14554 gcc_assert (len < 8);
14555 if (len >= 4)
14557 /* At least a word but less than a double-word left to copy. Copy a word. */
14558 reg0 = gen_reg_rtx (SImode);
14559 src = adjust_address (src, SImode, 0);
14560 dst = adjust_address (dst, SImode, 0);
14561 if (src_aligned)
14562 emit_move_insn (reg0, src);
14563 else
14564 emit_insn (gen_unaligned_loadsi (reg0, src));
14566 if (dst_aligned)
14567 emit_move_insn (dst, reg0);
14568 else
14569 emit_insn (gen_unaligned_storesi (dst, reg0));
14571 src = next_consecutive_mem (src);
14572 dst = next_consecutive_mem (dst);
14573 len -= 4;
14576 if (len == 0)
14577 return true;
14579 /* Copy the remaining bytes. */
14580 if (len >= 2)
14582 dst = adjust_address (dst, HImode, 0);
14583 src = adjust_address (src, HImode, 0);
14584 reg0 = gen_reg_rtx (SImode);
14585 if (src_aligned)
14586 emit_insn (gen_zero_extendhisi2 (reg0, src));
14587 else
14588 emit_insn (gen_unaligned_loadhiu (reg0, src));
14590 if (dst_aligned)
14591 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14592 else
14593 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14595 src = next_consecutive_mem (src);
14596 dst = next_consecutive_mem (dst);
14597 if (len == 2)
14598 return true;
14601 dst = adjust_address (dst, QImode, 0);
14602 src = adjust_address (src, QImode, 0);
14603 reg0 = gen_reg_rtx (QImode);
14604 emit_move_insn (reg0, src);
14605 emit_move_insn (dst, reg0);
14606 return true;
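/* For illustration: copying 10 bytes with both SRC and DST word-aligned uses
   one DImode move (an ldrd/strd pair where available) for the first 8 bytes
   and a halfword copy for the last 2; when only one side is aligned, each
   8-byte chunk is instead done as two unaligned SImode accesses on that
   side.  */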
14609 /* Select a dominance comparison mode if possible for a test of the general
14610 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14611 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14612 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14613 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14614 In all cases OP will be either EQ or NE, but we don't need to know which
14615 here. If we are unable to support a dominance comparison we return
14616 CC mode. This will then fail to match for the RTL expressions that
14617 generate this call. */
14618 machine_mode
14619 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14621 enum rtx_code cond1, cond2;
14622 int swapped = 0;
14624 /* Currently we will probably get the wrong result if the individual
14625 comparisons are not simple. This also ensures that it is safe to
14626 reverse a comparison if necessary. */
14627 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14628 != CCmode)
14629 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14630 != CCmode))
14631 return CCmode;
14633 /* The if_then_else variant of this tests the second condition if the
14634 first passes, but is true if the first fails. Reverse the first
14635 condition to get a true "inclusive-or" expression. */
14636 if (cond_or == DOM_CC_NX_OR_Y)
14637 cond1 = reverse_condition (cond1);
14639 /* If the comparisons are not equal, and one doesn't dominate the other,
14640 then we can't do this. */
14641 if (cond1 != cond2
14642 && !comparison_dominates_p (cond1, cond2)
14643 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14644 return CCmode;
14646 if (swapped)
14647 std::swap (cond1, cond2);
14649 switch (cond1)
14651 case EQ:
14652 if (cond_or == DOM_CC_X_AND_Y)
14653 return CC_DEQmode;
14655 switch (cond2)
14657 case EQ: return CC_DEQmode;
14658 case LE: return CC_DLEmode;
14659 case LEU: return CC_DLEUmode;
14660 case GE: return CC_DGEmode;
14661 case GEU: return CC_DGEUmode;
14662 default: gcc_unreachable ();
14665 case LT:
14666 if (cond_or == DOM_CC_X_AND_Y)
14667 return CC_DLTmode;
14669 switch (cond2)
14671 case LT:
14672 return CC_DLTmode;
14673 case LE:
14674 return CC_DLEmode;
14675 case NE:
14676 return CC_DNEmode;
14677 default:
14678 gcc_unreachable ();
14681 case GT:
14682 if (cond_or == DOM_CC_X_AND_Y)
14683 return CC_DGTmode;
14685 switch (cond2)
14687 case GT:
14688 return CC_DGTmode;
14689 case GE:
14690 return CC_DGEmode;
14691 case NE:
14692 return CC_DNEmode;
14693 default:
14694 gcc_unreachable ();
14697 case LTU:
14698 if (cond_or == DOM_CC_X_AND_Y)
14699 return CC_DLTUmode;
14701 switch (cond2)
14703 case LTU:
14704 return CC_DLTUmode;
14705 case LEU:
14706 return CC_DLEUmode;
14707 case NE:
14708 return CC_DNEmode;
14709 default:
14710 gcc_unreachable ();
14713 case GTU:
14714 if (cond_or == DOM_CC_X_AND_Y)
14715 return CC_DGTUmode;
14717 switch (cond2)
14719 case GTU:
14720 return CC_DGTUmode;
14721 case GEU:
14722 return CC_DGEUmode;
14723 case NE:
14724 return CC_DNEmode;
14725 default:
14726 gcc_unreachable ();
14729 /* The remaining cases only occur when both comparisons are the
14730 same. */
14731 case NE:
14732 gcc_assert (cond1 == cond2);
14733 return CC_DNEmode;
14735 case LE:
14736 gcc_assert (cond1 == cond2);
14737 return CC_DLEmode;
14739 case GE:
14740 gcc_assert (cond1 == cond2);
14741 return CC_DGEmode;
14743 case LEU:
14744 gcc_assert (cond1 == cond2);
14745 return CC_DLEUmode;
14747 case GEU:
14748 gcc_assert (cond1 == cond2);
14749 return CC_DGEUmode;
14751 default:
14752 gcc_unreachable ();
14756 machine_mode
14757 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14759 /* All floating point compares return CCFP if it is an equality
14760 comparison, and CCFPE otherwise. */
14761 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14763 switch (op)
14765 case EQ:
14766 case NE:
14767 case UNORDERED:
14768 case ORDERED:
14769 case UNLT:
14770 case UNLE:
14771 case UNGT:
14772 case UNGE:
14773 case UNEQ:
14774 case LTGT:
14775 return CCFPmode;
14777 case LT:
14778 case LE:
14779 case GT:
14780 case GE:
14781 return CCFPEmode;
14783 default:
14784 gcc_unreachable ();
14788 /* A compare with a shifted operand. Because of canonicalization, the
14789 comparison will have to be swapped when we emit the assembler. */
14790 if (GET_MODE (y) == SImode
14791 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14792 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14793 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14794 || GET_CODE (x) == ROTATERT))
14795 return CC_SWPmode;
14797 /* This operation is performed swapped, but since we only rely on the Z
14798 flag we don't need an additional mode. */
14799 if (GET_MODE (y) == SImode
14800 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14801 && GET_CODE (x) == NEG
14802 && (op == EQ || op == NE))
14803 return CC_Zmode;
14805 /* This is a special case that is used by combine to allow a
14806 comparison of a shifted byte load to be split into a zero-extend
14807 followed by a comparison of the shifted integer (only valid for
14808 equalities and unsigned inequalities). */
14809 if (GET_MODE (x) == SImode
14810 && GET_CODE (x) == ASHIFT
14811 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14812 && GET_CODE (XEXP (x, 0)) == SUBREG
14813 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14814 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14815 && (op == EQ || op == NE
14816 || op == GEU || op == GTU || op == LTU || op == LEU)
14817 && CONST_INT_P (y))
14818 return CC_Zmode;
14820 /* A construct for a conditional compare, if the false arm contains
14821 0, then both conditions must be true, otherwise either condition
14822 must be true. Not all conditions are possible, so CCmode is
14823 returned if it can't be done. */
14824 if (GET_CODE (x) == IF_THEN_ELSE
14825 && (XEXP (x, 2) == const0_rtx
14826 || XEXP (x, 2) == const1_rtx)
14827 && COMPARISON_P (XEXP (x, 0))
14828 && COMPARISON_P (XEXP (x, 1)))
14829 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14830 INTVAL (XEXP (x, 2)));
14832 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14833 if (GET_CODE (x) == AND
14834 && (op == EQ || op == NE)
14835 && COMPARISON_P (XEXP (x, 0))
14836 && COMPARISON_P (XEXP (x, 1)))
14837 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14838 DOM_CC_X_AND_Y);
14840 if (GET_CODE (x) == IOR
14841 && (op == EQ || op == NE)
14842 && COMPARISON_P (XEXP (x, 0))
14843 && COMPARISON_P (XEXP (x, 1)))
14844 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14845 DOM_CC_X_OR_Y);
14847 /* An operation (on Thumb) where we want to test for a single bit.
14848 This is done by shifting that bit up into the top bit of a
14849 scratch register; we can then branch on the sign bit. */
14850 if (TARGET_THUMB1
14851 && GET_MODE (x) == SImode
14852 && (op == EQ || op == NE)
14853 && GET_CODE (x) == ZERO_EXTRACT
14854 && XEXP (x, 1) == const1_rtx)
14855 return CC_Nmode;
14857 /* An operation that sets the condition codes as a side-effect, the
14858 V flag is not set correctly, so we can only use comparisons where
14859 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14860 instead.) */
14861 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14862 if (GET_MODE (x) == SImode
14863 && y == const0_rtx
14864 && (op == EQ || op == NE || op == LT || op == GE)
14865 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14866 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14867 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14868 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14869 || GET_CODE (x) == LSHIFTRT
14870 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14871 || GET_CODE (x) == ROTATERT
14872 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14873 return CC_NOOVmode;
14875 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14876 return CC_Zmode;
14878 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14879 && GET_CODE (x) == PLUS
14880 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14881 return CC_Cmode;
14883 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14885 switch (op)
14887 case EQ:
14888 case NE:
14889 /* A DImode comparison against zero can be implemented by
14890 or'ing the two halves together. */
14891 if (y == const0_rtx)
14892 return CC_Zmode;
14894 /* We can do an equality test in three Thumb instructions. */
14895 if (!TARGET_32BIT)
14896 return CC_Zmode;
14898 /* FALLTHROUGH */
14900 case LTU:
14901 case LEU:
14902 case GTU:
14903 case GEU:
14904 /* DImode unsigned comparisons can be implemented by cmp +
14905 cmpeq without a scratch register. Not worth doing in
14906 Thumb-2. */
14907 if (TARGET_32BIT)
14908 return CC_CZmode;
14910 /* FALLTHROUGH */
14912 case LT:
14913 case LE:
14914 case GT:
14915 case GE:
14916 /* DImode signed and unsigned comparisons can be implemented
14917 by cmp + sbcs with a scratch register, but that does not
14918 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14919 gcc_assert (op != EQ && op != NE);
14920 return CC_NCVmode;
14922 default:
14923 gcc_unreachable ();
14927 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14928 return GET_MODE (x);
14930 return CCmode;
14933 /* X and Y are two things to compare using CODE. Emit the compare insn and
14934 return the rtx for register 0 in the proper mode. FP means this is a
14935 floating point compare: I don't think that it is needed on the arm. */
14937 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14939 machine_mode mode;
14940 rtx cc_reg;
14941 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14943 /* We might have X as a constant, Y as a register because of the predicates
14944 used for cmpdi. If so, force X to a register here. */
14945 if (dimode_comparison && !REG_P (x))
14946 x = force_reg (DImode, x);
14948 mode = SELECT_CC_MODE (code, x, y);
14949 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14951 if (dimode_comparison
14952 && mode != CC_CZmode)
14954 rtx clobber, set;
14956 /* To compare two non-zero values for equality, XOR them and
14957 then compare against zero. Not used for ARM mode; there
14958 CC_CZmode is cheaper. */
14959 if (mode == CC_Zmode && y != const0_rtx)
14961 gcc_assert (!reload_completed);
14962 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14963 y = const0_rtx;
14966 /* A scratch register is required. */
14967 if (reload_completed)
14968 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14969 else
14970 scratch = gen_rtx_SCRATCH (SImode);
14972 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14973 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14974 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14976 else
14977 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14979 return cc_reg;
14982 /* Generate a sequence of insns that will generate the correct return
14983 address mask depending on the physical architecture that the program
14984 is running on. */
14986 arm_gen_return_addr_mask (void)
14988 rtx reg = gen_reg_rtx (Pmode);
14990 emit_insn (gen_return_addr_mask (reg));
14991 return reg;
14994 void
14995 arm_reload_in_hi (rtx *operands)
14997 rtx ref = operands[1];
14998 rtx base, scratch;
14999 HOST_WIDE_INT offset = 0;
15001 if (GET_CODE (ref) == SUBREG)
15003 offset = SUBREG_BYTE (ref);
15004 ref = SUBREG_REG (ref);
15007 if (REG_P (ref))
15009 /* We have a pseudo which has been spilt onto the stack; there
15010 are two cases here: the first where there is a simple
15011 stack-slot replacement and a second where the stack-slot is
15012 out of range, or is used as a subreg. */
15013 if (reg_equiv_mem (REGNO (ref)))
15015 ref = reg_equiv_mem (REGNO (ref));
15016 base = find_replacement (&XEXP (ref, 0));
15018 else
15019 /* The slot is out of range, or was dressed up in a SUBREG. */
15020 base = reg_equiv_address (REGNO (ref));
15022 /* PR 62554: If there is no equivalent memory location then just move
15023 the value as an SImode register move. This happens when the target
15024 architecture variant does not have an HImode register move. */
15025 if (base == NULL)
15027 gcc_assert (REG_P (operands[0]));
15028 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15029 gen_rtx_SUBREG (SImode, ref, 0)));
15030 return;
15033 else
15034 base = find_replacement (&XEXP (ref, 0));
15036 /* Handle the case where the address is too complex to be offset by 1. */
15037 if (GET_CODE (base) == MINUS
15038 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15040 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15042 emit_set_insn (base_plus, base);
15043 base = base_plus;
15045 else if (GET_CODE (base) == PLUS)
15047 /* The addend must be CONST_INT, or we would have dealt with it above. */
15048 HOST_WIDE_INT hi, lo;
15050 offset += INTVAL (XEXP (base, 1));
15051 base = XEXP (base, 0);
15053 /* Rework the address into a legal sequence of insns. */
15054 /* Valid range for lo is -4095 -> 4095 */
15055 lo = (offset >= 0
15056 ? (offset & 0xfff)
15057 : -((-offset) & 0xfff));
15059 /* Corner case, if lo is the max offset then we would be out of range
15060 once we have added the additional 1 below, so bump the msb into the
15061 pre-loading insn(s). */
15062 if (lo == 4095)
15063 lo &= 0x7ff;
15065 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15066 ^ (HOST_WIDE_INT) 0x80000000)
15067 - (HOST_WIDE_INT) 0x80000000);
15069 gcc_assert (hi + lo == offset);
15071 if (hi != 0)
15073 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15075 /* Get the base address; addsi3 knows how to handle constants
15076 that require more than one insn. */
15077 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15078 base = base_plus;
15079 offset = lo;
15083 /* Operands[2] may overlap operands[0] (though it won't overlap
15084 operands[1]), that's why we asked for a DImode reg -- so we can
15085 use the bit that does not overlap. */
15086 if (REGNO (operands[2]) == REGNO (operands[0]))
15087 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15088 else
15089 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15091 emit_insn (gen_zero_extendqisi2 (scratch,
15092 gen_rtx_MEM (QImode,
15093 plus_constant (Pmode, base,
15094 offset))));
15095 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15096 gen_rtx_MEM (QImode,
15097 plus_constant (Pmode, base,
15098 offset + 1))));
15099 if (!BYTES_BIG_ENDIAN)
15100 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15101 gen_rtx_IOR (SImode,
15102 gen_rtx_ASHIFT
15103 (SImode,
15104 gen_rtx_SUBREG (SImode, operands[0], 0),
15105 GEN_INT (8)),
15106 scratch));
15107 else
15108 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15109 gen_rtx_IOR (SImode,
15110 gen_rtx_ASHIFT (SImode, scratch,
15111 GEN_INT (8)),
15112 gen_rtx_SUBREG (SImode, operands[0], 0)));
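/* Illustrative standalone sketch (not part of arm.c): the offset-splitting
   arithmetic used by arm_reload_in_hi above.  An out-of-range offset is split
   into a low part LO within the +/-4095 addressing range and a high part HI
   that is added to the base register first, with HI + LO == OFFSET.  The
   0x80000000 XOR/subtract pair sign-extends the high part from 32 bits; the
   offset is assumed to fit in 32 bits, as the code above also assumes.  */

#include <assert.h>
#include <stdint.h>

static void
split_hi_offset (int64_t offset, int64_t *hi, int64_t *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);

  /* Corner case from the code above: leave headroom so that LO + 1 (the
     second byte access) stays in range.  */
  if (*lo == 4095)
    *lo &= 0x7ff;

  *hi = (((offset - *lo) & (int64_t) 0xffffffff) ^ (int64_t) 0x80000000)
	- (int64_t) 0x80000000;

  assert (*hi + *lo == offset);
}

static void
split_hi_offset_examples (void)
{
  int64_t hi, lo;

  split_hi_offset (5000, &hi, &lo);	/* hi = 4096, lo = 904.  */
  assert (hi == 4096 && lo == 904);

  split_hi_offset (-5000, &hi, &lo);	/* hi = -4096, lo = -904.  */
  assert (hi == -4096 && lo == -904);
}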
15115 /* Handle storing a half-word to memory during reload by synthesizing as two
15116 byte stores. Take care not to clobber the input values until after we
15117 have moved them somewhere safe. This code assumes that if the DImode
15118 scratch in operands[2] overlaps either the input value or output address
15119 in some way, then that value must die in this insn (we absolutely need
15120 two scratch registers for some corner cases). */
15121 void
15122 arm_reload_out_hi (rtx *operands)
15124 rtx ref = operands[0];
15125 rtx outval = operands[1];
15126 rtx base, scratch;
15127 HOST_WIDE_INT offset = 0;
15129 if (GET_CODE (ref) == SUBREG)
15131 offset = SUBREG_BYTE (ref);
15132 ref = SUBREG_REG (ref);
15135 if (REG_P (ref))
15137 /* We have a pseudo which has been spilt onto the stack; there
15138 are two cases here: the first where there is a simple
15139 stack-slot replacement and a second where the stack-slot is
15140 out of range, or is used as a subreg. */
15141 if (reg_equiv_mem (REGNO (ref)))
15143 ref = reg_equiv_mem (REGNO (ref));
15144 base = find_replacement (&XEXP (ref, 0));
15146 else
15147 /* The slot is out of range, or was dressed up in a SUBREG. */
15148 base = reg_equiv_address (REGNO (ref));
15150 /* PR 62254: If there is no equivalent memory location then just move
15151 the value as an SImode register move. This happens when the target
15152 architecture variant does not have an HImode register move. */
15153 if (base == NULL)
15155 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15157 if (REG_P (outval))
15159 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15160 gen_rtx_SUBREG (SImode, outval, 0)));
15162 else /* SUBREG_P (outval) */
15164 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15165 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15166 SUBREG_REG (outval)));
15167 else
15168 /* FIXME: Handle other cases ? */
15169 gcc_unreachable ();
15171 return;
15174 else
15175 base = find_replacement (&XEXP (ref, 0));
15177 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15179 /* Handle the case where the address is too complex to be offset by 1. */
15180 if (GET_CODE (base) == MINUS
15181 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15183 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15185 /* Be careful not to destroy OUTVAL. */
15186 if (reg_overlap_mentioned_p (base_plus, outval))
15188 /* Updating base_plus might destroy outval, see if we can
15189 swap the scratch and base_plus. */
15190 if (!reg_overlap_mentioned_p (scratch, outval))
15191 std::swap (scratch, base_plus);
15192 else
15194 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15196 /* Be conservative and copy OUTVAL into the scratch now,
15197 this should only be necessary if outval is a subreg
15198 of something larger than a word. */
15199 /* XXX Might this clobber base? I can't see how it can,
15200 since scratch is known to overlap with OUTVAL, and
15201 must be wider than a word. */
15202 emit_insn (gen_movhi (scratch_hi, outval));
15203 outval = scratch_hi;
15207 emit_set_insn (base_plus, base);
15208 base = base_plus;
15210 else if (GET_CODE (base) == PLUS)
15212 /* The addend must be CONST_INT, or we would have dealt with it above. */
15213 HOST_WIDE_INT hi, lo;
15215 offset += INTVAL (XEXP (base, 1));
15216 base = XEXP (base, 0);
15218 /* Rework the address into a legal sequence of insns. */
15219 /* Valid range for lo is -4095 -> 4095 */
15220 lo = (offset >= 0
15221 ? (offset & 0xfff)
15222 : -((-offset) & 0xfff));
15224 /* Corner case, if lo is the max offset then we would be out of range
15225 once we have added the additional 1 below, so bump the msb into the
15226 pre-loading insn(s). */
15227 if (lo == 4095)
15228 lo &= 0x7ff;
15230 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15231 ^ (HOST_WIDE_INT) 0x80000000)
15232 - (HOST_WIDE_INT) 0x80000000);
15234 gcc_assert (hi + lo == offset);
15236 if (hi != 0)
15238 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15240 /* Be careful not to destroy OUTVAL. */
15241 if (reg_overlap_mentioned_p (base_plus, outval))
15243 /* Updating base_plus might destroy outval, see if we
15244 can swap the scratch and base_plus. */
15245 if (!reg_overlap_mentioned_p (scratch, outval))
15246 std::swap (scratch, base_plus);
15247 else
15249 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15251 /* Be conservative and copy outval into scratch now,
15252 this should only be necessary if outval is a
15253 subreg of something larger than a word. */
15254 /* XXX Might this clobber base? I can't see how it
15255 can, since scratch is known to overlap with
15256 outval. */
15257 emit_insn (gen_movhi (scratch_hi, outval));
15258 outval = scratch_hi;
15262 /* Get the base address; addsi3 knows how to handle constants
15263 that require more than one insn. */
15264 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15265 base = base_plus;
15266 offset = lo;
15270 if (BYTES_BIG_ENDIAN)
15272 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15273 plus_constant (Pmode, base,
15274 offset + 1)),
15275 gen_lowpart (QImode, outval)));
15276 emit_insn (gen_lshrsi3 (scratch,
15277 gen_rtx_SUBREG (SImode, outval, 0),
15278 GEN_INT (8)));
15279 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15280 offset)),
15281 gen_lowpart (QImode, scratch)));
15283 else
15285 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15286 offset)),
15287 gen_lowpart (QImode, outval)));
15288 emit_insn (gen_lshrsi3 (scratch,
15289 gen_rtx_SUBREG (SImode, outval, 0),
15290 GEN_INT (8)));
15291 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15292 plus_constant (Pmode, base,
15293 offset + 1)),
15294 gen_lowpart (QImode, scratch)));
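/* Illustrative standalone sketch (not part of arm.c): what the reload helper
   above synthesizes.  A 16-bit store that cannot be done directly is emitted
   as two byte stores, the high byte being obtained by a logical shift right
   of 8, with the byte order chosen by endianness.  */

#include <assert.h>
#include <stdint.h>

static void
store_hi_as_two_bytes (uint8_t *mem, unsigned offset, uint16_t outval,
		       int bytes_big_endian)
{
  uint8_t low = (uint8_t) outval;		/* gen_lowpart (QImode, outval)  */
  uint8_t high = (uint8_t) (outval >> 8);	/* low byte of outval >> 8  */

  if (bytes_big_endian)
    {
      mem[offset] = high;
      mem[offset + 1] = low;
    }
  else
    {
      mem[offset] = low;
      mem[offset + 1] = high;
    }
}

static void
store_hi_example (void)
{
  uint8_t buf[2];

  store_hi_as_two_bytes (buf, 0, 0xA1B2, /*bytes_big_endian=*/0);
  assert (buf[0] == 0xB2 && buf[1] == 0xA1);
}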
15298 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15299 (padded to the size of a word) should be passed in a register. */
15301 static bool
15302 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15304 if (TARGET_AAPCS_BASED)
15305 return must_pass_in_stack_var_size (mode, type);
15306 else
15307 return must_pass_in_stack_var_size_or_pad (mode, type);
15311 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15312 byte of a stack argument has useful data. For legacy APCS ABIs we use
15313 the default. For AAPCS based ABIs small aggregate types are placed
15314 in the lowest memory address. */
15316 static pad_direction
15317 arm_function_arg_padding (machine_mode mode, const_tree type)
15319 if (!TARGET_AAPCS_BASED)
15320 return default_function_arg_padding (mode, type);
15322 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15323 return PAD_DOWNWARD;
15325 return PAD_UPWARD;
15329 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15330 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15331 register has useful data, and return the opposite if the most
15332 significant byte does. */
15334 bool
15335 arm_pad_reg_upward (machine_mode mode,
15336 tree type, int first ATTRIBUTE_UNUSED)
15338 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15340 /* For AAPCS, small aggregates, small fixed-point types,
15341 and small complex types are always padded upwards. */
15342 if (type)
15344 if ((AGGREGATE_TYPE_P (type)
15345 || TREE_CODE (type) == COMPLEX_TYPE
15346 || FIXED_POINT_TYPE_P (type))
15347 && int_size_in_bytes (type) <= 4)
15348 return true;
15350 else
15352 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15353 && GET_MODE_SIZE (mode) <= 4)
15354 return true;
15358 /* Otherwise, use default padding. */
15359 return !BYTES_BIG_ENDIAN;
15362 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15363 assuming that the address in the base register is word aligned. */
15364 bool
15365 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15367 HOST_WIDE_INT max_offset;
15369 /* Offset must be a multiple of 4 in Thumb mode. */
15370 if (TARGET_THUMB2 && ((offset & 3) != 0))
15371 return false;
15373 if (TARGET_THUMB2)
15374 max_offset = 1020;
15375 else if (TARGET_ARM)
15376 max_offset = 255;
15377 else
15378 return false;
15380 return ((offset <= max_offset) && (offset >= -max_offset));
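/* Illustrative standalone sketch (not part of arm.c) of the rule checked
   above: Thumb-2 LDRD/STRD offsets must be a multiple of 4 with magnitude at
   most 1020, while ARM-mode offsets only need a magnitude of at most 255.
   The is_thumb2/is_arm flags stand in for TARGET_THUMB2/TARGET_ARM.  */

#include <assert.h>

static int
ldrd_offset_ok (long offset, int is_thumb2, int is_arm)
{
  long max_offset;

  if (is_thumb2 && (offset & 3) != 0)
    return 0;

  if (is_thumb2)
    max_offset = 1020;
  else if (is_arm)
    max_offset = 255;
  else
    return 0;

  return offset <= max_offset && offset >= -max_offset;
}

static void
ldrd_offset_examples (void)
{
  assert (ldrd_offset_ok (1020, 1, 0));	  /* Thumb-2: in range, aligned.  */
  assert (!ldrd_offset_ok (1022, 1, 0));  /* Thumb-2: not a multiple of 4.  */
  assert (!ldrd_offset_ok (260, 0, 1));	  /* ARM: exceeds +/-255.  */
  assert (ldrd_offset_ok (254, 0, 1));	  /* ARM: odd offsets are allowed.  */
}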
15383 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15384 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15385 Assumes that the address in the base register RN is word aligned. Pattern
15386 guarantees that both memory accesses use the same base register,
15387 the offsets are constants within the range, and the gap between the offsets is 4.
15388 If reload is complete then check that the registers are legal. WBACK indicates whether
15389 address is updated. LOAD indicates whether memory access is load or store. */
15390 bool
15391 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15392 bool wback, bool load)
15394 unsigned int t, t2, n;
15396 if (!reload_completed)
15397 return true;
15399 if (!offset_ok_for_ldrd_strd (offset))
15400 return false;
15402 t = REGNO (rt);
15403 t2 = REGNO (rt2);
15404 n = REGNO (rn);
15406 if ((TARGET_THUMB2)
15407 && ((wback && (n == t || n == t2))
15408 || (t == SP_REGNUM)
15409 || (t == PC_REGNUM)
15410 || (t2 == SP_REGNUM)
15411 || (t2 == PC_REGNUM)
15412 || (!load && (n == PC_REGNUM))
15413 || (load && (t == t2))
15414 /* Triggers Cortex-M3 LDRD errata. */
15415 || (!wback && load && fix_cm3_ldrd && (n == t))))
15416 return false;
15418 if ((TARGET_ARM)
15419 && ((wback && (n == t || n == t2))
15420 || (t2 == PC_REGNUM)
15421 || (t % 2 != 0) /* First destination register is not even. */
15422 || (t2 != t + 1)
15423 /* PC can be used as base register (for offset addressing only),
15424 but it is deprecated. */
15425 || (n == PC_REGNUM)))
15426 return false;
15428 return true;
15431 /* Return true if a 64-bit access with alignment ALIGN and with a
15432 constant offset OFFSET from the base pointer is permitted on this
15433 architecture. */
15434 static bool
15435 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15437 return (unaligned_access
15438 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15439 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
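/* Illustrative standalone sketch (not part of arm.c) of the alignment rule
   above: with unaligned access enabled, the data only needs word (32-bit)
   alignment and a word-aligned offset; otherwise LDRD/STRD require doubleword
   (64-bit) alignment and a doubleword-aligned offset.  BITS_PER_WORD is taken
   to be 32, as on ARM.  */

#include <assert.h>

static int
ldrd_align_ok (long align_bits, long offset, int unaligned_access)
{
  return unaligned_access
	 ? (align_bits >= 32 && (offset & 3) == 0)
	 : (align_bits >= 64 && (offset & 7) == 0);
}

static void
ldrd_align_examples (void)
{
  assert (ldrd_align_ok (32, 4, /*unaligned_access=*/1));
  assert (!ldrd_align_ok (32, 4, /*unaligned_access=*/0));
  assert (ldrd_align_ok (64, 8, /*unaligned_access=*/0));
}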
15442 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15443 operand MEM's address contains an immediate offset from the base
15444 register and has no side effects, in which case it sets BASE,
15445 OFFSET and ALIGN accordingly. */
15446 static bool
15447 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15449 rtx addr;
15451 gcc_assert (base != NULL && offset != NULL);
15453 /* TODO: Handle more general memory operand patterns, such as
15454 PRE_DEC and PRE_INC. */
15456 if (side_effects_p (mem))
15457 return false;
15459 /* Can't deal with subregs. */
15460 if (GET_CODE (mem) == SUBREG)
15461 return false;
15463 gcc_assert (MEM_P (mem));
15465 *offset = const0_rtx;
15466 *align = MEM_ALIGN (mem);
15468 addr = XEXP (mem, 0);
15470 /* If addr isn't valid for DImode, then we can't handle it. */
15471 if (!arm_legitimate_address_p (DImode, addr,
15472 reload_in_progress || reload_completed))
15473 return false;
15475 if (REG_P (addr))
15477 *base = addr;
15478 return true;
15480 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15482 *base = XEXP (addr, 0);
15483 *offset = XEXP (addr, 1);
15484 return (REG_P (*base) && CONST_INT_P (*offset));
15487 return false;
15490 /* Called from a peephole2 to replace two word-size accesses with a
15491 single LDRD/STRD instruction. Returns true iff we can generate a
15492 new instruction sequence. That is, both accesses use the same base
15493 register and the gap between constant offsets is 4. This function
15494 may reorder its operands to match ldrd/strd RTL templates.
15495 OPERANDS are the operands found by the peephole matcher;
15496 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15497 corresponding memory operands. LOAD indicates whether the access
15498 is load or store. CONST_STORE indicates a store of constant
15499 integer values held in OPERANDS[4,5] and assumes that the pattern
15500 is four insns long, for the purpose of checking dead registers.
15501 COMMUTE indicates that register operands may be reordered. */
15502 bool
15503 gen_operands_ldrd_strd (rtx *operands, bool load,
15504 bool const_store, bool commute)
15506 int nops = 2;
15507 HOST_WIDE_INT offsets[2], offset, align[2];
15508 rtx base = NULL_RTX;
15509 rtx cur_base, cur_offset, tmp;
15510 int i, gap;
15511 HARD_REG_SET regset;
15513 gcc_assert (!const_store || !load);
15514 /* Check that the memory references are immediate offsets from the
15515 same base register. Extract the base register, the destination
15516 registers, and the corresponding memory offsets. */
15517 for (i = 0; i < nops; i++)
15519 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15520 &align[i]))
15521 return false;
15523 if (i == 0)
15524 base = cur_base;
15525 else if (REGNO (base) != REGNO (cur_base))
15526 return false;
15528 offsets[i] = INTVAL (cur_offset);
15529 if (GET_CODE (operands[i]) == SUBREG)
15531 tmp = SUBREG_REG (operands[i]);
15532 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15533 operands[i] = tmp;
15537 /* Make sure there is no dependency between the individual loads. */
15538 if (load && REGNO (operands[0]) == REGNO (base))
15539 return false; /* RAW */
15541 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15542 return false; /* WAW */
15544 /* If the same input register is used in both stores
15545 when storing different constants, try to find a free register.
15546 For example, the code
15547 mov r0, 0
15548 str r0, [r2]
15549 mov r0, 1
15550 str r0, [r2, #4]
15551 can be transformed into
15552 mov r1, 0
15553 mov r0, 1
15554 strd r1, r0, [r2]
15555 in Thumb mode assuming that r1 is free.
15556 For ARM mode do the same but only if the starting register
15557 can be made to be even. */
15558 if (const_store
15559 && REGNO (operands[0]) == REGNO (operands[1])
15560 && INTVAL (operands[4]) != INTVAL (operands[5]))
15562 if (TARGET_THUMB2)
15564 CLEAR_HARD_REG_SET (regset);
15565 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15566 if (tmp == NULL_RTX)
15567 return false;
15569 /* Use the new register in the first load to ensure that
15570 if the original input register is not dead after peephole,
15571 then it will have the correct constant value. */
15572 operands[0] = tmp;
15574 else if (TARGET_ARM)
15576 int regno = REGNO (operands[0]);
15577 if (!peep2_reg_dead_p (4, operands[0]))
15579 /* When the input register is even and is not dead after the
15580 pattern, it has to hold the second constant but we cannot
15581 form a legal STRD in ARM mode with this register as the second
15582 register. */
15583 if (regno % 2 == 0)
15584 return false;
15586 /* Is regno-1 free? */
15587 SET_HARD_REG_SET (regset);
15588 CLEAR_HARD_REG_BIT(regset, regno - 1);
15589 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15590 if (tmp == NULL_RTX)
15591 return false;
15593 operands[0] = tmp;
15595 else
15597 /* Find a DImode register. */
15598 CLEAR_HARD_REG_SET (regset);
15599 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15600 if (tmp != NULL_RTX)
15602 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15603 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15605 else
15607 /* Can we use the input register to form a DI register? */
15608 SET_HARD_REG_SET (regset);
15609 CLEAR_HARD_REG_BIT(regset,
15610 regno % 2 == 0 ? regno + 1 : regno - 1);
15611 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15612 if (tmp == NULL_RTX)
15613 return false;
15614 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15618 gcc_assert (operands[0] != NULL_RTX);
15619 gcc_assert (operands[1] != NULL_RTX);
15620 gcc_assert (REGNO (operands[0]) % 2 == 0);
15621 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15625 /* Make sure the instructions are ordered with lower memory access first. */
15626 if (offsets[0] > offsets[1])
15628 gap = offsets[0] - offsets[1];
15629 offset = offsets[1];
15631 /* Swap the instructions such that lower memory is accessed first. */
15632 std::swap (operands[0], operands[1]);
15633 std::swap (operands[2], operands[3]);
15634 std::swap (align[0], align[1]);
15635 if (const_store)
15636 std::swap (operands[4], operands[5]);
15638 else
15640 gap = offsets[1] - offsets[0];
15641 offset = offsets[0];
15644 /* Make sure accesses are to consecutive memory locations. */
15645 if (gap != 4)
15646 return false;
15648 if (!align_ok_ldrd_strd (align[0], offset))
15649 return false;
15651 /* Make sure we generate legal instructions. */
15652 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15653 false, load))
15654 return true;
15656 /* In Thumb state, where registers are almost unconstrained, there
15657 is little hope to fix it. */
15658 if (TARGET_THUMB2)
15659 return false;
15661 if (load && commute)
15663 /* Try reordering registers. */
15664 std::swap (operands[0], operands[1]);
15665 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15666 false, load))
15667 return true;
15670 if (const_store)
15672 /* If input registers are dead after this pattern, they can be
15673 reordered or replaced by other registers that are free in the
15674 current pattern. */
15675 if (!peep2_reg_dead_p (4, operands[0])
15676 || !peep2_reg_dead_p (4, operands[1]))
15677 return false;
15679 /* Try to reorder the input registers. */
15680 /* For example, the code
15681 mov r0, 0
15682 mov r1, 1
15683 str r1, [r2]
15684 str r0, [r2, #4]
15685 can be transformed into
15686 mov r1, 0
15687 mov r0, 1
15688 strd r0, [r2]
15690 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15691 false, false))
15693 std::swap (operands[0], operands[1]);
15694 return true;
15697 /* Try to find a free DI register. */
15698 CLEAR_HARD_REG_SET (regset);
15699 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15700 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15701 while (true)
15703 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15704 if (tmp == NULL_RTX)
15705 return false;
15707 /* DREG must be an even-numbered register in DImode.
15708 Split it into SI registers. */
15709 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15710 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15711 gcc_assert (operands[0] != NULL_RTX);
15712 gcc_assert (operands[1] != NULL_RTX);
15713 gcc_assert (REGNO (operands[0]) % 2 == 0);
15714 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15716 return (operands_ok_ldrd_strd (operands[0], operands[1],
15717 base, offset,
15718 false, load));
15722 return false;
15728 /* Print a symbolic form of X to the debug file, F. */
15729 static void
15730 arm_print_value (FILE *f, rtx x)
15732 switch (GET_CODE (x))
15734 case CONST_INT:
15735 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15736 return;
15738 case CONST_DOUBLE:
15739 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15740 return;
15742 case CONST_VECTOR:
15744 int i;
15746 fprintf (f, "<");
15747 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15749 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15750 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15751 fputc (',', f);
15753 fprintf (f, ">");
15755 return;
15757 case CONST_STRING:
15758 fprintf (f, "\"%s\"", XSTR (x, 0));
15759 return;
15761 case SYMBOL_REF:
15762 fprintf (f, "`%s'", XSTR (x, 0));
15763 return;
15765 case LABEL_REF:
15766 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15767 return;
15769 case CONST:
15770 arm_print_value (f, XEXP (x, 0));
15771 return;
15773 case PLUS:
15774 arm_print_value (f, XEXP (x, 0));
15775 fprintf (f, "+");
15776 arm_print_value (f, XEXP (x, 1));
15777 return;
15779 case PC:
15780 fprintf (f, "pc");
15781 return;
15783 default:
15784 fprintf (f, "????");
15785 return;
15789 /* Routines for manipulation of the constant pool. */
15791 /* Arm instructions cannot load a large constant directly into a
15792 register; they have to come from a pc relative load. The constant
15793 must therefore be placed in the addressable range of the pc
15794 relative load. Depending on the precise pc relative load
15795 instruction the range is somewhere between 256 bytes and 4k. This
15796 means that we often have to dump a constant inside a function, and
15797 generate code to branch around it.
15799 It is important to minimize this, since the branches will slow
15800 things down and make the code larger.
15802 Normally we can hide the table after an existing unconditional
15803 branch so that there is no interruption of the flow, but in the
15804 worst case the code looks like this:
15806 ldr rn, L1
15808 b L2
15809 align
15810 L1: .long value
15814 ldr rn, L3
15816 b L4
15817 align
15818 L3: .long value
15822 We fix this by performing a scan after scheduling, which notices
15823 which instructions need to have their operands fetched from the
15824 constant table and builds the table.
15826 The algorithm starts by building a table of all the constants that
15827 need fixing up and all the natural barriers in the function (places
15828 where a constant table can be dropped without breaking the flow).
15829 For each fixup we note how far the pc-relative replacement will be
15830 able to reach and the offset of the instruction into the function.
15832 Having built the table we then group the fixes together to form
15833 tables that are as large as possible (subject to addressing
15834 constraints) and emit each table of constants after the last
15835 barrier that is within range of all the instructions in the group.
15836 If a group does not contain a barrier, then we forcibly create one
15837 by inserting a jump instruction into the flow. Once the table has
15838 been inserted, the insns are then modified to reference the
15839 relevant entry in the pool.
15841 Possible enhancements to the algorithm (not implemented) are:
15843 1) For some processors and object formats, there may be benefit in
15844 aligning the pools to the start of cache lines; this alignment
15845 would need to be taken into account when calculating addressability
15846 of a pool. */
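/* Illustrative standalone sketch (not part of arm.c) of the placement
   constraint described above, simplified to forward references only.  Each
   fix can only reach a pool placed no later than FIX->ADDRESS +
   FIX->FORWARDS, so a group of fixes can share a pool only at a barrier that
   is no later than the smallest such limit and no earlier than the last fix
   in the group.  The structure and function names are hypothetical, not the
   ones used by the pass.  */

#include <stddef.h>

struct fix_sketch
{
  long address;		/* Offset of the insn in the function.  */
  long forwards;	/* How far forward its pool reference can reach.  */
};

/* Return the largest barrier address in BARRIERS[0..NBARRIERS-1] reachable by
   every fix in FIXES[0..NFIXES-1], or -1 if none exists (in which case the
   pass would insert a jump to create a new barrier).  */

static long
latest_usable_barrier (const struct fix_sketch *fixes, size_t nfixes,
		       const long *barriers, size_t nbarriers)
{
  long limit = -1, last_fix = -1, best = -1;
  size_t i;

  for (i = 0; i < nfixes; i++)
    {
      long reach = fixes[i].address + fixes[i].forwards;
      if (limit < 0 || reach < limit)
	limit = reach;
      if (fixes[i].address > last_fix)
	last_fix = fixes[i].address;
    }

  for (i = 0; i < nbarriers; i++)
    if (barriers[i] >= last_fix && barriers[i] <= limit && barriers[i] > best)
      best = barriers[i];

  return best;
}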
15848 /* These typedefs are located at the start of this file, so that
15849 they can be used in the prototypes there. This comment is to
15850 remind readers of that fact so that the following structures
15851 can be understood more easily.
15853 typedef struct minipool_node Mnode;
15854 typedef struct minipool_fixup Mfix; */
15856 struct minipool_node
15858 /* Doubly linked chain of entries. */
15859 Mnode * next;
15860 Mnode * prev;
15861 /* The maximum offset into the code that this entry can be placed. While
15862 pushing fixes for forward references, all entries are sorted in order
15863 of increasing max_address. */
15864 HOST_WIDE_INT max_address;
15865 /* Similarly for an entry inserted for a backwards ref. */
15866 HOST_WIDE_INT min_address;
15867 /* The number of fixes referencing this entry. This can become zero
15868 if we "unpush" an entry. In this case we ignore the entry when we
15869 come to emit the code. */
15870 int refcount;
15871 /* The offset from the start of the minipool. */
15872 HOST_WIDE_INT offset;
15873 /* The value in table. */
15874 rtx value;
15875 /* The mode of value. */
15876 machine_mode mode;
15877 /* The size of the value. With iWMMXt enabled
15878 sizes > 4 also imply an alignment of 8-bytes. */
15879 int fix_size;
15882 struct minipool_fixup
15884 Mfix * next;
15885 rtx_insn * insn;
15886 HOST_WIDE_INT address;
15887 rtx * loc;
15888 machine_mode mode;
15889 int fix_size;
15890 rtx value;
15891 Mnode * minipool;
15892 HOST_WIDE_INT forwards;
15893 HOST_WIDE_INT backwards;
15896 /* Fixes less than a word need padding out to a word boundary. */
15897 #define MINIPOOL_FIX_SIZE(mode) \
15898 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
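/* Illustrative example (not part of arm.c): the macro above pads sub-word
   entries up to 4 bytes, so a 2-byte (HImode) constant still occupies a full
   word in the pool, while 8- or 16-byte entries keep their size.  */

#include <assert.h>

static int
minipool_fix_size_sketch (int mode_size)
{
  return mode_size >= 4 ? mode_size : 4;
}

static void
minipool_fix_size_examples (void)
{
  assert (minipool_fix_size_sketch (2) == 4);	/* HImode.  */
  assert (minipool_fix_size_sketch (4) == 4);	/* SImode.  */
  assert (minipool_fix_size_sketch (8) == 8);	/* DImode/DFmode.  */
}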
15900 static Mnode * minipool_vector_head;
15901 static Mnode * minipool_vector_tail;
15902 static rtx_code_label *minipool_vector_label;
15903 static int minipool_pad;
15905 /* The linked list of all minipool fixes required for this function. */
15906 Mfix * minipool_fix_head;
15907 Mfix * minipool_fix_tail;
15908 /* The fix entry for the current minipool, once it has been placed. */
15909 Mfix * minipool_barrier;
15911 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15912 #define JUMP_TABLES_IN_TEXT_SECTION 0
15913 #endif
15915 static HOST_WIDE_INT
15916 get_jump_table_size (rtx_jump_table_data *insn)
15918 /* ADDR_VECs only take room if read-only data goes into the text
15919 section. */
15920 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15922 rtx body = PATTERN (insn);
15923 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15924 HOST_WIDE_INT size;
15925 HOST_WIDE_INT modesize;
15927 modesize = GET_MODE_SIZE (GET_MODE (body));
15928 size = modesize * XVECLEN (body, elt);
15929 switch (modesize)
15931 case 1:
15932 /* Round up size of TBB table to a halfword boundary. */
15933 size = (size + 1) & ~HOST_WIDE_INT_1;
15934 break;
15935 case 2:
15936 /* No padding necessary for TBH. */
15937 break;
15938 case 4:
15939 /* Add two bytes for alignment on Thumb. */
15940 if (TARGET_THUMB)
15941 size += 2;
15942 break;
15943 default:
15944 gcc_unreachable ();
15946 return size;
15949 return 0;
15952 /* Return the maximum amount of padding that will be inserted before
15953 label LABEL. */
15955 static HOST_WIDE_INT
15956 get_label_padding (rtx label)
15958 HOST_WIDE_INT align, min_insn_size;
15960 align = 1 << label_to_alignment (label);
15961 min_insn_size = TARGET_THUMB ? 2 : 4;
15962 return align > min_insn_size ? align - min_insn_size : 0;
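/* Illustrative example (not part of arm.c): an alignment of 2^N bytes can
   force up to (2^N - minimum insn size) bytes of padding before a label; for
   instance an 8-byte alignment in Thumb code (2-byte minimum insn) can insert
   up to 6 bytes.  */

#include <assert.h>

static long
label_padding_sketch (long align_bytes, long min_insn_size)
{
  return align_bytes > min_insn_size ? align_bytes - min_insn_size : 0;
}

static void
label_padding_examples (void)
{
  assert (label_padding_sketch (8, 2) == 6);	/* Thumb, 8-byte alignment.  */
  assert (label_padding_sketch (4, 4) == 0);	/* ARM, 4-byte alignment.  */
}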
15965 /* Move a minipool fix MP from its current location to before MAX_MP.
15966 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15967 constraints may need updating. */
15968 static Mnode *
15969 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15970 HOST_WIDE_INT max_address)
15972 /* The code below assumes these are different. */
15973 gcc_assert (mp != max_mp);
15975 if (max_mp == NULL)
15977 if (max_address < mp->max_address)
15978 mp->max_address = max_address;
15980 else
15982 if (max_address > max_mp->max_address - mp->fix_size)
15983 mp->max_address = max_mp->max_address - mp->fix_size;
15984 else
15985 mp->max_address = max_address;
15987 /* Unlink MP from its current position. Since max_mp is non-null,
15988 mp->prev must be non-null. */
15989 mp->prev->next = mp->next;
15990 if (mp->next != NULL)
15991 mp->next->prev = mp->prev;
15992 else
15993 minipool_vector_tail = mp->prev;
15995 /* Re-insert it before MAX_MP. */
15996 mp->next = max_mp;
15997 mp->prev = max_mp->prev;
15998 max_mp->prev = mp;
16000 if (mp->prev != NULL)
16001 mp->prev->next = mp;
16002 else
16003 minipool_vector_head = mp;
16006 /* Save the new entry. */
16007 max_mp = mp;
16009 /* Scan over the preceding entries and adjust their addresses as
16010 required. */
16011 while (mp->prev != NULL
16012 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16014 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16015 mp = mp->prev;
16018 return max_mp;
16021 /* Add a constant to the minipool for a forward reference. Returns the
16022 node added or NULL if the constant will not fit in this pool. */
16023 static Mnode *
16024 add_minipool_forward_ref (Mfix *fix)
16026 /* If set, max_mp is the first pool_entry that has a lower
16027 constraint than the one we are trying to add. */
16028 Mnode * max_mp = NULL;
16029 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16030 Mnode * mp;
16032 /* If the minipool starts before the end of FIX->INSN then this FIX
16033 can not be placed into the current pool. Furthermore, adding the
16034 new constant pool entry may cause the pool to start FIX_SIZE bytes
16035 earlier. */
16036 if (minipool_vector_head &&
16037 (fix->address + get_attr_length (fix->insn)
16038 >= minipool_vector_head->max_address - fix->fix_size))
16039 return NULL;
16041 /* Scan the pool to see if a constant with the same value has
16042 already been added. While we are doing this, also note the
16043 location where we must insert the constant if it doesn't already
16044 exist. */
16045 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16047 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16048 && fix->mode == mp->mode
16049 && (!LABEL_P (fix->value)
16050 || (CODE_LABEL_NUMBER (fix->value)
16051 == CODE_LABEL_NUMBER (mp->value)))
16052 && rtx_equal_p (fix->value, mp->value))
16054 /* More than one fix references this entry. */
16055 mp->refcount++;
16056 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16059 /* Note the insertion point if necessary. */
16060 if (max_mp == NULL
16061 && mp->max_address > max_address)
16062 max_mp = mp;
16064 /* If we are inserting an 8-byte aligned quantity and
16065 we have not already found an insertion point, then
16066 make sure that all such 8-byte aligned quantities are
16067 placed at the start of the pool. */
16068 if (ARM_DOUBLEWORD_ALIGN
16069 && max_mp == NULL
16070 && fix->fix_size >= 8
16071 && mp->fix_size < 8)
16073 max_mp = mp;
16074 max_address = mp->max_address;
16078 /* The value is not currently in the minipool, so we need to create
16079 a new entry for it. If MAX_MP is NULL, the entry will be put on
16080 the end of the list since the placement is less constrained than
16081 any existing entry. Otherwise, we insert the new fix before
16082 MAX_MP and, if necessary, adjust the constraints on the other
16083 entries. */
16084 mp = XNEW (Mnode);
16085 mp->fix_size = fix->fix_size;
16086 mp->mode = fix->mode;
16087 mp->value = fix->value;
16088 mp->refcount = 1;
16089 /* Not yet required for a backwards ref. */
16090 mp->min_address = -65536;
16092 if (max_mp == NULL)
16094 mp->max_address = max_address;
16095 mp->next = NULL;
16096 mp->prev = minipool_vector_tail;
16098 if (mp->prev == NULL)
16100 minipool_vector_head = mp;
16101 minipool_vector_label = gen_label_rtx ();
16103 else
16104 mp->prev->next = mp;
16106 minipool_vector_tail = mp;
16108 else
16110 if (max_address > max_mp->max_address - mp->fix_size)
16111 mp->max_address = max_mp->max_address - mp->fix_size;
16112 else
16113 mp->max_address = max_address;
16115 mp->next = max_mp;
16116 mp->prev = max_mp->prev;
16117 max_mp->prev = mp;
16118 if (mp->prev != NULL)
16119 mp->prev->next = mp;
16120 else
16121 minipool_vector_head = mp;
16124 /* Save the new entry. */
16125 max_mp = mp;
16127 /* Scan over the preceding entries and adjust their addresses as
16128 required. */
16129 while (mp->prev != NULL
16130 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16132 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16133 mp = mp->prev;
16136 return max_mp;
16139 static Mnode *
16140 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16141 HOST_WIDE_INT min_address)
16143 HOST_WIDE_INT offset;
16145 /* The code below assumes these are different. */
16146 gcc_assert (mp != min_mp);
16148 if (min_mp == NULL)
16150 if (min_address > mp->min_address)
16151 mp->min_address = min_address;
16153 else
16155 /* We will adjust this below if it is too loose. */
16156 mp->min_address = min_address;
16158 /* Unlink MP from its current position. Since min_mp is non-null,
16159 mp->next must be non-null. */
16160 mp->next->prev = mp->prev;
16161 if (mp->prev != NULL)
16162 mp->prev->next = mp->next;
16163 else
16164 minipool_vector_head = mp->next;
16166 /* Reinsert it after MIN_MP. */
16167 mp->prev = min_mp;
16168 mp->next = min_mp->next;
16169 min_mp->next = mp;
16170 if (mp->next != NULL)
16171 mp->next->prev = mp;
16172 else
16173 minipool_vector_tail = mp;
16176 min_mp = mp;
16178 offset = 0;
16179 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16181 mp->offset = offset;
16182 if (mp->refcount > 0)
16183 offset += mp->fix_size;
16185 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16186 mp->next->min_address = mp->min_address + mp->fix_size;
16189 return min_mp;
16192 /* Add a constant to the minipool for a backward reference. Returns the
16193 node added or NULL if the constant will not fit in this pool.
16195 Note that the code for insertion for a backwards reference can be
16196 somewhat confusing because the calculated offsets for each fix do
16197 not take into account the size of the pool (which is still under
16198 construction). */
16199 static Mnode *
16200 add_minipool_backward_ref (Mfix *fix)
16202 /* If set, min_mp is the last pool_entry that has a lower constraint
16203 than the one we are trying to add. */
16204 Mnode *min_mp = NULL;
16205 /* This can be negative, since it is only a constraint. */
16206 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16207 Mnode *mp;
16209 /* If we can't reach the current pool from this insn, or if we can't
16210 insert this entry at the end of the pool without pushing other
16211 fixes out of range, then we don't try. This ensures that we
16212 can't fail later on. */
16213 if (min_address >= minipool_barrier->address
16214 || (minipool_vector_tail->min_address + fix->fix_size
16215 >= minipool_barrier->address))
16216 return NULL;
16218 /* Scan the pool to see if a constant with the same value has
16219 already been added. While we are doing this, also note the
16220 location where we must insert the constant if it doesn't already
16221 exist. */
16222 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16224 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16225 && fix->mode == mp->mode
16226 && (!LABEL_P (fix->value)
16227 || (CODE_LABEL_NUMBER (fix->value)
16228 == CODE_LABEL_NUMBER (mp->value)))
16229 && rtx_equal_p (fix->value, mp->value)
16230 /* Check that there is enough slack to move this entry to the
16231 end of the table (this is conservative). */
16232 && (mp->max_address
16233 > (minipool_barrier->address
16234 + minipool_vector_tail->offset
16235 + minipool_vector_tail->fix_size)))
16237 mp->refcount++;
16238 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16241 if (min_mp != NULL)
16242 mp->min_address += fix->fix_size;
16243 else
16245 /* Note the insertion point if necessary. */
16246 if (mp->min_address < min_address)
16248 /* For now, we do not allow the insertion of 8-byte alignment
16249 requiring nodes anywhere but at the start of the pool. */
16250 if (ARM_DOUBLEWORD_ALIGN
16251 && fix->fix_size >= 8 && mp->fix_size < 8)
16252 return NULL;
16253 else
16254 min_mp = mp;
16256 else if (mp->max_address
16257 < minipool_barrier->address + mp->offset + fix->fix_size)
16259 /* Inserting before this entry would push the fix beyond
16260 its maximum address (which can happen if we have
16261 re-located a forwards fix); force the new fix to come
16262 after it. */
16263 if (ARM_DOUBLEWORD_ALIGN
16264 && fix->fix_size >= 8 && mp->fix_size < 8)
16265 return NULL;
16266 else
16268 min_mp = mp;
16269 min_address = mp->min_address + fix->fix_size;
16272 /* Do not insert a non-8-byte aligned quantity before 8-byte
16273 aligned quantities. */
16274 else if (ARM_DOUBLEWORD_ALIGN
16275 && fix->fix_size < 8
16276 && mp->fix_size >= 8)
16278 min_mp = mp;
16279 min_address = mp->min_address + fix->fix_size;
16284 /* We need to create a new entry. */
16285 mp = XNEW (Mnode);
16286 mp->fix_size = fix->fix_size;
16287 mp->mode = fix->mode;
16288 mp->value = fix->value;
16289 mp->refcount = 1;
16290 mp->max_address = minipool_barrier->address + 65536;
16292 mp->min_address = min_address;
16294 if (min_mp == NULL)
16296 mp->prev = NULL;
16297 mp->next = minipool_vector_head;
16299 if (mp->next == NULL)
16301 minipool_vector_tail = mp;
16302 minipool_vector_label = gen_label_rtx ();
16304 else
16305 mp->next->prev = mp;
16307 minipool_vector_head = mp;
16309 else
16311 mp->next = min_mp->next;
16312 mp->prev = min_mp;
16313 min_mp->next = mp;
16315 if (mp->next != NULL)
16316 mp->next->prev = mp;
16317 else
16318 minipool_vector_tail = mp;
16321 /* Save the new entry. */
16322 min_mp = mp;
16324 if (mp->prev)
16325 mp = mp->prev;
16326 else
16327 mp->offset = 0;
16329 /* Scan over the following entries and adjust their offsets. */
16330 while (mp->next != NULL)
16332 if (mp->next->min_address < mp->min_address + mp->fix_size)
16333 mp->next->min_address = mp->min_address + mp->fix_size;
16335 if (mp->refcount)
16336 mp->next->offset = mp->offset + mp->fix_size;
16337 else
16338 mp->next->offset = mp->offset;
16340 mp = mp->next;
16343 return min_mp;
16346 static void
16347 assign_minipool_offsets (Mfix *barrier)
16349 HOST_WIDE_INT offset = 0;
16350 Mnode *mp;
16352 minipool_barrier = barrier;
16354 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16356 mp->offset = offset;
16358 if (mp->refcount > 0)
16359 offset += mp->fix_size;
16363 /* Output the literal table */
16364 static void
16365 dump_minipool (rtx_insn *scan)
16367 Mnode * mp;
16368 Mnode * nmp;
16369 int align64 = 0;
16371 if (ARM_DOUBLEWORD_ALIGN)
16372 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16373 if (mp->refcount > 0 && mp->fix_size >= 8)
16375 align64 = 1;
16376 break;
16379 if (dump_file)
16380 fprintf (dump_file,
16381 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16382 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16384 scan = emit_label_after (gen_label_rtx (), scan);
16385 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16386 scan = emit_label_after (minipool_vector_label, scan);
16388 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16390 if (mp->refcount > 0)
16392 if (dump_file)
16394 fprintf (dump_file,
16395 ";; Offset %u, min %ld, max %ld ",
16396 (unsigned) mp->offset, (unsigned long) mp->min_address,
16397 (unsigned long) mp->max_address);
16398 arm_print_value (dump_file, mp->value);
16399 fputc ('\n', dump_file);
16402 rtx val = copy_rtx (mp->value);
16404 switch (GET_MODE_SIZE (mp->mode))
16406 #ifdef HAVE_consttable_1
16407 case 1:
16408 scan = emit_insn_after (gen_consttable_1 (val), scan);
16409 break;
16411 #endif
16412 #ifdef HAVE_consttable_2
16413 case 2:
16414 scan = emit_insn_after (gen_consttable_2 (val), scan);
16415 break;
16417 #endif
16418 #ifdef HAVE_consttable_4
16419 case 4:
16420 scan = emit_insn_after (gen_consttable_4 (val), scan);
16421 break;
16423 #endif
16424 #ifdef HAVE_consttable_8
16425 case 8:
16426 scan = emit_insn_after (gen_consttable_8 (val), scan);
16427 break;
16429 #endif
16430 #ifdef HAVE_consttable_16
16431 case 16:
16432 scan = emit_insn_after (gen_consttable_16 (val), scan);
16433 break;
16435 #endif
16436 default:
16437 gcc_unreachable ();
16441 nmp = mp->next;
16442 free (mp);
16445 minipool_vector_head = minipool_vector_tail = NULL;
16446 scan = emit_insn_after (gen_consttable_end (), scan);
16447 scan = emit_barrier_after (scan);
16450 /* Return the cost of forcibly inserting a barrier after INSN. */
16451 static int
16452 arm_barrier_cost (rtx_insn *insn)
16454 /* Basing the location of the pool on the loop depth is preferable,
16455 but at the moment, the basic block information seems to be
16456 corrupt by this stage of the compilation. */
16457 int base_cost = 50;
16458 rtx_insn *next = next_nonnote_insn (insn);
16460 if (next != NULL && LABEL_P (next))
16461 base_cost -= 20;
16463 switch (GET_CODE (insn))
16465 case CODE_LABEL:
16466 /* It will always be better to place the table before the label, rather
16467 than after it. */
16468 return 50;
16470 case INSN:
16471 case CALL_INSN:
16472 return base_cost;
16474 case JUMP_INSN:
16475 return base_cost - 10;
16477 default:
16478 return base_cost + 10;
16482 /* Find the best place in the insn stream in the range
16483 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16484 Create the barrier by inserting a jump and add a new fix entry for
16485 it. */
16486 static Mfix *
16487 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16489 HOST_WIDE_INT count = 0;
16490 rtx_barrier *barrier;
16491 rtx_insn *from = fix->insn;
16492 /* The instruction after which we will insert the jump. */
16493 rtx_insn *selected = NULL;
16494 int selected_cost;
16495 /* The address at which the jump instruction will be placed. */
16496 HOST_WIDE_INT selected_address;
16497 Mfix * new_fix;
16498 HOST_WIDE_INT max_count = max_address - fix->address;
16499 rtx_code_label *label = gen_label_rtx ();
16501 selected_cost = arm_barrier_cost (from);
16502 selected_address = fix->address;
16504 while (from && count < max_count)
16506 rtx_jump_table_data *tmp;
16507 int new_cost;
16509 /* This code shouldn't have been called if there was a natural barrier
16510 within range. */
16511 gcc_assert (!BARRIER_P (from));
16513 /* Count the length of this insn. This must stay in sync with the
16514 code that pushes minipool fixes. */
16515 if (LABEL_P (from))
16516 count += get_label_padding (from);
16517 else
16518 count += get_attr_length (from);
16520 /* If there is a jump table, add its length. */
16521 if (tablejump_p (from, NULL, &tmp))
16523 count += get_jump_table_size (tmp);
16525 /* Jump tables aren't in a basic block, so base the cost on
16526 the dispatch insn. If we select this location, we will
16527 still put the pool after the table. */
16528 new_cost = arm_barrier_cost (from);
16530 if (count < max_count
16531 && (!selected || new_cost <= selected_cost))
16533 selected = tmp;
16534 selected_cost = new_cost;
16535 selected_address = fix->address + count;
16538 /* Continue after the dispatch table. */
16539 from = NEXT_INSN (tmp);
16540 continue;
16543 new_cost = arm_barrier_cost (from);
16545 if (count < max_count
16546 && (!selected || new_cost <= selected_cost))
16548 selected = from;
16549 selected_cost = new_cost;
16550 selected_address = fix->address + count;
16553 from = NEXT_INSN (from);
16556 /* Make sure that we found a place to insert the jump. */
16557 gcc_assert (selected);
16559 /* Create a new JUMP_INSN that branches around a barrier. */
16560 from = emit_jump_insn_after (gen_jump (label), selected);
16561 JUMP_LABEL (from) = label;
16562 barrier = emit_barrier_after (from);
16563 emit_label_after (label, barrier);
16565 /* Create a minipool barrier entry for the new barrier. */
16566 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16567 new_fix->insn = barrier;
16568 new_fix->address = selected_address;
16569 new_fix->next = fix->next;
16570 fix->next = new_fix;
16572 return new_fix;
16575 /* Record that there is a natural barrier in the insn stream at
16576 ADDRESS. */
16577 static void
16578 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16580 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16582 fix->insn = insn;
16583 fix->address = address;
16585 fix->next = NULL;
16586 if (minipool_fix_head != NULL)
16587 minipool_fix_tail->next = fix;
16588 else
16589 minipool_fix_head = fix;
16591 minipool_fix_tail = fix;
16594 /* Record INSN, which will need fixing up to load a value from the
16595 minipool. ADDRESS is the offset of the insn since the start of the
16596 function; LOC is a pointer to the part of the insn which requires
16597 fixing; VALUE is the constant that must be loaded, which is of type
16598 MODE. */
16599 static void
16600 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16601 machine_mode mode, rtx value)
16603 gcc_assert (!arm_disable_literal_pool);
16604 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16606 fix->insn = insn;
16607 fix->address = address;
16608 fix->loc = loc;
16609 fix->mode = mode;
16610 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16611 fix->value = value;
16612 fix->forwards = get_attr_pool_range (insn);
16613 fix->backwards = get_attr_neg_pool_range (insn);
16614 fix->minipool = NULL;
16616 /* If an insn doesn't have a range defined for it, then it isn't
16617 expecting to be reworked by this code. Better to stop now than
16618 to generate duff assembly code. */
16619 gcc_assert (fix->forwards || fix->backwards);
16621 /* If an entry requires 8-byte alignment then assume all constant pools
16622 require 4 bytes of padding. Trying to do this later on a per-pool
16623 basis is awkward because existing pool entries have to be modified. */
16624 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16625 minipool_pad = 4;
16627 if (dump_file)
16629 fprintf (dump_file,
16630 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16631 GET_MODE_NAME (mode),
16632 INSN_UID (insn), (unsigned long) address,
16633 -1 * (long)fix->backwards, (long)fix->forwards);
16634 arm_print_value (dump_file, fix->value);
16635 fprintf (dump_file, "\n");
16638 /* Add it to the chain of fixes. */
16639 fix->next = NULL;
16641 if (minipool_fix_head != NULL)
16642 minipool_fix_tail->next = fix;
16643 else
16644 minipool_fix_head = fix;
16646 minipool_fix_tail = fix;
16649 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16650 Returns the number of insns needed, or 99 if we always want to synthesize
16651 the value. */
16653 arm_max_const_double_inline_cost ()
16655 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16658 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16659 Returns the number of insns needed, or 99 if we don't know how to
16660 do it. */
16662 arm_const_double_inline_cost (rtx val)
16664 rtx lowpart, highpart;
16665 machine_mode mode;
16667 mode = GET_MODE (val);
16669 if (mode == VOIDmode)
16670 mode = DImode;
16672 gcc_assert (GET_MODE_SIZE (mode) == 8);
16674 lowpart = gen_lowpart (SImode, val);
16675 highpart = gen_highpart_mode (SImode, mode, val);
16677 gcc_assert (CONST_INT_P (lowpart));
16678 gcc_assert (CONST_INT_P (highpart));
16680 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16681 NULL_RTX, NULL_RTX, 0, 0)
16682 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16683 NULL_RTX, NULL_RTX, 0, 0));
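/* Illustrative standalone sketch (not part of arm.c): the cost above is
   computed by splitting the 64-bit constant into its two 32-bit halves and
   synthesizing each half independently; in plain C the split is simply the
   low and high words of the value.  */

#include <assert.h>
#include <stdint.h>

static void
split_const_double (uint64_t val, uint32_t *lowpart, uint32_t *highpart)
{
  *lowpart = (uint32_t) val;		/* like gen_lowpart (SImode, val)  */
  *highpart = (uint32_t) (val >> 32);	/* like gen_highpart_mode (...)  */
}

static void
split_const_double_example (void)
{
  uint32_t lo, hi;

  split_const_double (0x0000000100000002ULL, &lo, &hi);
  assert (lo == 2 && hi == 1);
}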
16686 /* Cost of loading a SImode constant. */
16687 static inline int
16688 arm_const_inline_cost (enum rtx_code code, rtx val)
16690 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16691 NULL_RTX, NULL_RTX, 1, 0);
16694 /* Return true if it is worthwhile to split a 64-bit constant into two
16695 32-bit operations. This is the case if optimizing for size, or
16696 if we have load delay slots, or if one 32-bit part can be done with
16697 a single data operation. */
16698 bool
16699 arm_const_double_by_parts (rtx val)
16701 machine_mode mode = GET_MODE (val);
16702 rtx part;
16704 if (optimize_size || arm_ld_sched)
16705 return true;
16707 if (mode == VOIDmode)
16708 mode = DImode;
16710 part = gen_highpart_mode (SImode, mode, val);
16712 gcc_assert (CONST_INT_P (part));
16714 if (const_ok_for_arm (INTVAL (part))
16715 || const_ok_for_arm (~INTVAL (part)))
16716 return true;
16718 part = gen_lowpart (SImode, val);
16720 gcc_assert (CONST_INT_P (part));
16722 if (const_ok_for_arm (INTVAL (part))
16723 || const_ok_for_arm (~INTVAL (part)))
16724 return true;
16726 return false;
16729 /* Return true if it is possible to inline both the high and low parts
16730 of a 64-bit constant into 32-bit data processing instructions. */
16731 bool
16732 arm_const_double_by_immediates (rtx val)
16734 machine_mode mode = GET_MODE (val);
16735 rtx part;
16737 if (mode == VOIDmode)
16738 mode = DImode;
16740 part = gen_highpart_mode (SImode, mode, val);
16742 gcc_assert (CONST_INT_P (part));
16744 if (!const_ok_for_arm (INTVAL (part)))
16745 return false;
16747 part = gen_lowpart (SImode, val);
16749 gcc_assert (CONST_INT_P (part));
16751 if (!const_ok_for_arm (INTVAL (part)))
16752 return false;
16754 return true;
16757 /* Scan INSN and note any of its operands that need fixing.
16758 If DO_PUSHES is false we do not actually push any of the fixups
16759 needed. */
16760 static void
16761 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16763 int opno;
16765 extract_constrain_insn (insn);
16767 if (recog_data.n_alternatives == 0)
16768 return;
16770 /* Fill in recog_op_alt with information about the constraints of
16771 this insn. */
16772 preprocess_constraints (insn);
16774 const operand_alternative *op_alt = which_op_alt ();
16775 for (opno = 0; opno < recog_data.n_operands; opno++)
16777 /* Things we need to fix can only occur in inputs. */
16778 if (recog_data.operand_type[opno] != OP_IN)
16779 continue;
16781 /* If this alternative is a memory reference, then any mention
16782 of constants in this alternative is really to fool reload
16783 into allowing us to accept one there. We need to fix them up
16784 now so that we output the right code. */
16785 if (op_alt[opno].memory_ok)
16787 rtx op = recog_data.operand[opno];
16789 if (CONSTANT_P (op))
16791 if (do_pushes)
16792 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16793 recog_data.operand_mode[opno], op);
16795 else if (MEM_P (op)
16796 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16797 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16799 if (do_pushes)
16801 rtx cop = avoid_constant_pool_reference (op);
16803 /* Casting the address of something to a mode narrower
16804 than a word can cause avoid_constant_pool_reference()
16805 to return the pool reference itself. That's no good to
16806 us here. Let's just hope that we can use the
16807 constant pool value directly. */
16808 if (op == cop)
16809 cop = get_pool_constant (XEXP (op, 0));
16811 push_minipool_fix (insn, address,
16812 recog_data.operand_loc[opno],
16813 recog_data.operand_mode[opno], cop);
16820 return;
16823 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16824 and unions in the context of ARMv8-M Security Extensions. It is used as a
16825 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16826 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16827 or four masks, depending on whether it is being computed for a
16828 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16829 respectively. The tree for the type of the argument or a field within an
16830 argument is passed in ARG_TYPE, the current register this argument or field
16831 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16832 argument or field starts at is passed in STARTING_BIT and the last used bit
16833 is kept in LAST_USED_BIT which is also updated accordingly. */
16835 static unsigned HOST_WIDE_INT
16836 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16837 uint32_t * padding_bits_to_clear,
16838 unsigned starting_bit, int * last_used_bit)
16841 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16843 if (TREE_CODE (arg_type) == RECORD_TYPE)
16845 unsigned current_bit = starting_bit;
16846 tree field;
16847 long int offset, size;
16850 field = TYPE_FIELDS (arg_type);
16851 while (field)
16853 /* The offset within a structure is always an offset from
16854 the start of that structure. Make sure we take that into account in the
16855 calculation of the register-based offset that we use here. */
16856 offset = starting_bit;
16857 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16858 offset %= 32;
16860 /* This is the actual size of the field, for bitfields this is the
16861 bitfield width and not the container size. */
16862 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16864 if (*last_used_bit != offset)
16866 if (offset < *last_used_bit)
16868 /* This field's offset is before the 'last_used_bit', that
16869 means this field goes on the next register. So we need to
16870 pad the rest of the current register and increase the
16871 register number. */
16872 uint32_t mask;
16873 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16874 mask++;
16876 padding_bits_to_clear[*regno] |= mask;
16877 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16878 (*regno)++;
16880 else
16882 /* Otherwise we pad the bits between the last field's end and
16883 the start of the new field. */
16884 uint32_t mask;
16886 mask = ((uint32_t)-1) >> (32 - offset);
16887 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16888 padding_bits_to_clear[*regno] |= mask;
16890 current_bit = offset;
16893 /* Calculate further padding bits for inner structs/unions too. */
16894 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16896 *last_used_bit = current_bit;
16897 not_to_clear_reg_mask
16898 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16899 padding_bits_to_clear, offset,
16900 last_used_bit);
16902 else
16904 /* Update 'current_bit' with this field's size. If the
16905 'current_bit' lies in a subsequent register, update 'regno' and
16906 reset 'current_bit' to point to the current bit in that new
16907 register. */
16908 current_bit += size;
16909 while (current_bit >= 32)
16911 current_bit-=32;
16912 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16913 (*regno)++;
16915 *last_used_bit = current_bit;
16918 field = TREE_CHAIN (field);
16920 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16922 else if (TREE_CODE (arg_type) == UNION_TYPE)
16924 tree field, field_t;
16925 int i, regno_t, field_size;
16926 int max_reg = -1;
16927 int max_bit = -1;
16928 uint32_t mask;
16929 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16930 = {-1, -1, -1, -1};
16932 /* To compute the padding bits in a union we only consider bits as
16933 padding bits if they are always either a padding bit or fall outside a
16934 field's size for all fields in the union. */
16935 field = TYPE_FIELDS (arg_type);
16936 while (field)
16938 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16939 = {0U, 0U, 0U, 0U};
16940 int last_used_bit_t = *last_used_bit;
16941 regno_t = *regno;
16942 field_t = TREE_TYPE (field);
16944 /* If the field's type is either a record or a union make sure to
16945 compute their padding bits too. */
16946 if (RECORD_OR_UNION_TYPE_P (field_t))
16947 not_to_clear_reg_mask
16948 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16949 &padding_bits_to_clear_t[0],
16950 starting_bit, &last_used_bit_t);
16951 else
16953 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16954 regno_t = (field_size / 32) + *regno;
16955 last_used_bit_t = (starting_bit + field_size) % 32;
16958 for (i = *regno; i < regno_t; i++)
16960 /* For all but the last register used by this field only keep the
16961 padding bits that were padding bits in this field. */
16962 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16965 /* For the last register, keep all padding bits that were padding
16966 bits in this field and any padding bits that are still valid
16967 as padding bits but fall outside of this field's size. */
16968 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16969 padding_bits_to_clear_res[regno_t]
16970 &= padding_bits_to_clear_t[regno_t] | mask;
16972 /* Update the maximum size of the fields in terms of registers used
16973 ('max_reg') and the 'last_used_bit' in said register. */
16974 if (max_reg < regno_t)
16976 max_reg = regno_t;
16977 max_bit = last_used_bit_t;
16979 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16980 max_bit = last_used_bit_t;
16982 field = TREE_CHAIN (field);
16985 /* Update the current padding_bits_to_clear using the intersection of the
16986 padding bits of all the fields. */
16987 for (i=*regno; i < max_reg; i++)
16988 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16990 /* Do not keep trailing padding bits; we do not know yet whether this
16991 is the end of the argument. */
16992 mask = ((uint32_t) 1 << max_bit) - 1;
16993 padding_bits_to_clear[max_reg]
16994 |= padding_bits_to_clear_res[max_reg] & mask;
16996 *regno = max_reg;
16997 *last_used_bit = max_bit;
16999 else
17000 /* This function should only be used for structs and unions. */
17001 gcc_unreachable ();
17003 return not_to_clear_reg_mask;
17006 /* In the context of ARMv8-M Security Extensions, this function is used for both
17007 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute which
17008 registers are used when returning or passing arguments, and returns the
17009 result as a mask. It also computes a mask indicating the padding/unused
17010 bits for each of these registers, and passes this back through the
17011 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17012 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17013 the starting register used to pass this argument or return value is passed
17014 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17015 for struct and union types. */
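/* Illustrative example, not part of the original source: for a
   hypothetical argument of type struct { char c; int i; } passed
   starting in r0, the returned mask has bits 0 and 1 set (both r0 and
   r1 carry part of the value) and padding_bits_to_clear[0] is set to
   0xffffff00 for the three padding bytes that follow 'c'.  */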
17017 static unsigned HOST_WIDE_INT
17018 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17019 uint32_t * padding_bits_to_clear)
17022 int last_used_bit = 0;
17023 unsigned HOST_WIDE_INT not_to_clear_mask;
17025 if (RECORD_OR_UNION_TYPE_P (arg_type))
17027 not_to_clear_mask
17028 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17029 padding_bits_to_clear, 0,
17030 &last_used_bit);
17033 /* If the 'last_used_bit' is not zero, that means we are still using a
17034 part of the last 'regno'. In such cases we must clear the trailing
17035 bits. Otherwise we are not using regno and we should mark it as to be
17036 cleared. */
17037 if (last_used_bit != 0)
17038 padding_bits_to_clear[regno]
17039 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17040 else
17041 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17043 else
17045 not_to_clear_mask = 0;
17046 /* We are not dealing with structs or unions, so these arguments may be
17047 passed in floating-point registers too. In some cases a BLKmode is
17048 used when returning or passing arguments in multiple VFP registers. */
17049 if (GET_MODE (arg_rtx) == BLKmode)
17051 int i, arg_regs;
17052 rtx reg;
17054 /* This should really only occur when dealing with the hard-float
17055 ABI. */
17056 gcc_assert (TARGET_HARD_FLOAT_ABI);
17058 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17060 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17061 gcc_assert (REG_P (reg));
17063 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17065 /* If we are dealing with DF mode, make sure we don't
17066 clear either of the registers it addresses. */
17067 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17068 if (arg_regs > 1)
17070 unsigned HOST_WIDE_INT mask;
17071 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17072 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17073 not_to_clear_mask |= mask;
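/* Illustrative arithmetic, not part of the original source: assuming
   FIRST_VFP_REGNUM is 16, a DFmode value in d0 has REGNO 16 and
   arg_regs == 2, so mask = (1 << 18) - (1 << 16) sets bits 16 and 17
   and both halves of the double are kept uncleared.  */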
17077 else
17079 /* Otherwise we can rely on the MODE to determine how many registers
17080 are being used by this argument. */
17081 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17082 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17083 if (arg_regs > 1)
17085 unsigned HOST_WIDE_INT
17086 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17087 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17088 not_to_clear_mask |= mask;
17093 return not_to_clear_mask;
17096 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or
17097 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17098 which registers are to be fully cleared, using the value in register
17099 CLEARING_REG if more efficient. The PADDING_BITS_TO_CLEAR array of
17100 PADDING_BITS_LEN entries gives the bits that need to be cleared in
17101 caller-saved core registers, with SCRATCH_REG used as a scratch register for that clearing.
17103 NOTE: one of the three following conditions must hold:
17104 - SCRATCH_REG is a low register
17105 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
17106 in TO_CLEAR_BITMAP)
17107 - CLEARING_REG is a low register. */
17109 static void
17110 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17111 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17113 bool saved_clearing = false;
17114 rtx saved_clearing_reg = NULL_RTX;
17115 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17117 gcc_assert (arm_arch_cmse);
17119 if (!bitmap_empty_p (to_clear_bitmap))
17121 minregno = bitmap_first_set_bit (to_clear_bitmap);
17122 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17124 clearing_regno = REGNO (clearing_reg);
17126 /* Clear padding bits. */
17127 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17128 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17130 uint64_t mask;
17131 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17133 if (padding_bits_to_clear[i] == 0)
17134 continue;
17136 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17137 CLEARING_REG as scratch. */
17138 if (TARGET_THUMB1
17139 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17141 /* clearing_reg is not to be cleared; copy its value into scratch_reg
17142 so that we can use clearing_reg to clear the unused bits in the
17143 arguments. */
17144 if ((clearing_regno > maxregno
17145 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17146 && !saved_clearing)
17148 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17149 emit_move_insn (scratch_reg, clearing_reg);
17150 saved_clearing = true;
17151 saved_clearing_reg = scratch_reg;
17153 scratch_reg = clearing_reg;
17156 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17157 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17158 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17160 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17161 mask = (~padding_bits_to_clear[i]) >> 16;
17162 rtx16 = gen_int_mode (16, SImode);
17163 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17164 if (mask)
17165 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17167 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
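/* Illustrative sketch, not part of the original source: for
   padding_bits_to_clear[i] == 0xff000000 on r0 with r4 as scratch, the
   sequence built above is conceptually
     mov  r4, #0xffff      @ low half of ~0xff000000
     movt r4, #0x00ff      @ high half via the ZERO_EXTRACT set
     ands r0, r0, r4
   which clears only the padding bits of r0.  */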
17169 if (saved_clearing)
17170 emit_move_insn (clearing_reg, saved_clearing_reg);
17173 /* Clear full registers. */
17175 /* If not marked for clearing, clearing_reg already does not contain
17176 any secret. */
17177 if (clearing_regno <= maxregno
17178 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17180 emit_move_insn (clearing_reg, const0_rtx);
17181 emit_use (clearing_reg);
17182 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17185 for (regno = minregno; regno <= maxregno; regno++)
17187 if (!bitmap_bit_p (to_clear_bitmap, regno))
17188 continue;
17190 if (IS_VFP_REGNUM (regno))
17192 /* If regno is an even vfp register and its successor is also to
17193 be cleared, use vmov. */
17194 if (TARGET_VFP_DOUBLE
17195 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17196 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17198 emit_move_insn (gen_rtx_REG (DFmode, regno),
17199 CONST1_RTX (DFmode));
17200 emit_use (gen_rtx_REG (DFmode, regno));
17201 regno++;
17203 else
17205 emit_move_insn (gen_rtx_REG (SFmode, regno),
17206 CONST1_RTX (SFmode));
17207 emit_use (gen_rtx_REG (SFmode, regno));
17210 else
17212 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17213 emit_use (gen_rtx_REG (SImode, regno));
17218 /* Clears caller-saved registers not used to pass arguments before a
17219 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17220 registers is done in the __gnu_cmse_nonsecure_call libcall.
17221 See libgcc/config/arm/cmse_nonsecure_call.S. */
17223 static void
17224 cmse_nonsecure_call_clear_caller_saved (void)
17226 basic_block bb;
17228 FOR_EACH_BB_FN (bb, cfun)
17230 rtx_insn *insn;
17232 FOR_BB_INSNS (bb, insn)
17234 unsigned address_regnum, regno, maxregno =
17235 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17236 auto_sbitmap to_clear_bitmap (maxregno + 1);
17237 rtx_insn *seq;
17238 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17239 rtx address;
17240 CUMULATIVE_ARGS args_so_far_v;
17241 cumulative_args_t args_so_far;
17242 tree arg_type, fntype;
17243 bool first_param = true;
17244 function_args_iterator args_iter;
17245 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17247 if (!NONDEBUG_INSN_P (insn))
17248 continue;
17250 if (!CALL_P (insn))
17251 continue;
17253 pat = PATTERN (insn);
17254 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17255 call = XVECEXP (pat, 0, 0);
17257 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17258 if (GET_CODE (call) == SET)
17259 call = SET_SRC (call);
17261 /* Check if it is a cmse_nonsecure_call. */
17262 unspec = XEXP (call, 0);
17263 if (GET_CODE (unspec) != UNSPEC
17264 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17265 continue;
17267 /* Determine the caller-saved registers we need to clear. */
17268 bitmap_clear (to_clear_bitmap);
17269 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17271 /* Only look at the caller-saved floating point registers in case of
17272 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17273 lazy store and loads which clear both caller- and callee-saved
17274 registers. */
17275 if (TARGET_HARD_FLOAT_ABI)
17277 auto_sbitmap float_bitmap (maxregno + 1);
17279 bitmap_clear (float_bitmap);
17280 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17281 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17282 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17285 /* Make sure the register used to hold the function address is not
17286 cleared. */
17287 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17288 gcc_assert (MEM_P (address));
17289 gcc_assert (REG_P (XEXP (address, 0)));
17290 address_regnum = REGNO (XEXP (address, 0));
17291 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17292 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17294 /* Set basic block of call insn so that df rescan is performed on
17295 insns inserted here. */
17296 set_block_for_insn (insn, bb);
17297 df_set_flags (DF_DEFER_INSN_RESCAN);
17298 start_sequence ();
17300 /* Make sure the scheduler doesn't schedule other insns beyond
17301 here. */
17302 emit_insn (gen_blockage ());
17304 /* Walk through all arguments and clear registers appropriately. */
17306 fntype = TREE_TYPE (MEM_EXPR (address));
17307 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17308 NULL_TREE);
17309 args_so_far = pack_cumulative_args (&args_so_far_v);
17310 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17312 rtx arg_rtx;
17313 uint64_t to_clear_args_mask;
17314 machine_mode arg_mode = TYPE_MODE (arg_type);
17316 if (VOID_TYPE_P (arg_type))
17317 continue;
17319 if (!first_param)
17320 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17321 true);
17323 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17324 true);
17325 gcc_assert (REG_P (arg_rtx));
17326 to_clear_args_mask
17327 = compute_not_to_clear_mask (arg_type, arg_rtx,
17328 REGNO (arg_rtx),
17329 &padding_bits_to_clear[0]);
17330 if (to_clear_args_mask)
17332 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17334 if (to_clear_args_mask & (1ULL << regno))
17335 bitmap_clear_bit (to_clear_bitmap, regno);
17339 first_param = false;
17342 /* We use right shift and left shift to clear the LSB of the address
17343 we jump to instead of using bic, to avoid having to use an extra
17344 register on Thumb-1. */
17345 clearing_reg = XEXP (address, 0);
17346 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17347 emit_insn (gen_rtx_SET (clearing_reg, shift));
17348 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17349 emit_insn (gen_rtx_SET (clearing_reg, shift));
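/* Illustrative output, not part of the original source: for a call
   address held in r4 this emits
     lsrs r4, r4, #1
     lsls r4, r4, #1
   which forces bit 0 of the address to zero without needing the extra
   register a BIC immediate would require on Thumb-1.  */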
17351 /* Clear caller-saved registers that leak before doing a non-secure
17352 call. */
17353 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17354 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17355 NUM_ARG_REGS, ip_reg, clearing_reg);
17357 seq = get_insns ();
17358 end_sequence ();
17359 emit_insn_before (seq, insn);
17364 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17365 be useful in the next conditional jump insn. */
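/* Illustrative example, not part of the original source: given
     movs r3, r2
     ...
     cmp  r3, #0 ; bne .L1
   the move is rewritten below as r3 = r2 - 0 (a flag-setting SUBS on
   Thumb-1) and the branch is changed to test r3, so the condition
   codes produced by the rewritten insn can serve the branch.  */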
17367 static void
17368 thumb1_reorg (void)
17370 basic_block bb;
17372 FOR_EACH_BB_FN (bb, cfun)
17374 rtx dest, src;
17375 rtx cmp, op0, op1, set = NULL;
17376 rtx_insn *prev, *insn = BB_END (bb);
17377 bool insn_clobbered = false;
17379 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17380 insn = PREV_INSN (insn);
17382 /* Find the last cbranchsi4_insn in basic block BB. */
17383 if (insn == BB_HEAD (bb)
17384 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17385 continue;
17387 /* Get the register with which we are comparing. */
17388 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17389 op0 = XEXP (cmp, 0);
17390 op1 = XEXP (cmp, 1);
17392 /* Check that comparison is against ZERO. */
17393 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17394 continue;
17396 /* Find the first flag setting insn before INSN in basic block BB. */
17397 gcc_assert (insn != BB_HEAD (bb));
17398 for (prev = PREV_INSN (insn);
17399 (!insn_clobbered
17400 && prev != BB_HEAD (bb)
17401 && (NOTE_P (prev)
17402 || DEBUG_INSN_P (prev)
17403 || ((set = single_set (prev)) != NULL
17404 && get_attr_conds (prev) == CONDS_NOCOND)));
17405 prev = PREV_INSN (prev))
17407 if (reg_set_p (op0, prev))
17408 insn_clobbered = true;
17411 /* Skip if op0 is clobbered by insn other than prev. */
17412 if (insn_clobbered)
17413 continue;
17415 if (!set)
17416 continue;
17418 dest = SET_DEST (set);
17419 src = SET_SRC (set);
17420 if (!low_register_operand (dest, SImode)
17421 || !low_register_operand (src, SImode))
17422 continue;
17424 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17425 in INSN. Both src and dest of the move insn are checked. */
17426 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17428 dest = copy_rtx (dest);
17429 src = copy_rtx (src);
17430 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17431 PATTERN (prev) = gen_rtx_SET (dest, src);
17432 INSN_CODE (prev) = -1;
17433 /* Set test register in INSN to dest. */
17434 XEXP (cmp, 0) = copy_rtx (dest);
17435 INSN_CODE (insn) = -1;
17440 /* Convert instructions to their cc-clobbering variant if possible, since
17441 that allows us to use smaller encodings. */
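/* Illustrative example, not part of the original source: when the
   condition codes are dead at this point, a plain
     add r0, r1, r2
   (a 32-bit encoding outside an IT block) can be replaced by the
   flag-setting
     adds r0, r1, r2
   which has a 16-bit Thumb encoding.  */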
17443 static void
17444 thumb2_reorg (void)
17446 basic_block bb;
17447 regset_head live;
17449 INIT_REG_SET (&live);
17451 /* We are freeing block_for_insn in the toplev to keep compatibility
17452 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17453 compute_bb_for_insn ();
17454 df_analyze ();
17456 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17458 FOR_EACH_BB_FN (bb, cfun)
17460 if ((current_tune->disparage_flag_setting_t16_encodings
17461 == tune_params::DISPARAGE_FLAGS_ALL)
17462 && optimize_bb_for_speed_p (bb))
17463 continue;
17465 rtx_insn *insn;
17466 Convert_Action action = SKIP;
17467 Convert_Action action_for_partial_flag_setting
17468 = ((current_tune->disparage_flag_setting_t16_encodings
17469 != tune_params::DISPARAGE_FLAGS_NEITHER)
17470 && optimize_bb_for_speed_p (bb))
17471 ? SKIP : CONV;
17473 COPY_REG_SET (&live, DF_LR_OUT (bb));
17474 df_simulate_initialize_backwards (bb, &live);
17475 FOR_BB_INSNS_REVERSE (bb, insn)
17477 if (NONJUMP_INSN_P (insn)
17478 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17479 && GET_CODE (PATTERN (insn)) == SET)
17481 action = SKIP;
17482 rtx pat = PATTERN (insn);
17483 rtx dst = XEXP (pat, 0);
17484 rtx src = XEXP (pat, 1);
17485 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17487 if (UNARY_P (src) || BINARY_P (src))
17488 op0 = XEXP (src, 0);
17490 if (BINARY_P (src))
17491 op1 = XEXP (src, 1);
17493 if (low_register_operand (dst, SImode))
17495 switch (GET_CODE (src))
17497 case PLUS:
17498 /* Adding two registers and storing the result
17499 in the first source is already a 16-bit
17500 operation. */
17501 if (rtx_equal_p (dst, op0)
17502 && register_operand (op1, SImode))
17503 break;
17505 if (low_register_operand (op0, SImode))
17507 /* ADDS <Rd>,<Rn>,<Rm> */
17508 if (low_register_operand (op1, SImode))
17509 action = CONV;
17510 /* ADDS <Rdn>,#<imm8> */
17511 /* SUBS <Rdn>,#<imm8> */
17512 else if (rtx_equal_p (dst, op0)
17513 && CONST_INT_P (op1)
17514 && IN_RANGE (INTVAL (op1), -255, 255))
17515 action = CONV;
17516 /* ADDS <Rd>,<Rn>,#<imm3> */
17517 /* SUBS <Rd>,<Rn>,#<imm3> */
17518 else if (CONST_INT_P (op1)
17519 && IN_RANGE (INTVAL (op1), -7, 7))
17520 action = CONV;
17522 /* ADCS <Rd>, <Rn> */
17523 else if (GET_CODE (XEXP (src, 0)) == PLUS
17524 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17525 && low_register_operand (XEXP (XEXP (src, 0), 1),
17526 SImode)
17527 && COMPARISON_P (op1)
17528 && cc_register (XEXP (op1, 0), VOIDmode)
17529 && maybe_get_arm_condition_code (op1) == ARM_CS
17530 && XEXP (op1, 1) == const0_rtx)
17531 action = CONV;
17532 break;
17534 case MINUS:
17535 /* RSBS <Rd>,<Rn>,#0
17536 Not handled here: see NEG below. */
17537 /* SUBS <Rd>,<Rn>,#<imm3>
17538 SUBS <Rdn>,#<imm8>
17539 Not handled here: see PLUS above. */
17540 /* SUBS <Rd>,<Rn>,<Rm> */
17541 if (low_register_operand (op0, SImode)
17542 && low_register_operand (op1, SImode))
17543 action = CONV;
17544 break;
17546 case MULT:
17547 /* MULS <Rdm>,<Rn>,<Rdm>
17548 As an exception to the rule, this is only used
17549 when optimizing for size since MULS is slow on all
17550 known implementations. We do not even want to use
17551 MULS in cold code, if optimizing for speed, so we
17552 test the global flag here. */
17553 if (!optimize_size)
17554 break;
17555 /* Fall through. */
17556 case AND:
17557 case IOR:
17558 case XOR:
17559 /* ANDS <Rdn>,<Rm> */
17560 if (rtx_equal_p (dst, op0)
17561 && low_register_operand (op1, SImode))
17562 action = action_for_partial_flag_setting;
17563 else if (rtx_equal_p (dst, op1)
17564 && low_register_operand (op0, SImode))
17565 action = action_for_partial_flag_setting == SKIP
17566 ? SKIP : SWAP_CONV;
17567 break;
17569 case ASHIFTRT:
17570 case ASHIFT:
17571 case LSHIFTRT:
17572 /* ASRS <Rdn>,<Rm> */
17573 /* LSRS <Rdn>,<Rm> */
17574 /* LSLS <Rdn>,<Rm> */
17575 if (rtx_equal_p (dst, op0)
17576 && low_register_operand (op1, SImode))
17577 action = action_for_partial_flag_setting;
17578 /* ASRS <Rd>,<Rm>,#<imm5> */
17579 /* LSRS <Rd>,<Rm>,#<imm5> */
17580 /* LSLS <Rd>,<Rm>,#<imm5> */
17581 else if (low_register_operand (op0, SImode)
17582 && CONST_INT_P (op1)
17583 && IN_RANGE (INTVAL (op1), 0, 31))
17584 action = action_for_partial_flag_setting;
17585 break;
17587 case ROTATERT:
17588 /* RORS <Rdn>,<Rm> */
17589 if (rtx_equal_p (dst, op0)
17590 && low_register_operand (op1, SImode))
17591 action = action_for_partial_flag_setting;
17592 break;
17594 case NOT:
17595 /* MVNS <Rd>,<Rm> */
17596 if (low_register_operand (op0, SImode))
17597 action = action_for_partial_flag_setting;
17598 break;
17600 case NEG:
17601 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17602 if (low_register_operand (op0, SImode))
17603 action = CONV;
17604 break;
17606 case CONST_INT:
17607 /* MOVS <Rd>,#<imm8> */
17608 if (CONST_INT_P (src)
17609 && IN_RANGE (INTVAL (src), 0, 255))
17610 action = action_for_partial_flag_setting;
17611 break;
17613 case REG:
17614 /* MOVS and MOV<c> with registers have different
17615 encodings, so are not relevant here. */
17616 break;
17618 default:
17619 break;
17623 if (action != SKIP)
17625 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17626 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17627 rtvec vec;
17629 if (action == SWAP_CONV)
17631 src = copy_rtx (src);
17632 XEXP (src, 0) = op1;
17633 XEXP (src, 1) = op0;
17634 pat = gen_rtx_SET (dst, src);
17635 vec = gen_rtvec (2, pat, clobber);
17637 else /* action == CONV */
17638 vec = gen_rtvec (2, pat, clobber);
17640 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17641 INSN_CODE (insn) = -1;
17645 if (NONDEBUG_INSN_P (insn))
17646 df_simulate_one_insn_backwards (bb, insn, &live);
17650 CLEAR_REG_SET (&live);
17653 /* GCC puts the pool in the wrong place for ARM, since we can only
17654 load addresses a limited distance around the pc. We do some
17655 special munging to move the constant pool values to the correct
17656 point in the code. */
17657 static void
17658 arm_reorg (void)
17660 rtx_insn *insn;
17661 HOST_WIDE_INT address = 0;
17662 Mfix * fix;
17664 if (use_cmse)
17665 cmse_nonsecure_call_clear_caller_saved ();
17666 if (TARGET_THUMB1)
17667 thumb1_reorg ();
17668 else if (TARGET_THUMB2)
17669 thumb2_reorg ();
17671 /* Ensure all insns that must be split have been split at this point.
17672 Otherwise, the pool placement code below may compute incorrect
17673 insn lengths. Note that when optimizing, all insns have already
17674 been split at this point. */
17675 if (!optimize)
17676 split_all_insns_noflow ();
17678 /* Make sure we do not attempt to create a literal pool even though it should
17679 no longer be necessary to create any. */
17680 if (arm_disable_literal_pool)
17680 return;
17683 minipool_fix_head = minipool_fix_tail = NULL;
17685 /* The first insn must always be a note, or the code below won't
17686 scan it properly. */
17687 insn = get_insns ();
17688 gcc_assert (NOTE_P (insn));
17689 minipool_pad = 0;
17691 /* Scan all the insns and record the operands that will need fixing. */
17692 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17694 if (BARRIER_P (insn))
17695 push_minipool_barrier (insn, address);
17696 else if (INSN_P (insn))
17698 rtx_jump_table_data *table;
17700 note_invalid_constants (insn, address, true);
17701 address += get_attr_length (insn);
17703 /* If the insn is a vector jump, add the size of the table
17704 and skip the table. */
17705 if (tablejump_p (insn, NULL, &table))
17707 address += get_jump_table_size (table);
17708 insn = table;
17711 else if (LABEL_P (insn))
17712 /* Add the worst-case padding due to alignment. We don't add
17713 the _current_ padding because the minipool insertions
17714 themselves might change it. */
17715 address += get_label_padding (insn);
17718 fix = minipool_fix_head;
17720 /* Now scan the fixups and perform the required changes. */
17721 while (fix)
17723 Mfix * ftmp;
17724 Mfix * fdel;
17725 Mfix * last_added_fix;
17726 Mfix * last_barrier = NULL;
17727 Mfix * this_fix;
17729 /* Skip any further barriers before the next fix. */
17730 while (fix && BARRIER_P (fix->insn))
17731 fix = fix->next;
17733 /* No more fixes. */
17734 if (fix == NULL)
17735 break;
17737 last_added_fix = NULL;
17739 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17741 if (BARRIER_P (ftmp->insn))
17743 if (ftmp->address >= minipool_vector_head->max_address)
17744 break;
17746 last_barrier = ftmp;
17748 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17749 break;
17751 last_added_fix = ftmp; /* Keep track of the last fix added. */
17754 /* If we found a barrier, drop back to that; any fixes that we
17755 could have reached but come after the barrier will now go in
17756 the next mini-pool. */
17757 if (last_barrier != NULL)
17759 /* Reduce the refcount for those fixes that won't go into this
17760 pool after all. */
17761 for (fdel = last_barrier->next;
17762 fdel && fdel != ftmp;
17763 fdel = fdel->next)
17765 fdel->minipool->refcount--;
17766 fdel->minipool = NULL;
17769 ftmp = last_barrier;
17771 else
17773 /* ftmp is the first fix that we can't fit into this pool and
17774 there are no natural barriers that we could use. Insert a
17775 new barrier in the code somewhere between the previous
17776 fix and this one, and arrange to jump around it. */
17777 HOST_WIDE_INT max_address;
17779 /* The last item on the list of fixes must be a barrier, so
17780 we can never run off the end of the list of fixes without
17781 last_barrier being set. */
17782 gcc_assert (ftmp);
17784 max_address = minipool_vector_head->max_address;
17785 /* Check that there isn't another fix that is in range that
17786 we couldn't fit into this pool because the pool was
17787 already too large: we need to put the pool before such an
17788 instruction. The pool itself may come just after the
17789 fix because create_fix_barrier also allows space for a
17790 jump instruction. */
17791 if (ftmp->address < max_address)
17792 max_address = ftmp->address + 1;
17794 last_barrier = create_fix_barrier (last_added_fix, max_address);
17797 assign_minipool_offsets (last_barrier);
17799 while (ftmp)
17801 if (!BARRIER_P (ftmp->insn)
17802 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17803 == NULL))
17804 break;
17806 ftmp = ftmp->next;
17809 /* Scan over the fixes we have identified for this pool, fixing them
17810 up and adding the constants to the pool itself. */
17811 for (this_fix = fix; this_fix && ftmp != this_fix;
17812 this_fix = this_fix->next)
17813 if (!BARRIER_P (this_fix->insn))
17815 rtx addr
17816 = plus_constant (Pmode,
17817 gen_rtx_LABEL_REF (VOIDmode,
17818 minipool_vector_label),
17819 this_fix->minipool->offset);
17820 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17823 dump_minipool (last_barrier->insn);
17824 fix = ftmp;
17827 /* From now on we must synthesize any constants that we can't handle
17828 directly. This can happen if the RTL gets split during final
17829 instruction generation. */
17830 cfun->machine->after_arm_reorg = 1;
17832 /* Free the minipool memory. */
17833 obstack_free (&minipool_obstack, minipool_startobj);
17836 /* Routines to output assembly language. */
17838 /* Return the string representation of the passed-in real value. */
17839 static const char *
17840 fp_const_from_val (REAL_VALUE_TYPE *r)
17842 if (!fp_consts_inited)
17843 init_fp_table ();
17845 gcc_assert (real_equal (r, &value_fp0));
17846 return "0";
17849 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17850 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17851 is in the list, and UPDATE is true iff the list contains an explicit
17852 update of the base register. */
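/* Illustrative output, not part of the original source: a pop of
   {r4, r5, pc} with SP as the updated base in an ordinary function is
   printed as "pop {r4, r5, pc}", whereas the same list when returning
   from an interrupt handler is printed as "ldmfd sp!, {r4, r5, pc}^".  */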
17853 void
17854 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17855 bool update)
17857 int i;
17858 char pattern[100];
17859 int offset;
17860 const char *conditional;
17861 int num_saves = XVECLEN (operands[0], 0);
17862 unsigned int regno;
17863 unsigned int regno_base = REGNO (operands[1]);
17864 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17866 offset = 0;
17867 offset += update ? 1 : 0;
17868 offset += return_pc ? 1 : 0;
17870 /* Is the base register in the list? */
17871 for (i = offset; i < num_saves; i++)
17873 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17874 /* If SP is in the list, then the base register must be SP. */
17875 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17876 /* If base register is in the list, there must be no explicit update. */
17877 if (regno == regno_base)
17878 gcc_assert (!update);
17881 conditional = reverse ? "%?%D0" : "%?%d0";
17882 /* Can't use POP if returning from an interrupt. */
17883 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17884 sprintf (pattern, "pop%s\t{", conditional);
17885 else
17887 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17888 It's just a convention; their semantics are identical. */
17889 if (regno_base == SP_REGNUM)
17890 sprintf (pattern, "ldmfd%s\t", conditional);
17891 else if (update)
17892 sprintf (pattern, "ldmia%s\t", conditional);
17893 else
17894 sprintf (pattern, "ldm%s\t", conditional);
17896 strcat (pattern, reg_names[regno_base]);
17897 if (update)
17898 strcat (pattern, "!, {");
17899 else
17900 strcat (pattern, ", {");
17903 /* Output the first destination register. */
17904 strcat (pattern,
17905 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17907 /* Output the rest of the destination registers. */
17908 for (i = offset + 1; i < num_saves; i++)
17910 strcat (pattern, ", ");
17911 strcat (pattern,
17912 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17915 strcat (pattern, "}");
17917 if (interrupt_p && return_pc)
17918 strcat (pattern, "^");
17920 output_asm_insn (pattern, &cond);
17924 /* Output the assembly for a store multiple. */
17926 const char *
17927 vfp_output_vstmd (rtx * operands)
17929 char pattern[100];
17930 int p;
17931 int base;
17932 int i;
17933 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17934 ? XEXP (operands[0], 0)
17935 : XEXP (XEXP (operands[0], 0), 0);
17936 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17938 if (push_p)
17939 strcpy (pattern, "vpush%?.64\t{%P1");
17940 else
17941 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17943 p = strlen (pattern);
17945 gcc_assert (REG_P (operands[1]));
17947 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17948 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17950 p += sprintf (&pattern[p], ", d%d", base + i);
17952 strcpy (&pattern[p], "}");
17954 output_asm_insn (pattern, operands);
17955 return "";
17959 /* Emit RTL to save a block of VFP register pairs to the stack. Return the
17960 number of bytes pushed. */
17962 static int
17963 vfp_emit_fstmd (int base_reg, int count)
17965 rtx par;
17966 rtx dwarf;
17967 rtx tmp, reg;
17968 int i;
17970 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17971 register pairs are stored by a store multiple insn. We avoid this
17972 by pushing an extra pair. */
17973 if (count == 2 && !arm_arch6)
17975 if (base_reg == LAST_VFP_REGNUM - 3)
17976 base_reg -= 2;
17977 count++;
17980 /* FSTMD may not store more than 16 doubleword registers at once. Split
17981 larger stores into multiple parts (up to a maximum of two, in
17982 practice). */
17983 if (count > 16)
17985 int saved;
17986 /* NOTE: base_reg is an internal register number, so each D register
17987 counts as 2. */
17988 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17989 saved += vfp_emit_fstmd (base_reg, 16);
17990 return saved;
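/* Illustrative example, not part of the original source: a request to
   push 20 register pairs is split into a store of the top 4 pairs
   (base_reg + 32, since each D register counts as 2 here) followed by
   a store of the remaining 16, returning 20 * 8 = 160 bytes in total.  */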
17993 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17994 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17996 reg = gen_rtx_REG (DFmode, base_reg);
17997 base_reg += 2;
17999 XVECEXP (par, 0, 0)
18000 = gen_rtx_SET (gen_frame_mem
18001 (BLKmode,
18002 gen_rtx_PRE_MODIFY (Pmode,
18003 stack_pointer_rtx,
18004 plus_constant
18005 (Pmode, stack_pointer_rtx,
18006 - (count * 8)))
18008 gen_rtx_UNSPEC (BLKmode,
18009 gen_rtvec (1, reg),
18010 UNSPEC_PUSH_MULT));
18012 tmp = gen_rtx_SET (stack_pointer_rtx,
18013 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18014 RTX_FRAME_RELATED_P (tmp) = 1;
18015 XVECEXP (dwarf, 0, 0) = tmp;
18017 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18018 RTX_FRAME_RELATED_P (tmp) = 1;
18019 XVECEXP (dwarf, 0, 1) = tmp;
18021 for (i = 1; i < count; i++)
18023 reg = gen_rtx_REG (DFmode, base_reg);
18024 base_reg += 2;
18025 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18027 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18028 plus_constant (Pmode,
18029 stack_pointer_rtx,
18030 i * 8)),
18031 reg);
18032 RTX_FRAME_RELATED_P (tmp) = 1;
18033 XVECEXP (dwarf, 0, i + 1) = tmp;
18036 par = emit_insn (par);
18037 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18038 RTX_FRAME_RELATED_P (par) = 1;
18040 return count * 8;
18043 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
18044 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
18046 bool
18047 detect_cmse_nonsecure_call (tree addr)
18049 if (!addr)
18050 return FALSE;
18052 tree fntype = TREE_TYPE (addr);
18053 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18054 TYPE_ATTRIBUTES (fntype)))
18055 return TRUE;
18056 return FALSE;
18060 /* Emit a call instruction with pattern PAT. ADDR is the address of
18061 the call target. */
18063 void
18064 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18066 rtx insn;
18068 insn = emit_call_insn (pat);
18070 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18071 If the call might use such an entry, add a use of the PIC register
18072 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18073 if (TARGET_VXWORKS_RTP
18074 && flag_pic
18075 && !sibcall
18076 && GET_CODE (addr) == SYMBOL_REF
18077 && (SYMBOL_REF_DECL (addr)
18078 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18079 : !SYMBOL_REF_LOCAL_P (addr)))
18081 require_pic_register ();
18082 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18085 if (TARGET_AAPCS_BASED)
18087 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18088 linker. We need to add an IP clobber to allow setting
18089 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18090 is not needed since it's a fixed register. */
18091 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18092 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18096 /* Output a 'call' insn. */
18097 const char *
18098 output_call (rtx *operands)
18100 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18102 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
18103 if (REGNO (operands[0]) == LR_REGNUM)
18105 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18106 output_asm_insn ("mov%?\t%0, %|lr", operands);
18109 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18111 if (TARGET_INTERWORK || arm_arch4t)
18112 output_asm_insn ("bx%?\t%0", operands);
18113 else
18114 output_asm_insn ("mov%?\t%|pc, %0", operands);
18116 return "";
18119 /* Output a move of a long double from ARM registers to ARM registers.
18120 OPERANDS[0] is the destination.
18121 OPERANDS[1] is the source. */
18122 const char *
18123 output_mov_long_double_arm_from_arm (rtx *operands)
18125 /* We have to be careful here because the two might overlap. */
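/* Illustrative example, not part of the original source: copying
   {r2, r3, r4} into {r1, r2, r3} must go forwards (r1 <- r2, r2 <- r3,
   r3 <- r4), while copying {r1, r2, r3} into {r2, r3, r4} must go
   backwards, so the direction is chosen from the register numbers.  */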
18126 int dest_start = REGNO (operands[0]);
18127 int src_start = REGNO (operands[1]);
18128 rtx ops[2];
18129 int i;
18131 if (dest_start < src_start)
18133 for (i = 0; i < 3; i++)
18135 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18136 ops[1] = gen_rtx_REG (SImode, src_start + i);
18137 output_asm_insn ("mov%?\t%0, %1", ops);
18140 else
18142 for (i = 2; i >= 0; i--)
18144 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18145 ops[1] = gen_rtx_REG (SImode, src_start + i);
18146 output_asm_insn ("mov%?\t%0, %1", ops);
18150 return "";
18153 void
18154 arm_emit_movpair (rtx dest, rtx src)
18156 /* If the src is an immediate, simplify it. */
18157 if (CONST_INT_P (src))
18159 HOST_WIDE_INT val = INTVAL (src);
18160 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18161 if ((val >> 16) & 0x0000ffff)
18163 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18164 GEN_INT (16)),
18165 GEN_INT ((val >> 16) & 0x0000ffff));
18166 rtx_insn *insn = get_last_insn ();
18167 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18169 return;
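/* Illustrative example, not part of the original source: for
   val == 0x12345678 the two sets above correspond to
     movw r0, #0x5678
     movt r0, #0x1234
   with the REG_EQUAL note recording the full constant.  */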
18171 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18172 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18173 rtx_insn *insn = get_last_insn ();
18174 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18177 /* Output a move between double words. It must be REG<-MEM
18178 or MEM<-REG. */
18179 const char *
18180 output_move_double (rtx *operands, bool emit, int *count)
18182 enum rtx_code code0 = GET_CODE (operands[0]);
18183 enum rtx_code code1 = GET_CODE (operands[1]);
18184 rtx otherops[3];
18185 if (count)
18186 *count = 1;
18188 /* The only case when this might happen is when
18189 you are looking at the length of a DImode instruction
18190 that has an invalid constant in it. */
18191 if (code0 == REG && code1 != MEM)
18193 gcc_assert (!emit);
18194 *count = 2;
18195 return "";
18198 if (code0 == REG)
18200 unsigned int reg0 = REGNO (operands[0]);
18202 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18204 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18206 switch (GET_CODE (XEXP (operands[1], 0)))
18208 case REG:
18210 if (emit)
18212 if (TARGET_LDRD
18213 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18214 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18215 else
18216 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18218 break;
18220 case PRE_INC:
18221 gcc_assert (TARGET_LDRD);
18222 if (emit)
18223 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18224 break;
18226 case PRE_DEC:
18227 if (emit)
18229 if (TARGET_LDRD)
18230 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18231 else
18232 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18234 break;
18236 case POST_INC:
18237 if (emit)
18239 if (TARGET_LDRD)
18240 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18241 else
18242 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18244 break;
18246 case POST_DEC:
18247 gcc_assert (TARGET_LDRD);
18248 if (emit)
18249 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18250 break;
18252 case PRE_MODIFY:
18253 case POST_MODIFY:
18254 /* Autoincrement addressing modes should never have overlapping
18255 base and destination registers, and overlapping index registers
18256 are already prohibited, so this doesn't need to worry about
18257 fix_cm3_ldrd. */
18258 otherops[0] = operands[0];
18259 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18260 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18262 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18264 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18266 /* Registers overlap so split out the increment. */
18267 if (emit)
18269 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18270 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18272 if (count)
18273 *count = 2;
18275 else
18277 /* Use a single insn if we can.
18278 FIXME: IWMMXT allows offsets larger than ldrd can
18279 handle, fix these up with a pair of ldr. */
18280 if (TARGET_THUMB2
18281 || !CONST_INT_P (otherops[2])
18282 || (INTVAL (otherops[2]) > -256
18283 && INTVAL (otherops[2]) < 256))
18285 if (emit)
18286 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18288 else
18290 if (emit)
18292 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18293 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18295 if (count)
18296 *count = 2;
18301 else
18303 /* Use a single insn if we can.
18304 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18305 fix these up with a pair of ldr. */
18306 if (TARGET_THUMB2
18307 || !CONST_INT_P (otherops[2])
18308 || (INTVAL (otherops[2]) > -256
18309 && INTVAL (otherops[2]) < 256))
18311 if (emit)
18312 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18314 else
18316 if (emit)
18318 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18319 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18321 if (count)
18322 *count = 2;
18325 break;
18327 case LABEL_REF:
18328 case CONST:
18329 /* We might be able to use ldrd %0, %1 here. However, the range is
18330 different from that of ldr/adr, and it is broken on some ARMv7-M
18331 implementations. */
18332 /* Use the second register of the pair to avoid problematic
18333 overlap. */
18334 otherops[1] = operands[1];
18335 if (emit)
18336 output_asm_insn ("adr%?\t%0, %1", otherops);
18337 operands[1] = otherops[0];
18338 if (emit)
18340 if (TARGET_LDRD)
18341 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18342 else
18343 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18346 if (count)
18347 *count = 2;
18348 break;
18350 /* ??? This needs checking for thumb2. */
18351 default:
18352 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18353 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18355 otherops[0] = operands[0];
18356 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18357 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18359 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18361 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18363 switch ((int) INTVAL (otherops[2]))
18365 case -8:
18366 if (emit)
18367 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18368 return "";
18369 case -4:
18370 if (TARGET_THUMB2)
18371 break;
18372 if (emit)
18373 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18374 return "";
18375 case 4:
18376 if (TARGET_THUMB2)
18377 break;
18378 if (emit)
18379 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18380 return "";
18383 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18384 operands[1] = otherops[0];
18385 if (TARGET_LDRD
18386 && (REG_P (otherops[2])
18387 || TARGET_THUMB2
18388 || (CONST_INT_P (otherops[2])
18389 && INTVAL (otherops[2]) > -256
18390 && INTVAL (otherops[2]) < 256)))
18392 if (reg_overlap_mentioned_p (operands[0],
18393 otherops[2]))
18395 /* Swap base and index registers over to
18396 avoid a conflict. */
18397 std::swap (otherops[1], otherops[2]);
18399 /* If both registers conflict, it will usually
18400 have been fixed by a splitter. */
18401 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18402 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18404 if (emit)
18406 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18407 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18409 if (count)
18410 *count = 2;
18412 else
18414 otherops[0] = operands[0];
18415 if (emit)
18416 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18418 return "";
18421 if (CONST_INT_P (otherops[2]))
18423 if (emit)
18425 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18426 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18427 else
18428 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18431 else
18433 if (emit)
18434 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18437 else
18439 if (emit)
18440 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18443 if (count)
18444 *count = 2;
18446 if (TARGET_LDRD)
18447 return "ldrd%?\t%0, [%1]";
18449 return "ldmia%?\t%1, %M0";
18451 else
18453 otherops[1] = adjust_address (operands[1], SImode, 4);
18454 /* Take care of overlapping base/data reg. */
18455 if (reg_mentioned_p (operands[0], operands[1]))
18457 if (emit)
18459 output_asm_insn ("ldr%?\t%0, %1", otherops);
18460 output_asm_insn ("ldr%?\t%0, %1", operands);
18462 if (count)
18463 *count = 2;
18466 else
18468 if (emit)
18470 output_asm_insn ("ldr%?\t%0, %1", operands);
18471 output_asm_insn ("ldr%?\t%0, %1", otherops);
18473 if (count)
18474 *count = 2;
18479 else
18481 /* Constraints should ensure this. */
18482 gcc_assert (code0 == MEM && code1 == REG);
18483 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18484 || (TARGET_ARM && TARGET_LDRD));
18486 switch (GET_CODE (XEXP (operands[0], 0)))
18488 case REG:
18489 if (emit)
18491 if (TARGET_LDRD)
18492 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18493 else
18494 output_asm_insn ("stm%?\t%m0, %M1", operands);
18496 break;
18498 case PRE_INC:
18499 gcc_assert (TARGET_LDRD);
18500 if (emit)
18501 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18502 break;
18504 case PRE_DEC:
18505 if (emit)
18507 if (TARGET_LDRD)
18508 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18509 else
18510 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18512 break;
18514 case POST_INC:
18515 if (emit)
18517 if (TARGET_LDRD)
18518 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18519 else
18520 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18522 break;
18524 case POST_DEC:
18525 gcc_assert (TARGET_LDRD);
18526 if (emit)
18527 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18528 break;
18530 case PRE_MODIFY:
18531 case POST_MODIFY:
18532 otherops[0] = operands[1];
18533 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18534 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18536 /* IWMMXT allows offsets larger than ldrd can handle,
18537 fix these up with a pair of ldr. */
18538 if (!TARGET_THUMB2
18539 && CONST_INT_P (otherops[2])
18540 && (INTVAL(otherops[2]) <= -256
18541 || INTVAL(otherops[2]) >= 256))
18543 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18545 if (emit)
18547 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18548 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18550 if (count)
18551 *count = 2;
18553 else
18555 if (emit)
18557 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18558 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18560 if (count)
18561 *count = 2;
18564 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18566 if (emit)
18567 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18569 else
18571 if (emit)
18572 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18574 break;
18576 case PLUS:
18577 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18578 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18580 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18582 case -8:
18583 if (emit)
18584 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18585 return "";
18587 case -4:
18588 if (TARGET_THUMB2)
18589 break;
18590 if (emit)
18591 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18592 return "";
18594 case 4:
18595 if (TARGET_THUMB2)
18596 break;
18597 if (emit)
18598 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18599 return "";
18602 if (TARGET_LDRD
18603 && (REG_P (otherops[2])
18604 || TARGET_THUMB2
18605 || (CONST_INT_P (otherops[2])
18606 && INTVAL (otherops[2]) > -256
18607 && INTVAL (otherops[2]) < 256)))
18609 otherops[0] = operands[1];
18610 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18611 if (emit)
18612 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18613 return "";
18615 /* Fall through */
18617 default:
18618 otherops[0] = adjust_address (operands[0], SImode, 4);
18619 otherops[1] = operands[1];
18620 if (emit)
18622 output_asm_insn ("str%?\t%1, %0", operands);
18623 output_asm_insn ("str%?\t%H1, %0", otherops);
18625 if (count)
18626 *count = 2;
18630 return "";
18633 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18634 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18636 const char *
18637 output_move_quad (rtx *operands)
18639 if (REG_P (operands[0]))
18641 /* Load, or reg->reg move. */
18643 if (MEM_P (operands[1]))
18645 switch (GET_CODE (XEXP (operands[1], 0)))
18647 case REG:
18648 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18649 break;
18651 case LABEL_REF:
18652 case CONST:
18653 output_asm_insn ("adr%?\t%0, %1", operands);
18654 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18655 break;
18657 default:
18658 gcc_unreachable ();
18661 else
18663 rtx ops[2];
18664 int dest, src, i;
18666 gcc_assert (REG_P (operands[1]));
18668 dest = REGNO (operands[0]);
18669 src = REGNO (operands[1]);
18671 /* This seems pretty dumb, but hopefully GCC won't try to do it
18672 very often. */
18673 if (dest < src)
18674 for (i = 0; i < 4; i++)
18676 ops[0] = gen_rtx_REG (SImode, dest + i);
18677 ops[1] = gen_rtx_REG (SImode, src + i);
18678 output_asm_insn ("mov%?\t%0, %1", ops);
18680 else
18681 for (i = 3; i >= 0; i--)
18683 ops[0] = gen_rtx_REG (SImode, dest + i);
18684 ops[1] = gen_rtx_REG (SImode, src + i);
18685 output_asm_insn ("mov%?\t%0, %1", ops);
18689 else
18691 gcc_assert (MEM_P (operands[0]));
18692 gcc_assert (REG_P (operands[1]));
18693 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18695 switch (GET_CODE (XEXP (operands[0], 0)))
18697 case REG:
18698 output_asm_insn ("stm%?\t%m0, %M1", operands);
18699 break;
18701 default:
18702 gcc_unreachable ();
18706 return "";
18709 /* Output a VFP load or store instruction. */
18711 const char *
18712 output_move_vfp (rtx *operands)
18714 rtx reg, mem, addr, ops[2];
18715 int load = REG_P (operands[0]);
18716 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18717 int sp = (!TARGET_VFP_FP16INST
18718 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18719 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18720 const char *templ;
18721 char buff[50];
18722 machine_mode mode;
18724 reg = operands[!load];
18725 mem = operands[load];
18727 mode = GET_MODE (reg);
18729 gcc_assert (REG_P (reg));
18730 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18731 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18732 || mode == SFmode
18733 || mode == DFmode
18734 || mode == HImode
18735 || mode == SImode
18736 || mode == DImode
18737 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18738 gcc_assert (MEM_P (mem));
18740 addr = XEXP (mem, 0);
18742 switch (GET_CODE (addr))
18744 case PRE_DEC:
18745 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18746 ops[0] = XEXP (addr, 0);
18747 ops[1] = reg;
18748 break;
18750 case POST_INC:
18751 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18752 ops[0] = XEXP (addr, 0);
18753 ops[1] = reg;
18754 break;
18756 default:
18757 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18758 ops[0] = reg;
18759 ops[1] = mem;
18760 break;
18763 sprintf (buff, templ,
18764 load ? "ld" : "st",
18765 dp ? "64" : sp ? "32" : "16",
18766 dp ? "P" : "",
18767 integer_p ? "\t%@ int" : "");
18768 output_asm_insn (buff, ops);
18770 return "";
18773 /* Output a Neon double-word or quad-word load or store, or a load
18774 or store for larger structure modes.
18776 WARNING: The ordering of elements is weird in big-endian mode,
18777 because the EABI requires that vectors stored in memory appear
18778 as though they were stored by a VSTM instruction.
18779 GCC RTL defines element ordering based on in-memory order.
18780 This can be different from the architectural ordering of elements
18781 within a NEON register. The intrinsics defined in arm_neon.h use the
18782 NEON register element ordering, not the GCC RTL element ordering.
18784 For example, the in-memory ordering of a big-endian quadword
18785 vector with 16-bit elements when stored from register pair {d0,d1}
18786 will be (lowest address first, d0[N] is NEON register element N):
18788 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18790 When necessary, quadword registers (dN, dN+1) are moved to ARM
18791 registers from rN in the order:
18793 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18795 So that STM/LDM can be used on vectors in ARM registers, and the
18796 same memory layout will result as if VSTM/VLDM were used.
18798 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18799 possible, which allows use of appropriate alignment tags.
18800 Note that the choice of "64" is independent of the actual vector
18801 element size; this size simply ensures that the behavior is
18802 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18804 Due to limitations of those instructions, use of VST1.64/VLD1.64
18805 is not possible if:
18806 - the address contains PRE_DEC, or
18807 - the mode refers to more than 4 double-word registers
18809 In those cases, it would be possible to replace VSTM/VLDM by a
18810 sequence of instructions; this is not currently implemented since
18811 this is not certain to actually improve performance. */
18813 const char *
18814 output_move_neon (rtx *operands)
18816 rtx reg, mem, addr, ops[2];
18817 int regno, nregs, load = REG_P (operands[0]);
18818 const char *templ;
18819 char buff[50];
18820 machine_mode mode;
18822 reg = operands[!load];
18823 mem = operands[load];
18825 mode = GET_MODE (reg);
18827 gcc_assert (REG_P (reg));
18828 regno = REGNO (reg);
18829 nregs = REG_NREGS (reg) / 2;
18830 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18831 || NEON_REGNO_OK_FOR_QUAD (regno));
18832 gcc_assert (VALID_NEON_DREG_MODE (mode)
18833 || VALID_NEON_QREG_MODE (mode)
18834 || VALID_NEON_STRUCT_MODE (mode));
18835 gcc_assert (MEM_P (mem));
18837 addr = XEXP (mem, 0);
18839 /* Strip off const from addresses like (const (plus (...))). */
18840 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18841 addr = XEXP (addr, 0);
18843 switch (GET_CODE (addr))
18845 case POST_INC:
18846 /* We have to use vldm / vstm for too-large modes. */
18847 if (nregs > 4)
18849 templ = "v%smia%%?\t%%0!, %%h1";
18850 ops[0] = XEXP (addr, 0);
18852 else
18854 templ = "v%s1.64\t%%h1, %%A0";
18855 ops[0] = mem;
18857 ops[1] = reg;
18858 break;
18860 case PRE_DEC:
18861 /* We have to use vldm / vstm in this case, since there is no
18862 pre-decrement form of the vld1 / vst1 instructions. */
18863 templ = "v%smdb%%?\t%%0!, %%h1";
18864 ops[0] = XEXP (addr, 0);
18865 ops[1] = reg;
18866 break;
18868 case POST_MODIFY:
18869 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18870 gcc_unreachable ();
18872 case REG:
18873 /* We have to use vldm / vstm for too-large modes. */
18874 if (nregs > 1)
18876 if (nregs > 4)
18877 templ = "v%smia%%?\t%%m0, %%h1";
18878 else
18879 templ = "v%s1.64\t%%h1, %%A0";
18881 ops[0] = mem;
18882 ops[1] = reg;
18883 break;
18885 /* Fall through. */
18886 case LABEL_REF:
18887 case PLUS:
18889 int i;
18890 int overlap = -1;
18891 for (i = 0; i < nregs; i++)
18893 /* We're only using DImode here because it's a convenient size. */
18894 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18895 ops[1] = adjust_address (mem, DImode, 8 * i);
18896 if (reg_overlap_mentioned_p (ops[0], mem))
18898 gcc_assert (overlap == -1);
18899 overlap = i;
18901 else
18903 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18904 output_asm_insn (buff, ops);
18907 if (overlap != -1)
18909 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18910 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18911 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18912 output_asm_insn (buff, ops);
18915 return "";
18918 default:
18919 gcc_unreachable ();
18922 sprintf (buff, templ, load ? "ld" : "st");
18923 output_asm_insn (buff, ops);
18925 return "";
18928 /* Compute and return the length of neon_mov<mode>, where <mode> is
18929 one of VSTRUCT modes: EI, OI, CI or XI. */
18930 int
18931 arm_attr_length_move_neon (rtx_insn *insn)
18933 rtx reg, mem, addr;
18934 int load;
18935 machine_mode mode;
18937 extract_insn_cached (insn);
18939 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18941 mode = GET_MODE (recog_data.operand[0]);
18942 switch (mode)
18944 case E_EImode:
18945 case E_OImode:
18946 return 8;
18947 case E_CImode:
18948 return 12;
18949 case E_XImode:
18950 return 16;
18951 default:
18952 gcc_unreachable ();
18956 load = REG_P (recog_data.operand[0]);
18957 reg = recog_data.operand[!load];
18958 mem = recog_data.operand[load];
18960 gcc_assert (MEM_P (mem));
18962 addr = XEXP (mem, 0);
18964 /* Strip off const from addresses like (const (plus (...))). */
18965 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18966 addr = XEXP (addr, 0);
18968 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18970 int insns = REG_NREGS (reg) / 2;
18971 return insns * 4;
18973 else
18974 return 4;
18977 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18978 return zero. */
18980 int
18981 arm_address_offset_is_imm (rtx_insn *insn)
18983 rtx mem, addr;
18985 extract_insn_cached (insn);
18987 if (REG_P (recog_data.operand[0]))
18988 return 0;
18990 mem = recog_data.operand[0];
18992 gcc_assert (MEM_P (mem));
18994 addr = XEXP (mem, 0);
18996 if (REG_P (addr)
18997 || (GET_CODE (addr) == PLUS
18998 && REG_P (XEXP (addr, 0))
18999 && CONST_INT_P (XEXP (addr, 1))))
19000 return 1;
19001 else
19002 return 0;
19005 /* Output an ADD r, s, #n where n may be too big for one instruction.
19006 If n is zero and the destination equals the source, output nothing. */
19007 const char *
19008 output_add_immediate (rtx *operands)
19010 HOST_WIDE_INT n = INTVAL (operands[2]);
19012 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19014 if (n < 0)
19015 output_multi_immediate (operands,
19016 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19017 -n);
19018 else
19019 output_multi_immediate (operands,
19020 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19021 n);
19024 return "";
19027 /* Output a multiple immediate operation.
19028 OPERANDS is the vector of operands referred to in the output patterns.
19029 INSTR1 is the output pattern to use for the first constant.
19030 INSTR2 is the output pattern to use for subsequent constants.
19031 IMMED_OP is the index of the constant slot in OPERANDS.
19032 N is the constant value. */
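/* As a rough illustration of the splitting below: with INSTR1
   "add%?\t%0, %1, %2", INSTR2 "add%?\t%0, %0, %2" and N = 0x12345, the
   constant is broken into the byte-aligned chunks 0x45, 0x2300 and
   0x10000 (each a valid rotated immediate), giving approximately:

	add	r0, r1, #69		@ 0x45
	add	r0, r0, #8960		@ 0x2300
	add	r0, r0, #65536		@ 0x10000

   The register numbers are placeholders standing in for OPERANDS.  */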
19033 static const char *
19034 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19035 int immed_op, HOST_WIDE_INT n)
19037 #if HOST_BITS_PER_WIDE_INT > 32
19038 n &= 0xffffffff;
19039 #endif
19041 if (n == 0)
19043 /* Quick and easy output. */
19044 operands[immed_op] = const0_rtx;
19045 output_asm_insn (instr1, operands);
19047 else
19049 int i;
19050 const char * instr = instr1;
19052 /* Note that n is never zero here (which would give no output). */
19053 for (i = 0; i < 32; i += 2)
19055 if (n & (3 << i))
19057 operands[immed_op] = GEN_INT (n & (255 << i));
19058 output_asm_insn (instr, operands);
19059 instr = instr2;
19060 i += 6;
19065 return "";
19068 /* Return the name of a shifter operation. */
19069 static const char *
19070 arm_shift_nmem(enum rtx_code code)
19072 switch (code)
19074 case ASHIFT:
19075 return ARM_LSL_NAME;
19077 case ASHIFTRT:
19078 return "asr";
19080 case LSHIFTRT:
19081 return "lsr";
19083 case ROTATERT:
19084 return "ror";
19086 default:
19087 abort();
19091 /* Return the appropriate ARM instruction for the operation code.
19092 The returned result should not be overwritten. OP is the rtx of the
19093 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19094 was shifted. */
19095 const char *
19096 arithmetic_instr (rtx op, int shift_first_arg)
19098 switch (GET_CODE (op))
19100 case PLUS:
19101 return "add";
19103 case MINUS:
19104 return shift_first_arg ? "rsb" : "sub";
19106 case IOR:
19107 return "orr";
19109 case XOR:
19110 return "eor";
19112 case AND:
19113 return "and";
19115 case ASHIFT:
19116 case ASHIFTRT:
19117 case LSHIFTRT:
19118 case ROTATERT:
19119 return arm_shift_nmem(GET_CODE(op));
19121 default:
19122 gcc_unreachable ();
19126 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19127 for the operation code. The returned result should not be overwritten.
19128 OP is the rtx code of the shift.
19129 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19130 constant shift amount if the shift is by a constant. */
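/* A rough sketch of the mapping implemented below:
   (ashiftrt (reg) (const_int 3))  ->  "asr", *AMOUNTP == 3
   (ashift (reg) (reg))            ->  ARM_LSL_NAME, *AMOUNTP == -1
   (mult (reg) (const_int 8))      ->  ARM_LSL_NAME, *AMOUNTP == 3
   since multiplication by a power of two is treated as a left shift.  */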
19131 static const char *
19132 shift_op (rtx op, HOST_WIDE_INT *amountp)
19134 const char * mnem;
19135 enum rtx_code code = GET_CODE (op);
19137 switch (code)
19139 case ROTATE:
19140 if (!CONST_INT_P (XEXP (op, 1)))
19142 output_operand_lossage ("invalid shift operand");
19143 return NULL;
19146 code = ROTATERT;
19147 *amountp = 32 - INTVAL (XEXP (op, 1));
19148 mnem = "ror";
19149 break;
19151 case ASHIFT:
19152 case ASHIFTRT:
19153 case LSHIFTRT:
19154 case ROTATERT:
19155 mnem = arm_shift_nmem(code);
19156 if (CONST_INT_P (XEXP (op, 1)))
19158 *amountp = INTVAL (XEXP (op, 1));
19160 else if (REG_P (XEXP (op, 1)))
19162 *amountp = -1;
19163 return mnem;
19165 else
19167 output_operand_lossage ("invalid shift operand");
19168 return NULL;
19170 break;
19172 case MULT:
19173 /* We never have to worry about the amount being other than a
19174 power of 2, since this case can never be reloaded from a reg. */
19175 if (!CONST_INT_P (XEXP (op, 1)))
19177 output_operand_lossage ("invalid shift operand");
19178 return NULL;
19181 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19183 /* Amount must be a power of two. */
19184 if (*amountp & (*amountp - 1))
19186 output_operand_lossage ("invalid shift operand");
19187 return NULL;
19190 *amountp = exact_log2 (*amountp);
19191 gcc_assert (IN_RANGE (*amountp, 0, 31));
19192 return ARM_LSL_NAME;
19194 default:
19195 output_operand_lossage ("invalid shift operand");
19196 return NULL;
19199 /* This is not 100% correct, but follows from the desire to merge
19200 multiplication by a power of 2 with the recognizer for a
19201 shift. >=32 is not a valid shift for "lsl", so we must try and
19202 output a shift that produces the correct arithmetical result.
19203 Using lsr #32 is identical except for the fact that the carry bit
19204 is not set correctly if we set the flags; but we never use the
19205 carry bit from such an operation, so we can ignore that. */
19206 if (code == ROTATERT)
19207 /* Rotate is just modulo 32. */
19208 *amountp &= 31;
19209 else if (*amountp != (*amountp & 31))
19211 if (code == ASHIFT)
19212 mnem = "lsr";
19213 *amountp = 32;
19216 /* Shifts of 0 are no-ops. */
19217 if (*amountp == 0)
19218 return NULL;
19220 return mnem;
19223 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19224 because /bin/as is horribly restrictive. The judgement about
19225 whether or not each character is 'printable' (and can be output as
19226 is) or not (and must be printed with an octal escape) must be made
19227 with reference to the *host* character set -- the situation is
19228 similar to that discussed in the comments above pp_c_char in
19229 c-pretty-print.c. */
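/* For example, the string "say \"hi\"" followed by a newline and a NUL
   would be emitted roughly as

	.ascii	"say \"hi\"\012\000"

   quotes and backslashes are escaped, non-printable characters become
   three-digit octal escapes, and a fresh .ascii directive is started once
   MAX_ASCII_LEN characters have been written on the current one.  */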
19231 #define MAX_ASCII_LEN 51
19233 void
19234 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19236 int i;
19237 int len_so_far = 0;
19239 fputs ("\t.ascii\t\"", stream);
19241 for (i = 0; i < len; i++)
19243 int c = p[i];
19245 if (len_so_far >= MAX_ASCII_LEN)
19247 fputs ("\"\n\t.ascii\t\"", stream);
19248 len_so_far = 0;
19251 if (ISPRINT (c))
19253 if (c == '\\' || c == '\"')
19255 putc ('\\', stream);
19256 len_so_far++;
19258 putc (c, stream);
19259 len_so_far++;
19261 else
19263 fprintf (stream, "\\%03o", c);
19264 len_so_far += 4;
19268 fputs ("\"\n", stream);
19271 /* Whether a register is callee saved or not. This is necessary because high
19272 registers are marked as caller saved when optimizing for size on Thumb-1
19273 targets despite being callee saved in order to avoid using them. */
19274 #define callee_saved_reg_p(reg) \
19275 (!call_used_regs[reg] \
19276 || (TARGET_THUMB1 && optimize_size \
19277 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19279 /* Compute the register save mask for registers 0 through 12
19280 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19282 static unsigned long
19283 arm_compute_save_reg0_reg12_mask (void)
19285 unsigned long func_type = arm_current_func_type ();
19286 unsigned long save_reg_mask = 0;
19287 unsigned int reg;
19289 if (IS_INTERRUPT (func_type))
19291 unsigned int max_reg;
19292 /* Interrupt functions must not corrupt any registers,
19293 even call clobbered ones. If this is a leaf function
19294 we can just examine the registers used by the RTL, but
19295 otherwise we have to assume that whatever function is
19296 called might clobber anything, and so we have to save
19297 all the call-clobbered registers as well. */
19298 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19299 /* FIQ handlers have registers r8 - r12 banked, so
19300 we only need to check r0 - r7. Normal ISRs only
19301 bank r14 and r15, so we must check up to r12.
19302 r13 is the stack pointer which is always preserved,
19303 so we do not need to consider it here. */
19304 max_reg = 7;
19305 else
19306 max_reg = 12;
19308 for (reg = 0; reg <= max_reg; reg++)
19309 if (df_regs_ever_live_p (reg)
19310 || (! crtl->is_leaf && call_used_regs[reg]))
19311 save_reg_mask |= (1 << reg);
19313 /* Also save the pic base register if necessary. */
19314 if (flag_pic
19315 && !TARGET_SINGLE_PIC_BASE
19316 && arm_pic_register != INVALID_REGNUM
19317 && crtl->uses_pic_offset_table)
19318 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19320 else if (IS_VOLATILE(func_type))
19322 /* For noreturn functions we historically omitted register saves
19323 altogether. However this really messes up debugging. As a
19324 compromise save just the frame pointers. Combined with the link
19325 register saved elsewhere this should be sufficient to get
19326 a backtrace. */
19327 if (frame_pointer_needed)
19328 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19329 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19330 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19331 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19332 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19334 else
19336 /* In the normal case we only need to save those registers
19337 which are call saved and which are used by this function. */
19338 for (reg = 0; reg <= 11; reg++)
19339 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19340 save_reg_mask |= (1 << reg);
19342 /* Handle the frame pointer as a special case. */
19343 if (frame_pointer_needed)
19344 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19346 /* If we aren't loading the PIC register,
19347 don't stack it even though it may be live. */
19348 if (flag_pic
19349 && !TARGET_SINGLE_PIC_BASE
19350 && arm_pic_register != INVALID_REGNUM
19351 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19352 || crtl->uses_pic_offset_table))
19353 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19355 /* The prologue will copy SP into R0, so save it. */
19356 if (IS_STACKALIGN (func_type))
19357 save_reg_mask |= 1;
19360 /* Save registers so the exception handler can modify them. */
19361 if (crtl->calls_eh_return)
19363 unsigned int i;
19365 for (i = 0; ; i++)
19367 reg = EH_RETURN_DATA_REGNO (i);
19368 if (reg == INVALID_REGNUM)
19369 break;
19370 save_reg_mask |= 1 << reg;
19374 return save_reg_mask;
19377 /* Return true if r3 is live at the start of the function. */
19379 static bool
19380 arm_r3_live_at_start_p (void)
19382 /* Just look at cfg info, which is still close enough to correct at this
19383 point. This gives false positives for broken functions that might use
19384 uninitialized data that happens to be allocated in r3, but who cares? */
19385 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19388 /* Compute the number of bytes used to store the static chain register on the
19389 stack, above the stack frame. We need to know this accurately to get the
19390 alignment of the rest of the stack frame correct. */
19392 static int
19393 arm_compute_static_chain_stack_bytes (void)
19395 /* Once the value is updated from the init value of -1, do not
19396 re-compute. */
19397 if (cfun->machine->static_chain_stack_bytes != -1)
19398 return cfun->machine->static_chain_stack_bytes;
19400 /* See the defining assertion in arm_expand_prologue. */
19401 if (IS_NESTED (arm_current_func_type ())
19402 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19403 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19404 || flag_stack_clash_protection)
19405 && !df_regs_ever_live_p (LR_REGNUM)))
19406 && arm_r3_live_at_start_p ()
19407 && crtl->args.pretend_args_size == 0)
19408 return 4;
19410 return 0;
19413 /* Compute a bit mask of which core registers need to be
19414 saved on the stack for the current function.
19415 This is used by arm_compute_frame_layout, which may add extra registers. */
19417 static unsigned long
19418 arm_compute_save_core_reg_mask (void)
19420 unsigned int save_reg_mask = 0;
19421 unsigned long func_type = arm_current_func_type ();
19422 unsigned int reg;
19424 if (IS_NAKED (func_type))
19425 /* This should never really happen. */
19426 return 0;
19428 /* If we are creating a stack frame, then we must save the frame pointer,
19429 IP (which will hold the old stack pointer), LR and the PC. */
19430 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19431 save_reg_mask |=
19432 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19433 | (1 << IP_REGNUM)
19434 | (1 << LR_REGNUM)
19435 | (1 << PC_REGNUM);
19437 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19439 /* Decide if we need to save the link register.
19440 Interrupt routines have their own banked link register,
19441 so they never need to save it.
19442 Otherwise if we do not use the link register we do not need to save
19443 it. If we are pushing other registers onto the stack however, we
19444 can save an instruction in the epilogue by pushing the link register
19445 now and then popping it back into the PC. This incurs extra memory
19446 accesses though, so we only do it when optimizing for size, and only
19447 if we know that we will not need a fancy return sequence. */
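/* As a rough illustration: for a function that already pushes r4, saving
   LR as well lets the epilogue be a single "pop {r4, pc}" instead of
   "pop {r4}" followed by a separate return ("bx lr" or "mov pc, lr"),
   trading one extra word of stack traffic for one fewer instruction.  */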
19448 if (df_regs_ever_live_p (LR_REGNUM)
19449 || (save_reg_mask
19450 && optimize_size
19451 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19452 && !crtl->tail_call_emit
19453 && !crtl->calls_eh_return))
19454 save_reg_mask |= 1 << LR_REGNUM;
19456 if (cfun->machine->lr_save_eliminated)
19457 save_reg_mask &= ~ (1 << LR_REGNUM);
19459 if (TARGET_REALLY_IWMMXT
19460 && ((bit_count (save_reg_mask)
19461 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19462 arm_compute_static_chain_stack_bytes())
19463 ) % 2) != 0)
19465 /* The total number of registers that are going to be pushed
19466 onto the stack is odd. We need to ensure that the stack
19467 is 64-bit aligned before we start to save iWMMXt registers,
19468 and also before we start to create locals. (A local variable
19469 might be a double or long long which we will load/store using
19470 an iWMMXt instruction). Therefore we need to push another
19471 ARM register, so that the stack will be 64-bit aligned. We
19472 try to avoid using the arg registers (r0 - r3) as they might be
19473 used to pass values in a tail call. */
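/* Illustrative example: if the mask so far is {r4, r5, lr} and there are
   no pretend args, three words (12 bytes) would be pushed, so the loop
   below picks r6 as the extra register, bringing the push to 16 bytes and
   restoring 64-bit alignment.  */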
19474 for (reg = 4; reg <= 12; reg++)
19475 if ((save_reg_mask & (1 << reg)) == 0)
19476 break;
19478 if (reg <= 12)
19479 save_reg_mask |= (1 << reg);
19480 else
19482 cfun->machine->sibcall_blocked = 1;
19483 save_reg_mask |= (1 << 3);
19487 /* We may need to push an additional register for use initializing the
19488 PIC base register. */
19489 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19490 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19492 reg = thumb_find_work_register (1 << 4);
19493 if (!call_used_regs[reg])
19494 save_reg_mask |= (1 << reg);
19497 return save_reg_mask;
19500 /* Compute a bit mask of which core registers need to be
19501 saved on the stack for the current function. */
19502 static unsigned long
19503 thumb1_compute_save_core_reg_mask (void)
19505 unsigned long mask;
19506 unsigned reg;
19508 mask = 0;
19509 for (reg = 0; reg < 12; reg ++)
19510 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19511 mask |= 1 << reg;
19513 /* Handle the frame pointer as a special case. */
19514 if (frame_pointer_needed)
19515 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19517 if (flag_pic
19518 && !TARGET_SINGLE_PIC_BASE
19519 && arm_pic_register != INVALID_REGNUM
19520 && crtl->uses_pic_offset_table)
19521 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19523 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19524 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19525 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19527 /* LR will also be pushed if any lo regs are pushed. */
19528 if (mask & 0xff || thumb_force_lr_save ())
19529 mask |= (1 << LR_REGNUM);
19531 /* Make sure we have a low work register if we need one.
19532 We will need one if we are going to push a high register,
19533 but we are not currently intending to push a low register. */
19534 if ((mask & 0xff) == 0
19535 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19537 /* Use thumb_find_work_register to choose which register
19538 we will use. If the register is live then we will
19539 have to push it. Use LAST_LO_REGNUM as our fallback
19540 choice for the register to select. */
19541 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19542 /* Make sure the register returned by thumb_find_work_register is
19543 not part of the return value. */
19544 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19545 reg = LAST_LO_REGNUM;
19547 if (callee_saved_reg_p (reg))
19548 mask |= 1 << reg;
19551 /* The 504 below is 8 bytes less than 512 because there are two possible
19552 alignment words. We can't tell here if they will be present or not so we
19553 have to play it safe and assume that they are. */
19554 if ((CALLER_INTERWORKING_SLOT_SIZE +
19555 ROUND_UP_WORD (get_frame_size ()) +
19556 crtl->outgoing_args_size) >= 504)
19558 /* This is the same as the code in thumb1_expand_prologue() which
19559 determines which register to use for stack decrement. */
19560 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19561 if (mask & (1 << reg))
19562 break;
19564 if (reg > LAST_LO_REGNUM)
19566 /* Make sure we have a register available for stack decrement. */
19567 mask |= 1 << LAST_LO_REGNUM;
19571 return mask;
19575 /* Return the number of bytes required to save VFP registers. */
19576 static int
19577 arm_get_vfp_saved_size (void)
19579 unsigned int regno;
19580 int count;
19581 int saved;
19583 saved = 0;
19584 /* Space for saved VFP registers. */
19585 if (TARGET_HARD_FLOAT)
19587 count = 0;
19588 for (regno = FIRST_VFP_REGNUM;
19589 regno < LAST_VFP_REGNUM;
19590 regno += 2)
19592 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19593 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19595 if (count > 0)
19597 /* Workaround ARM10 VFPr1 bug. */
19598 if (count == 2 && !arm_arch6)
19599 count++;
19600 saved += count * 8;
19602 count = 0;
19604 else
19605 count++;
19607 if (count > 0)
19609 if (count == 2 && !arm_arch6)
19610 count++;
19611 saved += count * 8;
19614 return saved;
19618 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19619 everything bar the final return instruction. If simple_return is true,
19620 then do not output the epilogue, because it has already been emitted in RTL.
19622 Note: do not forget to update length attribute of corresponding insn pattern
19623 when changing assembly output (eg. length attribute of
19624 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19625 register clearing sequences). */
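/* Typical sequences produced below (a sketch, not exhaustive): a normal
   function restoring r4 gets "pop {r4, pc}"; an ISR or FIQ handler gets an
   exception return such as "subs pc, lr, #4"; an interworked return uses
   "bx lr"; and a cmse_nonsecure_entry function ends with "bxns lr" after
   clearing APSR (and FPSCR when hardware floating point is enabled).  */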
19626 const char *
19627 output_return_instruction (rtx operand, bool really_return, bool reverse,
19628 bool simple_return)
19630 char conditional[10];
19631 char instr[100];
19632 unsigned reg;
19633 unsigned long live_regs_mask;
19634 unsigned long func_type;
19635 arm_stack_offsets *offsets;
19637 func_type = arm_current_func_type ();
19639 if (IS_NAKED (func_type))
19640 return "";
19642 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19644 /* If this function was declared non-returning, and we have
19645 found a tail call, then we have to trust that the called
19646 function won't return. */
19647 if (really_return)
19649 rtx ops[2];
19651 /* Otherwise, trap an attempted return by aborting. */
19652 ops[0] = operand;
19653 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19654 : "abort");
19655 assemble_external_libcall (ops[1]);
19656 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19659 return "";
19662 gcc_assert (!cfun->calls_alloca || really_return);
19664 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19666 cfun->machine->return_used_this_function = 1;
19668 offsets = arm_get_frame_offsets ();
19669 live_regs_mask = offsets->saved_regs_mask;
19671 if (!simple_return && live_regs_mask)
19673 const char * return_reg;
19675 /* If we do not have any special requirements for function exit
19676 (e.g. interworking) then we can load the return address
19677 directly into the PC. Otherwise we must load it into LR. */
19678 if (really_return
19679 && !IS_CMSE_ENTRY (func_type)
19680 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19681 return_reg = reg_names[PC_REGNUM];
19682 else
19683 return_reg = reg_names[LR_REGNUM];
19685 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19687 /* There are three possible reasons for the IP register
19688 being saved. 1) a stack frame was created, in which case
19689 IP contains the old stack pointer, or 2) an ISR routine
19690 corrupted it, or 3) it was saved to align the stack on
19691 iWMMXt. In case 1, restore IP into SP, otherwise just
19692 restore IP. */
19693 if (frame_pointer_needed)
19695 live_regs_mask &= ~ (1 << IP_REGNUM);
19696 live_regs_mask |= (1 << SP_REGNUM);
19698 else
19699 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19702 /* On some ARM architectures it is faster to use LDR rather than
19703 LDM to load a single register. On other architectures, the
19704 cost is the same. In 26 bit mode, or for exception handlers,
19705 we have to use LDM to load the PC so that the CPSR is also
19706 restored. */
19707 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19708 if (live_regs_mask == (1U << reg))
19709 break;
19711 if (reg <= LAST_ARM_REGNUM
19712 && (reg != LR_REGNUM
19713 || ! really_return
19714 || ! IS_INTERRUPT (func_type)))
19716 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19717 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19719 else
19721 char *p;
19722 int first = 1;
19724 /* Generate the load multiple instruction to restore the
19725 registers. Note we can get here, even if
19726 frame_pointer_needed is true, but only if sp already
19727 points to the base of the saved core registers. */
19728 if (live_regs_mask & (1 << SP_REGNUM))
19730 unsigned HOST_WIDE_INT stack_adjust;
19732 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19733 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19735 if (stack_adjust && arm_arch5 && TARGET_ARM)
19736 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19737 else
19739 /* If we can't use ldmib (SA110 bug),
19740 then try to pop r3 instead. */
19741 if (stack_adjust)
19742 live_regs_mask |= 1 << 3;
19744 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19747 /* For interrupt returns we have to use an LDM rather than
19748 a POP so that we can use the exception return variant. */
19749 else if (IS_INTERRUPT (func_type))
19750 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19751 else
19752 sprintf (instr, "pop%s\t{", conditional);
19754 p = instr + strlen (instr);
19756 for (reg = 0; reg <= SP_REGNUM; reg++)
19757 if (live_regs_mask & (1 << reg))
19759 int l = strlen (reg_names[reg]);
19761 if (first)
19762 first = 0;
19763 else
19765 memcpy (p, ", ", 2);
19766 p += 2;
19769 memcpy (p, "%|", 2);
19770 memcpy (p + 2, reg_names[reg], l);
19771 p += l + 2;
19774 if (live_regs_mask & (1 << LR_REGNUM))
19776 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19777 /* If returning from an interrupt, restore the CPSR. */
19778 if (IS_INTERRUPT (func_type))
19779 strcat (p, "^");
19781 else
19782 strcpy (p, "}");
19785 output_asm_insn (instr, & operand);
19787 /* See if we need to generate an extra instruction to
19788 perform the actual function return. */
19789 if (really_return
19790 && func_type != ARM_FT_INTERWORKED
19791 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19793 /* The return has already been handled
19794 by loading the LR into the PC. */
19795 return "";
19799 if (really_return)
19801 switch ((int) ARM_FUNC_TYPE (func_type))
19803 case ARM_FT_ISR:
19804 case ARM_FT_FIQ:
19805 /* ??? This is wrong for unified assembly syntax. */
19806 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19807 break;
19809 case ARM_FT_INTERWORKED:
19810 gcc_assert (arm_arch5 || arm_arch4t);
19811 sprintf (instr, "bx%s\t%%|lr", conditional);
19812 break;
19814 case ARM_FT_EXCEPTION:
19815 /* ??? This is wrong for unified assembly syntax. */
19816 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19817 break;
19819 default:
19820 if (IS_CMSE_ENTRY (func_type))
19822 /* Check if we have to clear the 'GE bits' which is only used if
19823 parallel add and subtraction instructions are available. */
19824 if (TARGET_INT_SIMD)
19825 snprintf (instr, sizeof (instr),
19826 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19827 else
19828 snprintf (instr, sizeof (instr),
19829 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19831 output_asm_insn (instr, & operand);
19832 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19834 /* Clear the cumulative exception-status bits (0-4,7) and the
19835 condition code bits (28-31) of the FPSCR. We need to
19836 remember to clear the first scratch register used (IP) and
19837 save and restore the second (r4). */
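/* The movw/movt pair below builds the mask 0x0FFFFF60 in r4
   (65376 == 0xFF60, 4095 << 16 == 0x0FFF0000); ANDing FPSCR with it
   clears exactly bits 0-4, 7 and 28-31 while leaving the remaining
   control bits untouched.  */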
19838 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19839 output_asm_insn (instr, & operand);
19840 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19841 output_asm_insn (instr, & operand);
19842 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19843 output_asm_insn (instr, & operand);
19844 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19845 output_asm_insn (instr, & operand);
19846 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19847 output_asm_insn (instr, & operand);
19848 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19849 output_asm_insn (instr, & operand);
19850 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19851 output_asm_insn (instr, & operand);
19852 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19853 output_asm_insn (instr, & operand);
19855 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19857 /* Use bx if it's available. */
19858 else if (arm_arch5 || arm_arch4t)
19859 sprintf (instr, "bx%s\t%%|lr", conditional);
19860 else
19861 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19862 break;
19865 output_asm_insn (instr, & operand);
19868 return "";
19871 /* Output in FILE asm statements needed to declare the NAME of the function
19872 defined by its DECL node. */
19874 void
19875 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19877 size_t cmse_name_len;
19878 char *cmse_name = 0;
19879 char cmse_prefix[] = "__acle_se_";
19881 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19882 extra function label for each function with the 'cmse_nonsecure_entry'
19883 attribute. This extra function label should be prepended with
19884 '__acle_se_', telling the linker that it needs to create secure gateway
19885 veneers for this function. */
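/* For a function "foo" compiled with -mcmse this produces, roughly:

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that both labels refer to the same entry point.  */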
19886 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19887 DECL_ATTRIBUTES (decl)))
19889 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19890 cmse_name = XALLOCAVEC (char, cmse_name_len);
19891 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19892 targetm.asm_out.globalize_label (file, cmse_name);
19894 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19895 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19898 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19899 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19900 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19901 ASM_OUTPUT_LABEL (file, name);
19903 if (cmse_name)
19904 ASM_OUTPUT_LABEL (file, cmse_name);
19906 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19909 /* Write the function name into the code section, directly preceding
19910 the function prologue.
19912 Code will be output similar to this:
19914 .ascii "arm_poke_function_name", 0
19915 .align
19917 .word 0xff000000 + (t1 - t0)
19918 arm_poke_function_name
19919 mov ip, sp
19920 stmfd sp!, {fp, ip, lr, pc}
19921 sub fp, ip, #4
19923 When performing a stack backtrace, code can inspect the value
19924 of 'pc' stored at 'fp' + 0. If the trace function then looks
19925 at location pc - 12 and the top 8 bits are set, then we know
19926 that there is a function name embedded immediately preceding this
19927 location and has length ((pc[-3]) & ~0xff000000).
19929 We assume that pc is declared as a pointer to an unsigned long.
19931 It is of no benefit to output the function name if we are assembling
19932 a leaf function. These function types will not contain a stack
19933 backtrace structure, therefore it is not possible to determine the
19934 function name. */
19935 void
19936 arm_poke_function_name (FILE *stream, const char *name)
19938 unsigned long alignlength;
19939 unsigned long length;
19940 rtx x;
19942 length = strlen (name) + 1;
19943 alignlength = ROUND_UP_WORD (length);
19945 ASM_OUTPUT_ASCII (stream, name, length);
19946 ASM_OUTPUT_ALIGN (stream, 2);
19947 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19948 assemble_aligned_integer (UNITS_PER_WORD, x);
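/* A backtrace routine might recover the name along these lines (an
   illustrative sketch only, assuming FP points at the saved PC as
   described above and that unsigned long is 4 bytes):

     unsigned long pc = *(unsigned long *) fp;
     unsigned long marker = ((unsigned long *) pc)[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & ~0xff000000;
	 const char *name = (const char *) pc - 12 - len;
	 ...
       }
  */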
19951 /* Place some comments into the assembler stream
19952 describing the current function. */
19953 static void
19954 arm_output_function_prologue (FILE *f)
19956 unsigned long func_type;
19958 /* Sanity check. */
19959 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19961 func_type = arm_current_func_type ();
19963 switch ((int) ARM_FUNC_TYPE (func_type))
19965 default:
19966 case ARM_FT_NORMAL:
19967 break;
19968 case ARM_FT_INTERWORKED:
19969 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19970 break;
19971 case ARM_FT_ISR:
19972 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19973 break;
19974 case ARM_FT_FIQ:
19975 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19976 break;
19977 case ARM_FT_EXCEPTION:
19978 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19979 break;
19982 if (IS_NAKED (func_type))
19983 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19985 if (IS_VOLATILE (func_type))
19986 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19988 if (IS_NESTED (func_type))
19989 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19990 if (IS_STACKALIGN (func_type))
19991 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19992 if (IS_CMSE_ENTRY (func_type))
19993 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19995 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19996 (HOST_WIDE_INT) crtl->args.size,
19997 crtl->args.pretend_args_size,
19998 (HOST_WIDE_INT) get_frame_size ());
20000 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20001 frame_pointer_needed,
20002 cfun->machine->uses_anonymous_args);
20004 if (cfun->machine->lr_save_eliminated)
20005 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20007 if (crtl->calls_eh_return)
20008 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20012 static void
20013 arm_output_function_epilogue (FILE *)
20015 arm_stack_offsets *offsets;
20017 if (TARGET_THUMB1)
20019 int regno;
20021 /* Emit any call-via-reg trampolines that are needed for v4t support
20022 of call_reg and call_value_reg type insns. */
20023 for (regno = 0; regno < LR_REGNUM; regno++)
20025 rtx label = cfun->machine->call_via[regno];
20027 if (label != NULL)
20029 switch_to_section (function_section (current_function_decl));
20030 targetm.asm_out.internal_label (asm_out_file, "L",
20031 CODE_LABEL_NUMBER (label));
20032 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20036 /* ??? Probably not safe to set this here, since it assumes that a
20037 function will be emitted as assembly immediately after we generate
20038 RTL for it. This does not happen for inline functions. */
20039 cfun->machine->return_used_this_function = 0;
20041 else /* TARGET_32BIT */
20043 /* We need to take into account any stack-frame rounding. */
20044 offsets = arm_get_frame_offsets ();
20046 gcc_assert (!use_return_insn (FALSE, NULL)
20047 || (cfun->machine->return_used_this_function != 0)
20048 || offsets->saved_regs == offsets->outgoing_args
20049 || frame_pointer_needed);
20053 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20054 STR and STRD. If an even number of registers are being pushed, one
20055 or more STRD patterns are created for each register pair. If an
20056 odd number of registers are pushed, emit an initial STR followed by
20057 as many STRD instructions as are needed. This works best when the
20058 stack is initially 64-bit aligned (the normal case), since it
20059 ensures that each STRD is also 64-bit aligned. */
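/* For instance (a sketch): with SAVED_REGS_MASK covering {r4, r5, r6} the
   code below stores r4 with a write-back that allocates all 12 bytes and
   then stores the remaining pair with one strd, roughly:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
  */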
20060 static void
20061 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20063 int num_regs = 0;
20064 int i;
20065 int regno;
20066 rtx par = NULL_RTX;
20067 rtx dwarf = NULL_RTX;
20068 rtx tmp;
20069 bool first = true;
20071 num_regs = bit_count (saved_regs_mask);
20073 /* Must be at least one register to save, and can't save SP or PC. */
20074 gcc_assert (num_regs > 0 && num_regs <= 14);
20075 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20076 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20078 /* Create sequence for DWARF info. All the frame-related data for
20079 debugging is held in this wrapper. */
20080 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20082 /* Describe the stack adjustment. */
20083 tmp = gen_rtx_SET (stack_pointer_rtx,
20084 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20085 RTX_FRAME_RELATED_P (tmp) = 1;
20086 XVECEXP (dwarf, 0, 0) = tmp;
20088 /* Find the first register. */
20089 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20092 i = 0;
20094 /* If there's an odd number of registers to push, start off by
20095 pushing a single register. This ensures that subsequent strd
20096 operations are dword aligned (assuming that SP was originally
20097 64-bit aligned). */
20098 if ((num_regs & 1) != 0)
20100 rtx reg, mem, insn;
20102 reg = gen_rtx_REG (SImode, regno);
20103 if (num_regs == 1)
20104 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20105 stack_pointer_rtx));
20106 else
20107 mem = gen_frame_mem (Pmode,
20108 gen_rtx_PRE_MODIFY
20109 (Pmode, stack_pointer_rtx,
20110 plus_constant (Pmode, stack_pointer_rtx,
20111 -4 * num_regs)));
20113 tmp = gen_rtx_SET (mem, reg);
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 insn = emit_insn (tmp);
20116 RTX_FRAME_RELATED_P (insn) = 1;
20117 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20118 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20119 RTX_FRAME_RELATED_P (tmp) = 1;
20120 i++;
20121 regno++;
20122 XVECEXP (dwarf, 0, i) = tmp;
20123 first = false;
20126 while (i < num_regs)
20127 if (saved_regs_mask & (1 << regno))
20129 rtx reg1, reg2, mem1, mem2;
20130 rtx tmp0, tmp1, tmp2;
20131 int regno2;
20133 /* Find the register to pair with this one. */
20134 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20135 regno2++)
20138 reg1 = gen_rtx_REG (SImode, regno);
20139 reg2 = gen_rtx_REG (SImode, regno2);
20141 if (first)
20143 rtx insn;
20145 first = false;
20146 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20147 stack_pointer_rtx,
20148 -4 * num_regs));
20149 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20150 stack_pointer_rtx,
20151 -4 * (num_regs - 1)));
20152 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20153 plus_constant (Pmode, stack_pointer_rtx,
20154 -4 * (num_regs)));
20155 tmp1 = gen_rtx_SET (mem1, reg1);
20156 tmp2 = gen_rtx_SET (mem2, reg2);
20157 RTX_FRAME_RELATED_P (tmp0) = 1;
20158 RTX_FRAME_RELATED_P (tmp1) = 1;
20159 RTX_FRAME_RELATED_P (tmp2) = 1;
20160 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20161 XVECEXP (par, 0, 0) = tmp0;
20162 XVECEXP (par, 0, 1) = tmp1;
20163 XVECEXP (par, 0, 2) = tmp2;
20164 insn = emit_insn (par);
20165 RTX_FRAME_RELATED_P (insn) = 1;
20166 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20168 else
20170 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20171 stack_pointer_rtx,
20172 4 * i));
20173 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20174 stack_pointer_rtx,
20175 4 * (i + 1)));
20176 tmp1 = gen_rtx_SET (mem1, reg1);
20177 tmp2 = gen_rtx_SET (mem2, reg2);
20178 RTX_FRAME_RELATED_P (tmp1) = 1;
20179 RTX_FRAME_RELATED_P (tmp2) = 1;
20180 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20181 XVECEXP (par, 0, 0) = tmp1;
20182 XVECEXP (par, 0, 1) = tmp2;
20183 emit_insn (par);
20186 /* Create unwind information. This is an approximation. */
20187 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20188 plus_constant (Pmode,
20189 stack_pointer_rtx,
20190 4 * i)),
20191 reg1);
20192 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20193 plus_constant (Pmode,
20194 stack_pointer_rtx,
20195 4 * (i + 1))),
20196 reg2);
20198 RTX_FRAME_RELATED_P (tmp1) = 1;
20199 RTX_FRAME_RELATED_P (tmp2) = 1;
20200 XVECEXP (dwarf, 0, i + 1) = tmp1;
20201 XVECEXP (dwarf, 0, i + 2) = tmp2;
20202 i += 2;
20203 regno = regno2 + 1;
20205 else
20206 regno++;
20208 return;
20211 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20212 whenever possible, otherwise it emits single-word stores. The first store
20213 also allocates stack space for all saved registers, using writeback with
20214 post-addressing mode. All other stores use offset addressing. If no STRD
20215 can be emitted, this function emits a sequence of single-word stores,
20216 and not an STM as before, because single-word stores provide more freedom for
20217 scheduling and can be turned into an STM by peephole optimizations. */
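/* Sketch of the output for SAVED_REGS_MASK covering {r4, r5, r7}: r4/r5
   form an even/odd consecutive pair, so they are stored with an strd whose
   write-back allocates the whole 12-byte area, and r7 then gets a plain
   offset store:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
  */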
20218 static void
20219 arm_emit_strd_push (unsigned long saved_regs_mask)
20221 int num_regs = 0;
20222 int i, j, dwarf_index = 0;
20223 int offset = 0;
20224 rtx dwarf = NULL_RTX;
20225 rtx insn = NULL_RTX;
20226 rtx tmp, mem;
20228 /* TODO: A more efficient code can be emitted by changing the
20229 layout, e.g., first push all pairs that can use STRD to keep the
20230 stack aligned, and then push all other registers. */
20231 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20232 if (saved_regs_mask & (1 << i))
20233 num_regs++;
20235 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20236 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20237 gcc_assert (num_regs > 0);
20239 /* Create sequence for DWARF info. */
20240 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20242 /* For dwarf info, we generate explicit stack update. */
20243 tmp = gen_rtx_SET (stack_pointer_rtx,
20244 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20245 RTX_FRAME_RELATED_P (tmp) = 1;
20246 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20248 /* Save registers. */
20249 offset = - 4 * num_regs;
20250 j = 0;
20251 while (j <= LAST_ARM_REGNUM)
20252 if (saved_regs_mask & (1 << j))
20254 if ((j % 2 == 0)
20255 && (saved_regs_mask & (1 << (j + 1))))
20257 /* Current register and next register form register pair for
20258 which STRD can be generated. */
20259 if (offset < 0)
20261 /* Allocate stack space for all saved registers. */
20262 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20263 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20264 mem = gen_frame_mem (DImode, tmp);
20265 offset = 0;
20267 else if (offset > 0)
20268 mem = gen_frame_mem (DImode,
20269 plus_constant (Pmode,
20270 stack_pointer_rtx,
20271 offset));
20272 else
20273 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20275 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20276 RTX_FRAME_RELATED_P (tmp) = 1;
20277 tmp = emit_insn (tmp);
20279 /* Record the first store insn. */
20280 if (dwarf_index == 1)
20281 insn = tmp;
20283 /* Generate dwarf info. */
20284 mem = gen_frame_mem (SImode,
20285 plus_constant (Pmode,
20286 stack_pointer_rtx,
20287 offset));
20288 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20289 RTX_FRAME_RELATED_P (tmp) = 1;
20290 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20292 mem = gen_frame_mem (SImode,
20293 plus_constant (Pmode,
20294 stack_pointer_rtx,
20295 offset + 4));
20296 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20297 RTX_FRAME_RELATED_P (tmp) = 1;
20298 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20300 offset += 8;
20301 j += 2;
20303 else
20305 /* Emit a single word store. */
20306 if (offset < 0)
20308 /* Allocate stack space for all saved registers. */
20309 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20310 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20311 mem = gen_frame_mem (SImode, tmp);
20312 offset = 0;
20314 else if (offset > 0)
20315 mem = gen_frame_mem (SImode,
20316 plus_constant (Pmode,
20317 stack_pointer_rtx,
20318 offset));
20319 else
20320 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20322 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20323 RTX_FRAME_RELATED_P (tmp) = 1;
20324 tmp = emit_insn (tmp);
20326 /* Record the first store insn. */
20327 if (dwarf_index == 1)
20328 insn = tmp;
20330 /* Generate dwarf info. */
20331 mem = gen_frame_mem (SImode,
20332 plus_constant(Pmode,
20333 stack_pointer_rtx,
20334 offset));
20335 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20336 RTX_FRAME_RELATED_P (tmp) = 1;
20337 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20339 offset += 4;
20340 j += 1;
20343 else
20344 j++;
20346 /* Attach dwarf info to the first insn we generate. */
20347 gcc_assert (insn != NULL_RTX);
20348 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20349 RTX_FRAME_RELATED_P (insn) = 1;
20352 /* Generate and emit an insn that we will recognize as a push_multi.
20353 Unfortunately, since this insn does not reflect very well the actual
20354 semantics of the operation, we need to annotate the insn for the benefit
20355 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20356 MASK for registers that should be annotated for DWARF2 frame unwind
20357 information. */
20358 static rtx
20359 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20361 int num_regs = 0;
20362 int num_dwarf_regs = 0;
20363 int i, j;
20364 rtx par;
20365 rtx dwarf;
20366 int dwarf_par_index;
20367 rtx tmp, reg;
20369 /* We don't record the PC in the dwarf frame information. */
20370 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20372 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20374 if (mask & (1 << i))
20375 num_regs++;
20376 if (dwarf_regs_mask & (1 << i))
20377 num_dwarf_regs++;
20380 gcc_assert (num_regs && num_regs <= 16);
20381 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20383 /* For the body of the insn we are going to generate an UNSPEC in
20384 parallel with several USEs. This allows the insn to be recognized
20385 by the push_multi pattern in the arm.md file.
20387 The body of the insn looks something like this:
20389 (parallel [
20390 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20391 (const_int:SI <num>)))
20392 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20393 (use (reg:SI XX))
20394 (use (reg:SI YY))
20398 For the frame note however, we try to be more explicit and actually
20399 show each register being stored into the stack frame, plus a (single)
20400 decrement of the stack pointer. We do it this way in order to be
20401 friendly to the stack unwinding code, which only wants to see a single
20402 stack decrement per instruction. The RTL we generate for the note looks
20403 something like this:
20405 (sequence [
20406 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20407 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20408 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20409 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20413 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20414 instead we'd have a parallel expression detailing all
20415 the stores to the various memory addresses so that debug
20416 information is more up-to-date. Remember however while writing
20417 this to take care of the constraints with the push instruction.
20419 Note also that this has to be taken care of for the VFP registers.
20421 For more see PR43399. */
20423 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20424 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20425 dwarf_par_index = 1;
20427 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20429 if (mask & (1 << i))
20431 reg = gen_rtx_REG (SImode, i);
20433 XVECEXP (par, 0, 0)
20434 = gen_rtx_SET (gen_frame_mem
20435 (BLKmode,
20436 gen_rtx_PRE_MODIFY (Pmode,
20437 stack_pointer_rtx,
20438 plus_constant
20439 (Pmode, stack_pointer_rtx,
20440 -4 * num_regs))
20442 gen_rtx_UNSPEC (BLKmode,
20443 gen_rtvec (1, reg),
20444 UNSPEC_PUSH_MULT));
20446 if (dwarf_regs_mask & (1 << i))
20448 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20449 reg);
20450 RTX_FRAME_RELATED_P (tmp) = 1;
20451 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20454 break;
20458 for (j = 1, i++; j < num_regs; i++)
20460 if (mask & (1 << i))
20462 reg = gen_rtx_REG (SImode, i);
20464 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20466 if (dwarf_regs_mask & (1 << i))
20469 = gen_rtx_SET (gen_frame_mem
20470 (SImode,
20471 plus_constant (Pmode, stack_pointer_rtx,
20472 4 * j)),
20473 reg);
20474 RTX_FRAME_RELATED_P (tmp) = 1;
20475 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20478 j++;
20482 par = emit_insn (par);
20484 tmp = gen_rtx_SET (stack_pointer_rtx,
20485 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20486 RTX_FRAME_RELATED_P (tmp) = 1;
20487 XVECEXP (dwarf, 0, 0) = tmp;
20489 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20491 return par;
20494 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20495 SIZE is the offset to be adjusted.
20496 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20497 static void
20498 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20500 rtx dwarf;
20502 RTX_FRAME_RELATED_P (insn) = 1;
20503 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20504 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20507 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20508 SAVED_REGS_MASK shows which registers need to be restored.
20510 Unfortunately, since this insn does not reflect very well the actual
20511 semantics of the operation, we need to annotate the insn for the benefit
20512 of DWARF2 frame unwind information. */
20513 static void
20514 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20516 int num_regs = 0;
20517 int i, j;
20518 rtx par;
20519 rtx dwarf = NULL_RTX;
20520 rtx tmp, reg;
20521 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20522 int offset_adj;
20523 int emit_update;
20525 offset_adj = return_in_pc ? 1 : 0;
20526 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20527 if (saved_regs_mask & (1 << i))
20528 num_regs++;
20530 gcc_assert (num_regs && num_regs <= 16);
20532 /* If SP is in reglist, then we don't emit SP update insn. */
20533 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20535 /* The parallel needs to hold num_regs SETs
20536 and one SET for the stack update. */
20537 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20539 if (return_in_pc)
20540 XVECEXP (par, 0, 0) = ret_rtx;
20542 if (emit_update)
20544 /* Increment the stack pointer, based on there being
20545 num_regs 4-byte registers to restore. */
20546 tmp = gen_rtx_SET (stack_pointer_rtx,
20547 plus_constant (Pmode,
20548 stack_pointer_rtx,
20549 4 * num_regs));
20550 RTX_FRAME_RELATED_P (tmp) = 1;
20551 XVECEXP (par, 0, offset_adj) = tmp;
20554 /* Now restore every reg, which may include PC. */
20555 for (j = 0, i = 0; j < num_regs; i++)
20556 if (saved_regs_mask & (1 << i))
20558 reg = gen_rtx_REG (SImode, i);
20559 if ((num_regs == 1) && emit_update && !return_in_pc)
20561 /* Emit single load with writeback. */
20562 tmp = gen_frame_mem (SImode,
20563 gen_rtx_POST_INC (Pmode,
20564 stack_pointer_rtx));
20565 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20566 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20567 return;
20570 tmp = gen_rtx_SET (reg,
20571 gen_frame_mem
20572 (SImode,
20573 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20574 RTX_FRAME_RELATED_P (tmp) = 1;
20575 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20577 /* We need to maintain a sequence for DWARF info too. As dwarf info
20578 should not have PC, skip PC. */
20579 if (i != PC_REGNUM)
20580 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20582 j++;
20585 if (return_in_pc)
20586 par = emit_jump_insn (par);
20587 else
20588 par = emit_insn (par);
20590 REG_NOTES (par) = dwarf;
20591 if (!return_in_pc)
20592 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20593 stack_pointer_rtx, stack_pointer_rtx);
20596 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20597 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20599 Unfortunately, since this insn does not reflect very well the actual
20600 semantics of the operation, we need to annotate the insn for the benefit
20601 of DWARF2 frame unwind information. */
20602 static void
20603 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20605 int i, j;
20606 rtx par;
20607 rtx dwarf = NULL_RTX;
20608 rtx tmp, reg;
20610 gcc_assert (num_regs && num_regs <= 32);
20612 /* Workaround ARM10 VFPr1 bug. */
20613 if (num_regs == 2 && !arm_arch6)
20615 if (first_reg == 15)
20616 first_reg--;
20618 num_regs++;
20621 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20622 there could be up to 32 D-registers to restore.
20623 If there are more than 16 D-registers, make two recursive calls,
20624 each of which emits one pop_multi instruction. */
20625 if (num_regs > 16)
20627 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20628 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20629 return;
20632 /* The parallel needs to hold num_regs SETs
20633 and one SET for the stack update. */
20634 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20636 /* Increment the stack pointer, based on there being
20637 num_regs 8-byte registers to restore. */
20638 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20639 RTX_FRAME_RELATED_P (tmp) = 1;
20640 XVECEXP (par, 0, 0) = tmp;
20642 /* Now show every reg that will be restored, using a SET for each. */
20643 for (j = 0, i=first_reg; j < num_regs; i += 2)
20645 reg = gen_rtx_REG (DFmode, i);
20647 tmp = gen_rtx_SET (reg,
20648 gen_frame_mem
20649 (DFmode,
20650 plus_constant (Pmode, base_reg, 8 * j)));
20651 RTX_FRAME_RELATED_P (tmp) = 1;
20652 XVECEXP (par, 0, j + 1) = tmp;
20654 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20656 j++;
20659 par = emit_insn (par);
20660 REG_NOTES (par) = dwarf;
20662 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
20663 if (REGNO (base_reg) == IP_REGNUM)
20665 RTX_FRAME_RELATED_P (par) = 1;
20666 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20668 else
20669 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20670 base_reg, base_reg);
20673 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20674 even number of registers is being popped, multiple LDRD patterns are created for
20675 all register pairs. If an odd number of registers is popped, the last register
20676 is loaded using an LDR pattern. */
20677 static void
20678 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20680 int num_regs = 0;
20681 int i, j;
20682 rtx par = NULL_RTX;
20683 rtx dwarf = NULL_RTX;
20684 rtx tmp, reg, tmp1;
20685 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20687 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20688 if (saved_regs_mask & (1 << i))
20689 num_regs++;
20691 gcc_assert (num_regs && num_regs <= 16);
20693 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20694 to be popped. So, if num_regs is even, now it will become odd,
20695 and we can generate pop with PC. If num_regs is odd, it will be
20696 even now, and ldr with return can be generated for PC. */
20697 if (return_in_pc)
20698 num_regs--;
20700 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20702 /* Var j iterates over all the registers to gather all the registers in
20703 saved_regs_mask. Var i gives index of saved registers in stack frame.
20704 A PARALLEL RTX of register-pair is created here, so that pattern for
20705 LDRD can be matched. As PC is always last register to be popped, and
20706 we have already decremented num_regs if PC, we don't have to worry
20707 about PC in this loop. */
20708 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20709 if (saved_regs_mask & (1 << j))
20711 /* Create RTX for memory load. */
20712 reg = gen_rtx_REG (SImode, j);
20713 tmp = gen_rtx_SET (reg,
20714 gen_frame_mem (SImode,
20715 plus_constant (Pmode,
20716 stack_pointer_rtx, 4 * i)));
20717 RTX_FRAME_RELATED_P (tmp) = 1;
20719 if (i % 2 == 0)
20721 /* When saved-register index (i) is even, the RTX to be emitted is
20722 yet to be created. Hence create it first. The LDRD pattern we
20723 are generating is :
20724 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20725 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20726 where target registers need not be consecutive. */
20727 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20728 dwarf = NULL_RTX;
20731 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20732 added as 0th element and if i is odd, reg_i is added as 1st element
20733 of LDRD pattern shown above. */
20734 XVECEXP (par, 0, (i % 2)) = tmp;
20735 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20737 if ((i % 2) == 1)
20739 /* When saved-register index (i) is odd, RTXs for both the registers
20740 to be loaded are generated in above given LDRD pattern, and the
20741 pattern can be emitted now. */
20742 par = emit_insn (par);
20743 REG_NOTES (par) = dwarf;
20744 RTX_FRAME_RELATED_P (par) = 1;
20747 i++;
20750 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20751 the number of registers is even AND return_in_pc is true, the last register
20752 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20753 and then use LDR with post-increment. */
20755 /* Increment the stack pointer, based on there being
20756 num_regs 4-byte registers to restore. */
20757 tmp = gen_rtx_SET (stack_pointer_rtx,
20758 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20759 RTX_FRAME_RELATED_P (tmp) = 1;
20760 tmp = emit_insn (tmp);
20761 if (!return_in_pc)
20763 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20764 stack_pointer_rtx, stack_pointer_rtx);
20767 dwarf = NULL_RTX;
20769 if (((num_regs % 2) == 1 && !return_in_pc)
20770 || ((num_regs % 2) == 0 && return_in_pc))
20772 /* Scan for the single register to be popped. Skip until the saved
20773 register is found. */
20774 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20776 /* Gen LDR with post increment here. */
20777 tmp1 = gen_rtx_MEM (SImode,
20778 gen_rtx_POST_INC (SImode,
20779 stack_pointer_rtx));
20780 set_mem_alias_set (tmp1, get_frame_alias_set ());
20782 reg = gen_rtx_REG (SImode, j);
20783 tmp = gen_rtx_SET (reg, tmp1);
20784 RTX_FRAME_RELATED_P (tmp) = 1;
20785 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20787 if (return_in_pc)
20789 /* If return_in_pc, j must be PC_REGNUM. */
20790 gcc_assert (j == PC_REGNUM);
20791 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20792 XVECEXP (par, 0, 0) = ret_rtx;
20793 XVECEXP (par, 0, 1) = tmp;
20794 par = emit_jump_insn (par);
20796 else
20798 par = emit_insn (tmp);
20799 REG_NOTES (par) = dwarf;
20800 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20801 stack_pointer_rtx, stack_pointer_rtx);
20805 else if ((num_regs % 2) == 1 && return_in_pc)
20807 /* There are 2 registers to be popped. So, generate the pattern
20808 pop_multiple_with_stack_update_and_return to pop in PC. */
20809 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20812 return;
20815 /* LDRD in ARM mode needs consecutive registers as operands. This function
20816 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20817 offset addressing and then generates one separate stack update. This provides
20818 more scheduling freedom, compared to writeback on every load. However,
20819 if the function returns using load into PC directly
20820 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20821 before the last load. TODO: Add a peephole optimization to recognize
20822 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20823 peephole optimization to merge the load at stack-offset zero
20824 with the stack update instruction using load with writeback
20825 in post-index addressing mode. */
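/* Sketch of the output for SAVED_REGS_MASK covering {r4, r5, r6, pc}: the
   consecutive pair is restored with ldrd, r6 with a plain load, the stack
   is adjusted, and PC is finally loaded with a post-increment so the last
   pop and the return are folded together, roughly:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4
  */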
20826 static void
20827 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20829 int j = 0;
20830 int offset = 0;
20831 rtx par = NULL_RTX;
20832 rtx dwarf = NULL_RTX;
20833 rtx tmp, mem;
20835 /* Restore saved registers. */
20836 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20837 j = 0;
20838 while (j <= LAST_ARM_REGNUM)
20839 if (saved_regs_mask & (1 << j))
20841 if ((j % 2) == 0
20842 && (saved_regs_mask & (1 << (j + 1)))
20843 && (j + 1) != PC_REGNUM)
20845 /* Current register and next register form register pair for which
20846 LDRD can be generated. PC is always the last register popped, and
20847 we handle it separately. */
20848 if (offset > 0)
20849 mem = gen_frame_mem (DImode,
20850 plus_constant (Pmode,
20851 stack_pointer_rtx,
20852 offset));
20853 else
20854 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20856 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20857 tmp = emit_insn (tmp);
20858 RTX_FRAME_RELATED_P (tmp) = 1;
20860 /* Generate dwarf info. */
20862 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20863 gen_rtx_REG (SImode, j),
20864 NULL_RTX);
20865 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20866 gen_rtx_REG (SImode, j + 1),
20867 dwarf);
20869 REG_NOTES (tmp) = dwarf;
20871 offset += 8;
20872 j += 2;
20874 else if (j != PC_REGNUM)
20876 /* Emit a single word load. */
20877 if (offset > 0)
20878 mem = gen_frame_mem (SImode,
20879 plus_constant (Pmode,
20880 stack_pointer_rtx,
20881 offset));
20882 else
20883 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20885 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20886 tmp = emit_insn (tmp);
20887 RTX_FRAME_RELATED_P (tmp) = 1;
20889 /* Generate dwarf info. */
20890 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20891 gen_rtx_REG (SImode, j),
20892 NULL_RTX);
20894 offset += 4;
20895 j += 1;
20897 else /* j == PC_REGNUM */
20898 j++;
20900 else
20901 j++;
20903 /* Update the stack. */
20904 if (offset > 0)
20906 tmp = gen_rtx_SET (stack_pointer_rtx,
20907 plus_constant (Pmode,
20908 stack_pointer_rtx,
20909 offset));
20910 tmp = emit_insn (tmp);
20911 arm_add_cfa_adjust_cfa_note (tmp, offset,
20912 stack_pointer_rtx, stack_pointer_rtx);
20913 offset = 0;
20916 if (saved_regs_mask & (1 << PC_REGNUM))
20918 /* Only PC is to be popped. */
20919 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20920 XVECEXP (par, 0, 0) = ret_rtx;
20921 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20922 gen_frame_mem (SImode,
20923 gen_rtx_POST_INC (SImode,
20924 stack_pointer_rtx)));
20925 RTX_FRAME_RELATED_P (tmp) = 1;
20926 XVECEXP (par, 0, 1) = tmp;
20927 par = emit_jump_insn (par);
20929 /* Generate dwarf info. */
20930 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20931 gen_rtx_REG (SImode, PC_REGNUM),
20932 NULL_RTX);
20933 REG_NOTES (par) = dwarf;
20934 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20935 stack_pointer_rtx, stack_pointer_rtx);
20939 /* Calculate the size of the return value that is passed in registers. */
20940 static unsigned
20941 arm_size_return_regs (void)
20943 machine_mode mode;
20945 if (crtl->return_rtx != 0)
20946 mode = GET_MODE (crtl->return_rtx);
20947 else
20948 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20950 return GET_MODE_SIZE (mode);
20953 /* Return true if the current function needs to save/restore LR. */
20954 static bool
20955 thumb_force_lr_save (void)
20957 return !cfun->machine->lr_save_eliminated
20958 && (!crtl->is_leaf
20959 || thumb_far_jump_used_p ()
20960 || df_regs_ever_live_p (LR_REGNUM));
20963 /* Return true if CALL is an indirect tail call. In that case we do
20964 not know whether r3 will be available, because the call target
20965 address is itself held in a core register. */
20966 static bool
20967 is_indirect_tailcall_p (rtx call)
20969 rtx pat = PATTERN (call);
20971 /* Indirect tail call. */
20972 pat = XVECEXP (pat, 0, 0);
20973 if (GET_CODE (pat) == SET)
20974 pat = SET_SRC (pat);
20976 pat = XEXP (XEXP (pat, 0), 0);
20977 return REG_P (pat);
20980 /* Return true if r3 could be used by any of the tail call insns in the
20981 current function. */
20982 static bool
20983 any_sibcall_could_use_r3 (void)
20985 edge_iterator ei;
20986 edge e;
20988 if (!crtl->tail_call_emit)
20989 return false;
20990 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20991 if (e->flags & EDGE_SIBCALL)
20993 rtx_insn *call = BB_END (e->src);
20994 if (!CALL_P (call))
20995 call = prev_nonnote_nondebug_insn (call);
20996 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20997 if (find_regno_fusage (call, USE, 3)
20998 || is_indirect_tailcall_p (call))
20999 return true;
21001 return false;
21005 /* Compute the distance from register FROM to register TO.
21006 These can be the arg pointer (26), the soft frame pointer (25),
21007 the stack pointer (13) or the hard frame pointer (11).
21008 In thumb mode r7 is used as the soft frame pointer, if needed.
21009 Typical stack layout looks like this:
21011 old stack pointer -> | |
21012 ----
21013 | | \
21014 | | saved arguments for
21015 | | vararg functions
21016 | | /
21018 hard FP & arg pointer -> | | \
21019 | | stack
21020 | | frame
21021 | | /
21023 | | \
21024 | | call saved
21025 | | registers
21026 soft frame pointer -> | | /
21028 | | \
21029 | | local
21030 | | variables
21031 locals base pointer -> | | /
21033 | | \
21034 | | outgoing
21035 | | arguments
21036 current stack pointer -> | | /
21039 For a given function some or all of these stack components
21040 may not be needed, giving rise to the possibility of
21041 eliminating some of the registers.
21043 The values returned by this function must reflect the behavior
21044 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21046 The sign of the number returned reflects the direction of stack
21047 growth, so the values are positive for all eliminations except
21048 from the soft frame pointer to the hard frame pointer.
21050 SFP may point just inside the local variables block to ensure correct
21051 alignment. */
21054 /* Return cached stack offsets. */
21056 static arm_stack_offsets *
21057 arm_get_frame_offsets (void)
21059 struct arm_stack_offsets *offsets;
21061 offsets = &cfun->machine->stack_offsets;
21063 return offsets;
21067 /* Calculate stack offsets. These are used to calculate register elimination
21068 offsets and in prologue/epilogue code. Also calculates which registers
21069 should be saved. */
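/* In outline, the offsets computed below satisfy (a sketch of the code
that follows, ignoring alignment padding):
saved_args = pretend (anonymous) argument bytes
frame = saved_args + static chain bytes (+ 4 if the frame pointer is needed)
saved_regs = saved_args + static chain bytes + callee-saved bytes
soft_frame = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
locals_base = soft_frame + rounded frame size
outgoing_args = locals_base + crtl->outgoing_args_size. */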
21071 static void
21072 arm_compute_frame_layout (void)
21074 struct arm_stack_offsets *offsets;
21075 unsigned long func_type;
21076 int saved;
21077 int core_saved;
21078 HOST_WIDE_INT frame_size;
21079 int i;
21081 offsets = &cfun->machine->stack_offsets;
21083 /* Initially this is the size of the local variables. It will be translated
21084 into an offset once we have determined the size of preceding data. */
21085 frame_size = ROUND_UP_WORD (get_frame_size ());
21087 /* Space for variadic functions. */
21088 offsets->saved_args = crtl->args.pretend_args_size;
21090 /* In Thumb mode this is incorrect, but never used. */
21091 offsets->frame
21092 = (offsets->saved_args
21093 + arm_compute_static_chain_stack_bytes ()
21094 + (frame_pointer_needed ? 4 : 0));
21096 if (TARGET_32BIT)
21098 unsigned int regno;
21100 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21101 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21102 saved = core_saved;
21104 /* We know that SP will be doubleword aligned on entry, and we must
21105 preserve that condition at any subroutine call. We also require the
21106 soft frame pointer to be doubleword aligned. */
21108 if (TARGET_REALLY_IWMMXT)
21110 /* Check for the call-saved iWMMXt registers. */
21111 for (regno = FIRST_IWMMXT_REGNUM;
21112 regno <= LAST_IWMMXT_REGNUM;
21113 regno++)
21114 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21115 saved += 8;
21118 func_type = arm_current_func_type ();
21119 /* Space for saved VFP registers. */
21120 if (! IS_VOLATILE (func_type)
21121 && TARGET_HARD_FLOAT)
21122 saved += arm_get_vfp_saved_size ();
21124 else /* TARGET_THUMB1 */
21126 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21127 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21128 saved = core_saved;
21129 if (TARGET_BACKTRACE)
21130 saved += 16;
21133 /* Saved registers include the stack frame. */
21134 offsets->saved_regs
21135 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21136 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21138 /* A leaf function does not need any stack alignment if it has nothing
21139 on the stack. */
21140 if (crtl->is_leaf && frame_size == 0
21141 /* However if it calls alloca(), we have a dynamically allocated
21142 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21143 && ! cfun->calls_alloca)
21145 offsets->outgoing_args = offsets->soft_frame;
21146 offsets->locals_base = offsets->soft_frame;
21147 return;
21150 /* Ensure SFP has the correct alignment. */
21151 if (ARM_DOUBLEWORD_ALIGN
21152 && (offsets->soft_frame & 7))
21154 offsets->soft_frame += 4;
21155 /* Try to align stack by pushing an extra reg. Don't bother doing this
21156 when there is a stack frame as the alignment will be rolled into
21157 the normal stack adjustment. */
21158 if (frame_size + crtl->outgoing_args_size == 0)
21160 int reg = -1;
21162 /* Register r3 is caller-saved. Normally it does not need to be
21163 saved on entry by the prologue. However if we choose to save
21164 it for padding then we may confuse the compiler into thinking
21165 a prologue sequence is required when in fact it is not. This
21166 will occur when shrink-wrapping if r3 is used as a scratch
21167 register and there are no other callee-saved writes.
21169 This situation can be avoided when other callee-saved registers are
21170 available: r3 is not mandatory for padding, since we can choose a
21171 callee-saved register instead. */
21172 bool prefer_callee_reg_p = false;
21174 /* If it is safe to use r3, then do so. This sometimes
21175 generates better code on Thumb-2 by avoiding the need to
21176 use 32-bit push/pop instructions. */
21177 if (! any_sibcall_could_use_r3 ()
21178 && arm_size_return_regs () <= 12
21179 && (offsets->saved_regs_mask & (1 << 3)) == 0
21180 && (TARGET_THUMB2
21181 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21183 reg = 3;
21184 if (!TARGET_THUMB2)
21185 prefer_callee_reg_p = true;
21187 if (reg == -1
21188 || prefer_callee_reg_p)
21190 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21192 /* Avoid fixed registers; they may be changed at
21193 arbitrary times so it's unsafe to restore them
21194 during the epilogue. */
21195 if (!fixed_regs[i]
21196 && (offsets->saved_regs_mask & (1 << i)) == 0)
21198 reg = i;
21199 break;
21204 if (reg != -1)
21206 offsets->saved_regs += 4;
21207 offsets->saved_regs_mask |= (1 << reg);
21212 offsets->locals_base = offsets->soft_frame + frame_size;
21213 offsets->outgoing_args = (offsets->locals_base
21214 + crtl->outgoing_args_size);
21216 if (ARM_DOUBLEWORD_ALIGN)
21218 /* Ensure SP remains doubleword aligned. */
21219 if (offsets->outgoing_args & 7)
21220 offsets->outgoing_args += 4;
21221 gcc_assert (!(offsets->outgoing_args & 7));
21226 /* Calculate the relative offsets for the different stack pointers. Positive
21227 offsets are in the direction of stack growth. */
21229 HOST_WIDE_INT
21230 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21232 arm_stack_offsets *offsets;
21234 offsets = arm_get_frame_offsets ();
21236 /* OK, now we have enough information to compute the distances.
21237 There must be an entry in these switch tables for each pair
21238 of registers in ELIMINABLE_REGS, even if some of the entries
21239 seem to be redundant or useless. */
21240 switch (from)
21242 case ARG_POINTER_REGNUM:
21243 switch (to)
21245 case THUMB_HARD_FRAME_POINTER_REGNUM:
21246 return 0;
21248 case FRAME_POINTER_REGNUM:
21249 /* This is the reverse of the soft frame pointer
21250 to hard frame pointer elimination below. */
21251 return offsets->soft_frame - offsets->saved_args;
21253 case ARM_HARD_FRAME_POINTER_REGNUM:
21254 /* This is only non-zero in the case where the static chain register
21255 is stored above the frame. */
21256 return offsets->frame - offsets->saved_args - 4;
21258 case STACK_POINTER_REGNUM:
21259 /* If nothing has been pushed on the stack at all
21260 then this will return -4. This *is* correct! */
21261 return offsets->outgoing_args - (offsets->saved_args + 4);
21263 default:
21264 gcc_unreachable ();
21266 gcc_unreachable ();
21268 case FRAME_POINTER_REGNUM:
21269 switch (to)
21271 case THUMB_HARD_FRAME_POINTER_REGNUM:
21272 return 0;
21274 case ARM_HARD_FRAME_POINTER_REGNUM:
21275 /* The hard frame pointer points to the top entry in the
21276 stack frame. The soft frame pointer to the bottom entry
21277 in the stack frame. If there is no stack frame at all,
21278 then they are identical. */
21280 return offsets->frame - offsets->soft_frame;
21282 case STACK_POINTER_REGNUM:
21283 return offsets->outgoing_args - offsets->soft_frame;
21285 default:
21286 gcc_unreachable ();
21288 gcc_unreachable ();
21290 default:
21291 /* You cannot eliminate from the stack pointer.
21292 In theory you could eliminate from the hard frame
21293 pointer to the stack pointer, but this will never
21294 happen, since if a stack frame is not needed the
21295 hard frame pointer will never be used. */
21296 gcc_unreachable ();
21300 /* Given FROM and TO register numbers, say whether this elimination is
21301 allowed. Frame pointer elimination is automatically handled.
21303 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21304 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21305 pointer, we must eliminate FRAME_POINTER_REGNUM into
21306 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21307 ARG_POINTER_REGNUM. */
21309 bool
21310 arm_can_eliminate (const int from, const int to)
21312 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21313 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21314 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21315 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21316 true);
21319 /* Emit RTL to save coprocessor registers on function entry. Returns the
21320 number of bytes pushed. */
21322 static int
21323 arm_save_coproc_regs(void)
21325 int saved_size = 0;
21326 unsigned reg;
21327 unsigned start_reg;
21328 rtx insn;
21330 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21331 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21333 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21334 insn = gen_rtx_MEM (V2SImode, insn);
21335 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21336 RTX_FRAME_RELATED_P (insn) = 1;
21337 saved_size += 8;
21340 if (TARGET_HARD_FLOAT)
21342 start_reg = FIRST_VFP_REGNUM;
21344 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21346 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21347 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21349 if (start_reg != reg)
21350 saved_size += vfp_emit_fstmd (start_reg,
21351 (reg - start_reg) / 2);
21352 start_reg = reg + 2;
21355 if (start_reg != reg)
21356 saved_size += vfp_emit_fstmd (start_reg,
21357 (reg - start_reg) / 2);
21359 return saved_size;
21363 /* Set the Thumb frame pointer from the stack pointer. */
21365 static void
21366 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21368 HOST_WIDE_INT amount;
21369 rtx insn, dwarf;
21371 amount = offsets->outgoing_args - offsets->locals_base;
21372 if (amount < 1024)
21373 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21374 stack_pointer_rtx, GEN_INT (amount)));
21375 else
21377 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21378 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21379 expects the first two operands to be the same. */
21380 if (TARGET_THUMB2)
21382 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21383 stack_pointer_rtx,
21384 hard_frame_pointer_rtx));
21386 else
21388 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21389 hard_frame_pointer_rtx,
21390 stack_pointer_rtx));
21392 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21393 plus_constant (Pmode, stack_pointer_rtx, amount));
21394 RTX_FRAME_RELATED_P (dwarf) = 1;
21395 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21398 RTX_FRAME_RELATED_P (insn) = 1;
21401 struct scratch_reg {
21402 rtx reg;
21403 bool saved;
21406 /* Return a short-lived scratch register for use as a 2nd scratch register on
21407 function entry after the registers are saved in the prologue. This register
21408 must be released by means of release_scratch_register_on_entry. IP is not
21409 considered since it is always used as the 1st scratch register if available.
21411 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21412 mask of live registers. */
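/* Roughly, the selection order below is: LR if it is live and is not
REGNO1; then the first of r4-r10 that is live and is not REGNO1; and
finally r2 or r3, which is spilled to the stack before use (and reloaded
by release_scratch_register_on_entry) if it is live on entry. */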
21414 static void
21415 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21416 unsigned long live_regs)
21418 int regno = -1;
21420 sr->saved = false;
21422 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21423 regno = LR_REGNUM;
21424 else
21426 unsigned int i;
21428 for (i = 4; i < 11; i++)
21429 if (regno1 != i && (live_regs & (1 << i)) != 0)
21431 regno = i;
21432 break;
21435 if (regno < 0)
21437 /* If IP is used as the 1st scratch register for a nested function,
21438 then either r3 wasn't available or is used to preserve IP. */
21439 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21440 regno1 = 3;
21441 regno = (regno1 == 3 ? 2 : 3);
21442 sr->saved
21443 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21444 regno);
21448 sr->reg = gen_rtx_REG (SImode, regno);
21449 if (sr->saved)
21451 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21452 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21453 rtx x = gen_rtx_SET (stack_pointer_rtx,
21454 plus_constant (Pmode, stack_pointer_rtx, -4));
21455 RTX_FRAME_RELATED_P (insn) = 1;
21456 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21460 /* Release a scratch register obtained from the preceding function. */
21462 static void
21463 release_scratch_register_on_entry (struct scratch_reg *sr)
21465 if (sr->saved)
21467 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21468 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21469 rtx x = gen_rtx_SET (stack_pointer_rtx,
21470 plus_constant (Pmode, stack_pointer_rtx, 4));
21471 RTX_FRAME_RELATED_P (insn) = 1;
21472 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21476 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21478 #if PROBE_INTERVAL > 4096
21479 #error Cannot use indexed addressing mode for stack probing
21480 #endif
21482 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21483 inclusive. These are offsets from the current stack pointer. REGNO1
21484 is the index number of the 1st scratch register and LIVE_REGS is the
21485 mask of live registers. */
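/* For the simplest case (SIZE <= PROBE_INTERVAL) the generated sequence
is roughly:
REG1 = FIRST + PROBE_INTERVAL
REG1 = SP - REG1
probe (a store) at REG1 + (PROBE_INTERVAL - SIZE)
i.e. a single probe at SP - FIRST - SIZE. Larger sizes use either an
unrolled run of probes or the loop emitted via gen_probe_stack_range. */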
21487 static void
21488 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21489 unsigned int regno1, unsigned long live_regs)
21491 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21493 /* See if we have a constant small number of probes to generate. If so,
21494 that's the easy case. */
21495 if (size <= PROBE_INTERVAL)
21497 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21498 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21499 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21502 /* The run-time loop is made up of 10 insns in the generic case while the
21503 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21504 else if (size <= 5 * PROBE_INTERVAL)
21506 HOST_WIDE_INT i, rem;
21508 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21509 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21510 emit_stack_probe (reg1);
21512 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21513 it exceeds SIZE. If only two probes are needed, this will not
21514 generate any code. Then probe at FIRST + SIZE. */
21515 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21517 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21518 emit_stack_probe (reg1);
21521 rem = size - (i - PROBE_INTERVAL);
21522 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21524 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21525 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21527 else
21528 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21531 /* Otherwise, do the same as above, but in a loop. Note that we must be
21532 extra careful with variables wrapping around because we might be at
21533 the very top (or the very bottom) of the address space and we have
21534 to be able to handle this case properly; in particular, we use an
21535 equality test for the loop condition. */
21536 else
21538 HOST_WIDE_INT rounded_size;
21539 struct scratch_reg sr;
21541 get_scratch_register_on_entry (&sr, regno1, live_regs);
21543 emit_move_insn (reg1, GEN_INT (first));
21546 /* Step 1: round SIZE to the previous multiple of the interval. */
21548 rounded_size = size & -PROBE_INTERVAL;
21549 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21552 /* Step 2: compute initial and final value of the loop counter. */
21554 /* TEST_ADDR = SP + FIRST. */
21555 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21557 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21558 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21561 /* Step 3: the loop
21563 do
21564 {
21565 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21566 probe at TEST_ADDR
21567 }
21568 while (TEST_ADDR != LAST_ADDR)
21570 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21571 until it is equal to ROUNDED_SIZE. */
21573 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21576 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21577 that SIZE is equal to ROUNDED_SIZE. */
21579 if (size != rounded_size)
21581 HOST_WIDE_INT rem = size - rounded_size;
21583 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21585 emit_set_insn (sr.reg,
21586 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21587 emit_stack_probe (plus_constant (Pmode, sr.reg,
21588 PROBE_INTERVAL - rem));
21590 else
21591 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21594 release_scratch_register_on_entry (&sr);
21597 /* Make sure nothing is scheduled before we are done. */
21598 emit_insn (gen_blockage ());
21601 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21602 absolute addresses. */
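/* The emitted loop looks roughly like this (with PROBE_INTERVAL == 4096,
and with r4 and r5 standing for REG1 and REG2):
.LPSRL0:
sub r4, r4, #4096
str r0, [r4, #0]
cmp r4, r5
bne .LPSRL0 */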
21604 const char *
21605 output_probe_stack_range (rtx reg1, rtx reg2)
21607 static int labelno = 0;
21608 char loop_lab[32];
21609 rtx xops[2];
21611 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21613 /* Loop. */
21614 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21616 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21617 xops[0] = reg1;
21618 xops[1] = GEN_INT (PROBE_INTERVAL);
21619 output_asm_insn ("sub\t%0, %0, %1", xops);
21621 /* Probe at TEST_ADDR. */
21622 output_asm_insn ("str\tr0, [%0, #0]", xops);
21624 /* Test if TEST_ADDR == LAST_ADDR. */
21625 xops[1] = reg2;
21626 output_asm_insn ("cmp\t%0, %1", xops);
21628 /* Branch. */
21629 fputs ("\tbne\t", asm_out_file);
21630 assemble_name_raw (asm_out_file, loop_lab);
21631 fputc ('\n', asm_out_file);
21633 return "";
21636 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21637 function. */
21638 void
21639 arm_expand_prologue (void)
21641 rtx amount;
21642 rtx insn;
21643 rtx ip_rtx;
21644 unsigned long live_regs_mask;
21645 unsigned long func_type;
21646 int fp_offset = 0;
21647 int saved_pretend_args = 0;
21648 int saved_regs = 0;
21649 unsigned HOST_WIDE_INT args_to_push;
21650 HOST_WIDE_INT size;
21651 arm_stack_offsets *offsets;
21652 bool clobber_ip;
21654 func_type = arm_current_func_type ();
21656 /* Naked functions don't have prologues. */
21657 if (IS_NAKED (func_type))
21659 if (flag_stack_usage_info)
21660 current_function_static_stack_size = 0;
21661 return;
21664 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21665 args_to_push = crtl->args.pretend_args_size;
21667 /* Compute which register we will have to save onto the stack. */
21668 offsets = arm_get_frame_offsets ();
21669 live_regs_mask = offsets->saved_regs_mask;
21671 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21673 if (IS_STACKALIGN (func_type))
21675 rtx r0, r1;
21677 /* Handle a word-aligned stack pointer. We generate the following:
21679 mov r0, sp
21680 bic r1, r0, #7
21681 mov sp, r1
21682 <save and restore r0 in normal prologue/epilogue>
21683 mov sp, r0
21684 bx lr
21686 The unwinder doesn't need to know about the stack realignment.
21687 Just tell it we saved SP in r0. */
21688 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21690 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21691 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21693 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21694 RTX_FRAME_RELATED_P (insn) = 1;
21695 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21697 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21699 /* ??? The CFA changes here, which may cause GDB to conclude that it
21700 has entered a different function. That said, the unwind info is
21701 correct, individually, before and after this instruction because
21702 we've described the save of SP, which will override the default
21703 handling of SP as restoring from the CFA. */
21704 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21707 /* Let's compute the static_chain_stack_bytes required and store it. Right
21708 now the value must be -1 as stored by arm_init_machine_status (). */
21709 cfun->machine->static_chain_stack_bytes
21710 = arm_compute_static_chain_stack_bytes ();
21712 /* The static chain register is the same as the IP register. If it is
21713 clobbered when creating the frame, we need to save and restore it. */
21714 clobber_ip = IS_NESTED (func_type)
21715 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21716 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21717 || flag_stack_clash_protection)
21718 && !df_regs_ever_live_p (LR_REGNUM)
21719 && arm_r3_live_at_start_p ()));
21721 /* Find somewhere to store IP whilst the frame is being created.
21722 We try the following places in order:
21724 1. The last argument register r3 if it is available.
21725 2. A slot on the stack above the frame if there are no
21726 arguments to push onto the stack.
21727 3. Register r3 again, after pushing the argument registers
21728 onto the stack, if this is a varargs function.
21729 4. The last slot on the stack created for the arguments to
21730 push, if this isn't a varargs function.
21732 Note - we only need to tell the dwarf2 backend about the SP
21733 adjustment in the second variant; the static chain register
21734 doesn't need to be unwound, as it doesn't contain a value
21735 inherited from the caller. */
21736 if (clobber_ip)
21738 if (!arm_r3_live_at_start_p ())
21739 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21740 else if (args_to_push == 0)
21742 rtx addr, dwarf;
21744 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21745 saved_regs += 4;
21747 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21748 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21749 fp_offset = 4;
21751 /* Just tell the dwarf backend that we adjusted SP. */
21752 dwarf = gen_rtx_SET (stack_pointer_rtx,
21753 plus_constant (Pmode, stack_pointer_rtx,
21754 -fp_offset));
21755 RTX_FRAME_RELATED_P (insn) = 1;
21756 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21758 else
21760 /* Store the args on the stack. */
21761 if (cfun->machine->uses_anonymous_args)
21763 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21764 (0xf0 >> (args_to_push / 4)) & 0xf);
21765 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21766 saved_pretend_args = 1;
21768 else
21770 rtx addr, dwarf;
21772 if (args_to_push == 4)
21773 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21774 else
21775 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21776 plus_constant (Pmode,
21777 stack_pointer_rtx,
21778 -args_to_push));
21780 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21782 /* Just tell the dwarf backend that we adjusted SP. */
21783 dwarf = gen_rtx_SET (stack_pointer_rtx,
21784 plus_constant (Pmode, stack_pointer_rtx,
21785 -args_to_push));
21786 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21789 RTX_FRAME_RELATED_P (insn) = 1;
21790 fp_offset = args_to_push;
21791 args_to_push = 0;
21795 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21797 if (IS_INTERRUPT (func_type))
21799 /* Interrupt functions must not corrupt any registers.
21800 Creating a frame pointer however, corrupts the IP
21801 register, so we must push it first. */
21802 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21804 /* Do not set RTX_FRAME_RELATED_P on this insn.
21805 The dwarf stack unwinding code only wants to see one
21806 stack decrement per function, and this is not it. If
21807 this instruction is labeled as being part of the frame
21808 creation sequence then dwarf2out_frame_debug_expr will
21809 die when it encounters the assignment of IP to FP
21810 later on, since the use of SP here establishes SP as
21811 the CFA register and not IP.
21813 Anyway this instruction is not really part of the stack
21814 frame creation although it is part of the prologue. */
21817 insn = emit_set_insn (ip_rtx,
21818 plus_constant (Pmode, stack_pointer_rtx,
21819 fp_offset));
21820 RTX_FRAME_RELATED_P (insn) = 1;
21823 if (args_to_push)
21825 /* Push the argument registers, or reserve space for them. */
21826 if (cfun->machine->uses_anonymous_args)
21827 insn = emit_multi_reg_push
21828 ((0xf0 >> (args_to_push / 4)) & 0xf,
21829 (0xf0 >> (args_to_push / 4)) & 0xf);
21830 else
21831 insn = emit_insn
21832 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21833 GEN_INT (- args_to_push)));
21834 RTX_FRAME_RELATED_P (insn) = 1;
21837 /* If this is an interrupt service routine, and the link register
21838 is going to be pushed, and we're not generating the extra push
21839 of IP (needed when a frame pointer is required and the APCS frame
21840 layout is in use), then subtracting four from LR now means that
21841 the function return can be done with a single instruction. */
21842 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21843 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21844 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21845 && TARGET_ARM)
21847 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21849 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21852 if (live_regs_mask)
21854 unsigned long dwarf_regs_mask = live_regs_mask;
21856 saved_regs += bit_count (live_regs_mask) * 4;
21857 if (optimize_size && !frame_pointer_needed
21858 && saved_regs == offsets->saved_regs - offsets->saved_args)
21860 /* If no coprocessor registers are being pushed and we don't have
21861 to worry about a frame pointer then push extra registers to
21862 create the stack frame. This is done in a way that does not
21863 alter the frame layout, so is independent of the epilogue. */
21864 int n;
21865 int frame;
21866 n = 0;
21867 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21868 n++;
21869 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21870 if (frame && n * 4 >= frame)
21872 n = frame / 4;
21873 live_regs_mask |= (1 << n) - 1;
21874 saved_regs += frame;
21878 if (TARGET_LDRD
21879 && current_tune->prefer_ldrd_strd
21880 && !optimize_function_for_size_p (cfun))
21882 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21883 if (TARGET_THUMB2)
21884 thumb2_emit_strd_push (live_regs_mask);
21885 else if (TARGET_ARM
21886 && !TARGET_APCS_FRAME
21887 && !IS_INTERRUPT (func_type))
21888 arm_emit_strd_push (live_regs_mask);
21889 else
21891 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21892 RTX_FRAME_RELATED_P (insn) = 1;
21895 else
21897 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21898 RTX_FRAME_RELATED_P (insn) = 1;
21902 if (! IS_VOLATILE (func_type))
21903 saved_regs += arm_save_coproc_regs ();
21905 if (frame_pointer_needed && TARGET_ARM)
21907 /* Create the new frame pointer. */
21908 if (TARGET_APCS_FRAME)
21910 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21911 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21912 RTX_FRAME_RELATED_P (insn) = 1;
21914 else
21916 insn = GEN_INT (saved_regs - (4 + fp_offset));
21917 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21918 stack_pointer_rtx, insn));
21919 RTX_FRAME_RELATED_P (insn) = 1;
21923 size = offsets->outgoing_args - offsets->saved_args;
21924 if (flag_stack_usage_info)
21925 current_function_static_stack_size = size;
21927 /* If this isn't an interrupt service routine and we have a frame, then do
21928 stack checking. We use IP as the first scratch register, except for the
21929 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21930 if (!IS_INTERRUPT (func_type)
21931 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21932 || flag_stack_clash_protection))
21934 unsigned int regno;
21936 if (!IS_NESTED (func_type) || clobber_ip)
21937 regno = IP_REGNUM;
21938 else if (df_regs_ever_live_p (LR_REGNUM))
21939 regno = LR_REGNUM;
21940 else
21941 regno = 3;
21943 if (crtl->is_leaf && !cfun->calls_alloca)
21945 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21946 arm_emit_probe_stack_range (get_stack_check_protect (),
21947 size - get_stack_check_protect (),
21948 regno, live_regs_mask);
21950 else if (size > 0)
21951 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21952 regno, live_regs_mask);
21955 /* Recover the static chain register. */
21956 if (clobber_ip)
21958 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21959 insn = gen_rtx_REG (SImode, 3);
21960 else
21962 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21963 insn = gen_frame_mem (SImode, insn);
21965 emit_set_insn (ip_rtx, insn);
21966 emit_insn (gen_force_register_use (ip_rtx));
21969 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21971 /* This add can produce multiple insns for a large constant, so we
21972 need to get tricky. */
21973 rtx_insn *last = get_last_insn ();
21975 amount = GEN_INT (offsets->saved_args + saved_regs
21976 - offsets->outgoing_args);
21978 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21979 amount));
21980 do
21981 {
21982 last = last ? NEXT_INSN (last) : get_insns ();
21983 RTX_FRAME_RELATED_P (last) = 1;
21984 }
21985 while (last != insn);
21987 /* If the frame pointer is needed, emit a special barrier that
21988 will prevent the scheduler from moving stores to the frame
21989 before the stack adjustment. */
21990 if (frame_pointer_needed)
21991 emit_insn (gen_stack_tie (stack_pointer_rtx,
21992 hard_frame_pointer_rtx));
21996 if (frame_pointer_needed && TARGET_THUMB2)
21997 thumb_set_frame_pointer (offsets);
21999 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22001 unsigned long mask;
22003 mask = live_regs_mask;
22004 mask &= THUMB2_WORK_REGS;
22005 if (!IS_NESTED (func_type))
22006 mask |= (1 << IP_REGNUM);
22007 arm_load_pic_register (mask);
22010 /* If we are profiling, make sure no instructions are scheduled before
22011 the call to mcount. Similarly if the user has requested no
22012 scheduling in the prolog. Similarly if we want non-call exceptions
22013 using the EABI unwinder, to prevent faulting instructions from being
22014 swapped with a stack adjustment. */
22015 if (crtl->profile || !TARGET_SCHED_PROLOG
22016 || (arm_except_unwind_info (&global_options) == UI_TARGET
22017 && cfun->can_throw_non_call_exceptions))
22018 emit_insn (gen_blockage ());
22020 /* If the link register is being kept alive, with the return address in it,
22021 then make sure that it does not get reused by the ce2 pass. */
22022 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22023 cfun->machine->lr_save_eliminated = 1;
22026 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22027 static void
22028 arm_print_condition (FILE *stream)
22030 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22032 /* Branch conversion is not implemented for Thumb-2. */
22033 if (TARGET_THUMB)
22035 output_operand_lossage ("predicated Thumb instruction");
22036 return;
22038 if (current_insn_predicate != NULL)
22040 output_operand_lossage
22041 ("predicated instruction in conditional sequence");
22042 return;
22045 fputs (arm_condition_codes[arm_current_cc], stream);
22047 else if (current_insn_predicate)
22049 enum arm_cond_code code;
22051 if (TARGET_THUMB1)
22053 output_operand_lossage ("predicated Thumb instruction");
22054 return;
22057 code = get_arm_condition_code (current_insn_predicate);
22058 fputs (arm_condition_codes[code], stream);
22063 /* Globally reserved letters: acln
22064 Punctuation letters currently used: @_|?().!#
22065 Lower case letters currently used: bcdefhimpqtvwxyz
22066 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22067 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22069 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22071 If CODE is 'd', then the X is a condition operand and the instruction
22072 should only be executed if the condition is true.
22073 If CODE is 'D', then the X is a condition operand and the instruction
22074 should only be executed if the condition is false: however, if the mode
22075 of the comparison is CCFPEmode, then always execute the instruction -- we
22076 do this because in these circumstances !GE does not necessarily imply LT;
22077 in these cases the instruction pattern will take care to make sure that
22078 an instruction containing %d will follow, thereby undoing the effects of
22079 doing this instruction unconditionally.
22080 If CODE is 'N' then X is a floating point operand that must be negated
22081 before output.
22082 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22083 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
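/* A couple of illustrative examples: %B applied to the constant 5 prints
-6 (the bitwise inverse, sign-extended), while %M applied to a DImode
value held in r0 prints "{r0-r1}". */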
22084 static void
22085 arm_print_operand (FILE *stream, rtx x, int code)
22087 switch (code)
22089 case '@':
22090 fputs (ASM_COMMENT_START, stream);
22091 return;
22093 case '_':
22094 fputs (user_label_prefix, stream);
22095 return;
22097 case '|':
22098 fputs (REGISTER_PREFIX, stream);
22099 return;
22101 case '?':
22102 arm_print_condition (stream);
22103 return;
22105 case '.':
22106 /* The current condition code for a condition code setting instruction.
22107 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22108 fputc('s', stream);
22109 arm_print_condition (stream);
22110 return;
22112 case '!':
22113 /* If the instruction is conditionally executed then print
22114 the current condition code, otherwise print 's'. */
22115 gcc_assert (TARGET_THUMB2);
22116 if (current_insn_predicate)
22117 arm_print_condition (stream);
22118 else
22119 fputc('s', stream);
22120 break;
22122 /* %# is a "break" sequence. It doesn't output anything, but is used to
22123 separate e.g. operand numbers from following text, if that text consists
22124 of further digits which we don't want to be part of the operand
22125 number. */
22126 case '#':
22127 return;
22129 case 'N':
22131 REAL_VALUE_TYPE r;
22132 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22133 fprintf (stream, "%s", fp_const_from_val (&r));
22135 return;
22137 /* An integer or symbol address without a preceding # sign. */
22138 case 'c':
22139 switch (GET_CODE (x))
22141 case CONST_INT:
22142 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22143 break;
22145 case SYMBOL_REF:
22146 output_addr_const (stream, x);
22147 break;
22149 case CONST:
22150 if (GET_CODE (XEXP (x, 0)) == PLUS
22151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22153 output_addr_const (stream, x);
22154 break;
22156 /* Fall through. */
22158 default:
22159 output_operand_lossage ("Unsupported operand for code '%c'", code);
22161 return;
22163 /* An integer that we want to print in HEX. */
22164 case 'x':
22165 switch (GET_CODE (x))
22167 case CONST_INT:
22168 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22169 break;
22171 default:
22172 output_operand_lossage ("Unsupported operand for code '%c'", code);
22174 return;
22176 case 'B':
22177 if (CONST_INT_P (x))
22179 HOST_WIDE_INT val;
22180 val = ARM_SIGN_EXTEND (~INTVAL (x));
22181 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22183 else
22185 putc ('~', stream);
22186 output_addr_const (stream, x);
22188 return;
22190 case 'b':
22191 /* Print the log2 of a CONST_INT. */
22193 HOST_WIDE_INT val;
22195 if (!CONST_INT_P (x)
22196 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22197 output_operand_lossage ("Unsupported operand for code '%c'", code);
22198 else
22199 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22201 return;
22203 case 'L':
22204 /* The low 16 bits of an immediate constant. */
22205 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22206 return;
22208 case 'i':
22209 fprintf (stream, "%s", arithmetic_instr (x, 1));
22210 return;
22212 case 'I':
22213 fprintf (stream, "%s", arithmetic_instr (x, 0));
22214 return;
22216 case 'S':
22218 HOST_WIDE_INT val;
22219 const char *shift;
22221 shift = shift_op (x, &val);
22223 if (shift)
22225 fprintf (stream, ", %s ", shift);
22226 if (val == -1)
22227 arm_print_operand (stream, XEXP (x, 1), 0);
22228 else
22229 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22232 return;
22234 /* An explanation of the 'Q', 'R' and 'H' register operands:
22236 In a pair of registers containing a DI or DF value the 'Q'
22237 operand returns the register number of the register containing
22238 the least significant part of the value. The 'R' operand returns
22239 the register number of the register containing the most
22240 significant part of the value.
22242 The 'H' operand returns the higher of the two register numbers.
22243 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22244 same as the 'Q' operand, since the most significant part of the
22245 value is held in the lower number register. The reverse is true
22246 on systems where WORDS_BIG_ENDIAN is false.
22248 The purpose of these operands is to distinguish between cases
22249 where the endian-ness of the values is important (for example
22250 when they are added together), and cases where the endian-ness
22251 is irrelevant, but the order of register operations is important.
22252 For example when loading a value from memory into a register
22253 pair, the endian-ness does not matter. Provided that the value
22254 from the lower memory address is put into the lower numbered
22255 register, and the value from the higher address is put into the
22256 higher numbered register, the load will work regardless of whether
22257 the value being loaded is big-wordian or little-wordian. The
22258 order of the two register loads can matter however, if the address
22259 of the memory location is actually held in one of the registers
22260 being overwritten by the load.
22262 The 'Q' and 'R' constraints are also available for 64-bit
22263 constants. */
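/* For example, for a DImode value held in {r2, r3} on a little-endian
target, %Q prints r2, while %R and %H both print r3. */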
22264 case 'Q':
22265 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22267 rtx part = gen_lowpart (SImode, x);
22268 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22269 return;
22272 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22274 output_operand_lossage ("invalid operand for code '%c'", code);
22275 return;
22278 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22279 return;
22281 case 'R':
22282 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22284 machine_mode mode = GET_MODE (x);
22285 rtx part;
22287 if (mode == VOIDmode)
22288 mode = DImode;
22289 part = gen_highpart_mode (SImode, mode, x);
22290 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22291 return;
22294 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22296 output_operand_lossage ("invalid operand for code '%c'", code);
22297 return;
22300 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22301 return;
22303 case 'H':
22304 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22306 output_operand_lossage ("invalid operand for code '%c'", code);
22307 return;
22310 asm_fprintf (stream, "%r", REGNO (x) + 1);
22311 return;
22313 case 'J':
22314 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22316 output_operand_lossage ("invalid operand for code '%c'", code);
22317 return;
22320 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22321 return;
22323 case 'K':
22324 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22326 output_operand_lossage ("invalid operand for code '%c'", code);
22327 return;
22330 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22331 return;
22333 case 'm':
22334 asm_fprintf (stream, "%r",
22335 REG_P (XEXP (x, 0))
22336 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22337 return;
22339 case 'M':
22340 asm_fprintf (stream, "{%r-%r}",
22341 REGNO (x),
22342 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22343 return;
22345 /* Like 'M', but writing doubleword vector registers, for use by Neon
22346 insns. */
22347 case 'h':
22349 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22350 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22351 if (numregs == 1)
22352 asm_fprintf (stream, "{d%d}", regno);
22353 else
22354 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22356 return;
22358 case 'd':
22359 /* CONST_TRUE_RTX means always -- that's the default. */
22360 if (x == const_true_rtx)
22361 return;
22363 if (!COMPARISON_P (x))
22365 output_operand_lossage ("invalid operand for code '%c'", code);
22366 return;
22369 fputs (arm_condition_codes[get_arm_condition_code (x)],
22370 stream);
22371 return;
22373 case 'D':
22374 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22375 want to do that. */
22376 if (x == const_true_rtx)
22378 output_operand_lossage ("instruction never executed");
22379 return;
22381 if (!COMPARISON_P (x))
22383 output_operand_lossage ("invalid operand for code '%c'", code);
22384 return;
22387 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22388 (get_arm_condition_code (x))],
22389 stream);
22390 return;
22392 case 's':
22393 case 'V':
22394 case 'W':
22395 case 'X':
22396 case 'Y':
22397 case 'Z':
22398 /* Former Maverick support, removed after GCC-4.7. */
22399 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22400 return;
22402 case 'U':
22403 if (!REG_P (x)
22404 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22405 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22406 /* Bad value for wCG register number. */
22408 output_operand_lossage ("invalid operand for code '%c'", code);
22409 return;
22412 else
22413 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22414 return;
22416 /* Print an iWMMXt control register name. */
22417 case 'w':
22418 if (!CONST_INT_P (x)
22419 || INTVAL (x) < 0
22420 || INTVAL (x) >= 16)
22421 /* Bad value for wC register number. */
22423 output_operand_lossage ("invalid operand for code '%c'", code);
22424 return;
22427 else
22429 static const char * wc_reg_names [16] =
22431 "wCID", "wCon", "wCSSF", "wCASF",
22432 "wC4", "wC5", "wC6", "wC7",
22433 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22434 "wC12", "wC13", "wC14", "wC15"
22437 fputs (wc_reg_names [INTVAL (x)], stream);
22439 return;
22441 /* Print the high single-precision register of a VFP double-precision
22442 register. */
22443 case 'p':
22445 machine_mode mode = GET_MODE (x);
22446 int regno;
22448 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22450 output_operand_lossage ("invalid operand for code '%c'", code);
22451 return;
22454 regno = REGNO (x);
22455 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22457 output_operand_lossage ("invalid operand for code '%c'", code);
22458 return;
22461 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22463 return;
22465 /* Print a VFP/Neon double precision or quad precision register name. */
22466 case 'P':
22467 case 'q':
22469 machine_mode mode = GET_MODE (x);
22470 int is_quad = (code == 'q');
22471 int regno;
22473 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22475 output_operand_lossage ("invalid operand for code '%c'", code);
22476 return;
22479 if (!REG_P (x)
22480 || !IS_VFP_REGNUM (REGNO (x)))
22482 output_operand_lossage ("invalid operand for code '%c'", code);
22483 return;
22486 regno = REGNO (x);
22487 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22488 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22490 output_operand_lossage ("invalid operand for code '%c'", code);
22491 return;
22494 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22495 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22497 return;
22499 /* These two codes print the low/high doubleword register of a Neon quad
22500 register, respectively. For pair-structure types, can also print
22501 low/high quadword registers. */
22502 case 'e':
22503 case 'f':
22505 machine_mode mode = GET_MODE (x);
22506 int regno;
22508 if ((GET_MODE_SIZE (mode) != 16
22509 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22511 output_operand_lossage ("invalid operand for code '%c'", code);
22512 return;
22515 regno = REGNO (x);
22516 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22518 output_operand_lossage ("invalid operand for code '%c'", code);
22519 return;
22522 if (GET_MODE_SIZE (mode) == 16)
22523 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22524 + (code == 'f' ? 1 : 0));
22525 else
22526 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22527 + (code == 'f' ? 1 : 0));
22529 return;
22531 /* Print a VFPv3 floating-point constant, represented as an integer
22532 index. */
22533 case 'G':
22535 int index = vfp3_const_double_index (x);
22536 gcc_assert (index != -1);
22537 fprintf (stream, "%d", index);
22539 return;
22541 /* Print bits representing opcode features for Neon.
22543 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22544 and polynomials as unsigned.
22546 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22548 Bit 2 is 1 for rounding functions, 0 otherwise. */
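/* So, for example, a bits value of 3 (signed + float) makes %T and %F
both print 'f', while a value of 0 makes them print 'u' and 'i'
respectively. */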
22550 /* Identify the type as 's', 'u', 'p' or 'f'. */
22551 case 'T':
22553 HOST_WIDE_INT bits = INTVAL (x);
22554 fputc ("uspf"[bits & 3], stream);
22556 return;
22558 /* Likewise, but signed and unsigned integers are both 'i'. */
22559 case 'F':
22561 HOST_WIDE_INT bits = INTVAL (x);
22562 fputc ("iipf"[bits & 3], stream);
22564 return;
22566 /* As for 'T', but emit 'u' instead of 'p'. */
22567 case 't':
22569 HOST_WIDE_INT bits = INTVAL (x);
22570 fputc ("usuf"[bits & 3], stream);
22572 return;
22574 /* Bit 2: rounding (vs none). */
22575 case 'O':
22577 HOST_WIDE_INT bits = INTVAL (x);
22578 fputs ((bits & 4) != 0 ? "r" : "", stream);
22580 return;
22582 /* Memory operand for vld1/vst1 instruction. */
22583 case 'A':
22585 rtx addr;
22586 bool postinc = FALSE;
22587 rtx postinc_reg = NULL;
22588 unsigned align, memsize, align_bits;
22590 gcc_assert (MEM_P (x));
22591 addr = XEXP (x, 0);
22592 if (GET_CODE (addr) == POST_INC)
22594 postinc = 1;
22595 addr = XEXP (addr, 0);
22597 if (GET_CODE (addr) == POST_MODIFY)
22599 postinc_reg = XEXP( XEXP (addr, 1), 1);
22600 addr = XEXP (addr, 0);
22602 asm_fprintf (stream, "[%r", REGNO (addr));
22604 /* We know the alignment of this access, so we can emit a hint in the
22605 instruction (for some alignments) as an aid to the memory subsystem
22606 of the target. */
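/* For instance, a 16-byte access known to be 128-bit aligned is printed
as "[r0:128]" (assuming the base register is r0). */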
22607 align = MEM_ALIGN (x) >> 3;
22608 memsize = MEM_SIZE (x);
22610 /* Only certain alignment specifiers are supported by the hardware. */
22611 if (memsize == 32 && (align % 32) == 0)
22612 align_bits = 256;
22613 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22614 align_bits = 128;
22615 else if (memsize >= 8 && (align % 8) == 0)
22616 align_bits = 64;
22617 else
22618 align_bits = 0;
22620 if (align_bits != 0)
22621 asm_fprintf (stream, ":%d", align_bits);
22623 asm_fprintf (stream, "]");
22625 if (postinc)
22626 fputs("!", stream);
22627 if (postinc_reg)
22628 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22630 return;
22632 case 'C':
22634 rtx addr;
22636 gcc_assert (MEM_P (x));
22637 addr = XEXP (x, 0);
22638 gcc_assert (REG_P (addr));
22639 asm_fprintf (stream, "[%r]", REGNO (addr));
22641 return;
22643 /* Translate an S register number into a D register number and element index. */
22644 case 'y':
22646 machine_mode mode = GET_MODE (x);
22647 int regno;
22649 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22651 output_operand_lossage ("invalid operand for code '%c'", code);
22652 return;
22655 regno = REGNO (x);
22656 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22658 output_operand_lossage ("invalid operand for code '%c'", code);
22659 return;
22662 regno = regno - FIRST_VFP_REGNUM;
22663 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22665 return;
22667 case 'v':
22668 gcc_assert (CONST_DOUBLE_P (x));
22669 int result;
22670 result = vfp3_const_double_for_fract_bits (x);
22671 if (result == 0)
22672 result = vfp3_const_double_for_bits (x);
22673 fprintf (stream, "#%d", result);
22674 return;
22676 /* Register specifier for vld1.16/vst1.16. Translate the S register
22677 number into a D register number and element index. */
22678 case 'z':
22680 machine_mode mode = GET_MODE (x);
22681 int regno;
22683 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22685 output_operand_lossage ("invalid operand for code '%c'", code);
22686 return;
22689 regno = REGNO (x);
22690 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22692 output_operand_lossage ("invalid operand for code '%c'", code);
22693 return;
22696 regno = regno - FIRST_VFP_REGNUM;
22697 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22699 return;
22701 default:
22702 if (x == 0)
22704 output_operand_lossage ("missing operand");
22705 return;
22708 switch (GET_CODE (x))
22710 case REG:
22711 asm_fprintf (stream, "%r", REGNO (x));
22712 break;
22714 case MEM:
22715 output_address (GET_MODE (x), XEXP (x, 0));
22716 break;
22718 case CONST_DOUBLE:
22720 char fpstr[20];
22721 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22722 sizeof (fpstr), 0, 1);
22723 fprintf (stream, "#%s", fpstr);
22725 break;
22727 default:
22728 gcc_assert (GET_CODE (x) != NEG);
22729 fputc ('#', stream);
22730 if (GET_CODE (x) == HIGH)
22732 fputs (":lower16:", stream);
22733 x = XEXP (x, 0);
22736 output_addr_const (stream, x);
22737 break;
22742 /* Target hook for printing a memory address. */
22743 static void
22744 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22746 if (TARGET_32BIT)
22748 int is_minus = GET_CODE (x) == MINUS;
22750 if (REG_P (x))
22751 asm_fprintf (stream, "[%r]", REGNO (x));
22752 else if (GET_CODE (x) == PLUS || is_minus)
22754 rtx base = XEXP (x, 0);
22755 rtx index = XEXP (x, 1);
22756 HOST_WIDE_INT offset = 0;
22757 if (!REG_P (base)
22758 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22760 /* Ensure that BASE is a register (one of them must be), and
22761 that SP is not used as an index register. */
22763 std::swap (base, index);
22765 switch (GET_CODE (index))
22767 case CONST_INT:
22768 offset = INTVAL (index);
22769 if (is_minus)
22770 offset = -offset;
22771 asm_fprintf (stream, "[%r, #%wd]",
22772 REGNO (base), offset);
22773 break;
22775 case REG:
22776 asm_fprintf (stream, "[%r, %s%r]",
22777 REGNO (base), is_minus ? "-" : "",
22778 REGNO (index));
22779 break;
22781 case MULT:
22782 case ASHIFTRT:
22783 case LSHIFTRT:
22784 case ASHIFT:
22785 case ROTATERT:
22787 asm_fprintf (stream, "[%r, %s%r",
22788 REGNO (base), is_minus ? "-" : "",
22789 REGNO (XEXP (index, 0)));
22790 arm_print_operand (stream, index, 'S');
22791 fputs ("]", stream);
22792 break;
22795 default:
22796 gcc_unreachable ();
22799 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22800 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22802 gcc_assert (REG_P (XEXP (x, 0)));
22804 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22805 asm_fprintf (stream, "[%r, #%s%d]!",
22806 REGNO (XEXP (x, 0)),
22807 GET_CODE (x) == PRE_DEC ? "-" : "",
22808 GET_MODE_SIZE (mode));
22809 else
22810 asm_fprintf (stream, "[%r], #%s%d",
22811 REGNO (XEXP (x, 0)),
22812 GET_CODE (x) == POST_DEC ? "-" : "",
22813 GET_MODE_SIZE (mode));
22815 else if (GET_CODE (x) == PRE_MODIFY)
22817 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22818 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22819 asm_fprintf (stream, "#%wd]!",
22820 INTVAL (XEXP (XEXP (x, 1), 1)));
22821 else
22822 asm_fprintf (stream, "%r]!",
22823 REGNO (XEXP (XEXP (x, 1), 1)));
22825 else if (GET_CODE (x) == POST_MODIFY)
22827 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22828 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22829 asm_fprintf (stream, "#%wd",
22830 INTVAL (XEXP (XEXP (x, 1), 1)));
22831 else
22832 asm_fprintf (stream, "%r",
22833 REGNO (XEXP (XEXP (x, 1), 1)));
22835 else output_addr_const (stream, x);
22837 else
22839 if (REG_P (x))
22840 asm_fprintf (stream, "[%r]", REGNO (x));
22841 else if (GET_CODE (x) == POST_INC)
22842 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22843 else if (GET_CODE (x) == PLUS)
22845 gcc_assert (REG_P (XEXP (x, 0)));
22846 if (CONST_INT_P (XEXP (x, 1)))
22847 asm_fprintf (stream, "[%r, #%wd]",
22848 REGNO (XEXP (x, 0)),
22849 INTVAL (XEXP (x, 1)));
22850 else
22851 asm_fprintf (stream, "[%r, %r]",
22852 REGNO (XEXP (x, 0)),
22853 REGNO (XEXP (x, 1)));
22855 else
22856 output_addr_const (stream, x);
22860 /* Target hook for indicating whether a punctuation character for
22861 TARGET_PRINT_OPERAND is valid. */
22862 static bool
22863 arm_print_operand_punct_valid_p (unsigned char code)
22865 return (code == '@' || code == '|' || code == '.'
22866 || code == '(' || code == ')' || code == '#'
22867 || (TARGET_32BIT && (code == '?'))
22868 || (TARGET_THUMB2 && (code == '!'))
22869 || (TARGET_THUMB && (code == '_')));
22872 /* Target hook for assembling integer objects. The ARM version needs to
22873 handle word-sized values specially. */
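/* For instance, a word-sized PIC reference placed in the literal pool is
   emitted as ".word foo(GOT)" when the symbol may be overridden at link
   time, as ".word foo(GOTOFF)" when it binds locally, and as a plain
   ".word foo" otherwise.  */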
22874 static bool
22875 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22877 machine_mode mode;
22879 if (size == UNITS_PER_WORD && aligned_p)
22881 fputs ("\t.word\t", asm_out_file);
22882 output_addr_const (asm_out_file, x);
22884 /* Mark symbols as position independent. We only do this in the
22885 .text segment, not in the .data segment. */
22886 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22887 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22889 /* See legitimize_pic_address for an explanation of the
22890 TARGET_VXWORKS_RTP check. */
22891 /* References to weak symbols cannot be resolved locally:
22892 they may be overridden by a non-weak definition at link
22893 time. */
22894 if (!arm_pic_data_is_text_relative
22895 || (GET_CODE (x) == SYMBOL_REF
22896 && (!SYMBOL_REF_LOCAL_P (x)
22897 || (SYMBOL_REF_DECL (x)
22898 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22899 fputs ("(GOT)", asm_out_file);
22900 else
22901 fputs ("(GOTOFF)", asm_out_file);
22903 fputc ('\n', asm_out_file);
22904 return true;
22907 mode = GET_MODE (x);
22909 if (arm_vector_mode_supported_p (mode))
22911 int i, units;
22913 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22915 units = CONST_VECTOR_NUNITS (x);
22916 size = GET_MODE_UNIT_SIZE (mode);
22918 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22919 for (i = 0; i < units; i++)
22921 rtx elt = CONST_VECTOR_ELT (x, i);
22922 assemble_integer
22923 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22925 else
22926 for (i = 0; i < units; i++)
22928 rtx elt = CONST_VECTOR_ELT (x, i);
22929 assemble_real
22930 (*CONST_DOUBLE_REAL_VALUE (elt),
22931 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22932 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22935 return true;
22938 return default_assemble_integer (x, size, aligned_p);
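/* Output a static constructor or destructor table entry for SYMBOL with the
   given PRIORITY.  Non-AAPCS targets fall back to the default named-section
   handling; AAPCS targets emit a ".word symbol(target1)" into .init_array or
   .fini_array (or a priority-suffixed variant of those sections).  */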
22941 static void
22942 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22944 section *s;
22946 if (!TARGET_AAPCS_BASED)
22948 (is_ctor ?
22949 default_named_section_asm_out_constructor
22950 : default_named_section_asm_out_destructor) (symbol, priority);
22951 return;
22954 /* Put these in the .init_array section, using a special relocation. */
22955 if (priority != DEFAULT_INIT_PRIORITY)
22957 char buf[18];
22958 sprintf (buf, "%s.%.5u",
22959 is_ctor ? ".init_array" : ".fini_array",
22960 priority);
22961 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22963 else if (is_ctor)
22964 s = ctors_section;
22965 else
22966 s = dtors_section;
22968 switch_to_section (s);
22969 assemble_align (POINTER_SIZE);
22970 fputs ("\t.word\t", asm_out_file);
22971 output_addr_const (asm_out_file, symbol);
22972 fputs ("(target1)\n", asm_out_file);
22975 /* Add a function to the list of static constructors. */
22977 static void
22978 arm_elf_asm_constructor (rtx symbol, int priority)
22980 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22983 /* Add a function to the list of static destructors. */
22985 static void
22986 arm_elf_asm_destructor (rtx symbol, int priority)
22988 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22991 /* A finite state machine takes care of noticing whether or not instructions
22992 can be conditionally executed, and thus decreases execution time and code
22993 size by deleting branch instructions. The fsm is controlled by
22994 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22996 /* The states of the fsm controlling condition codes are:
22997 0: normal, do nothing special
22998 1: make ASM_OUTPUT_OPCODE not output this instruction
22999 2: make ASM_OUTPUT_OPCODE not output this instruction
23000 3: make instructions conditional
23001 4: make instructions conditional
23003 State transitions (state->state by whom under condition):
23004 0 -> 1 final_prescan_insn if the `target' is a label
23005 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23006 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23007 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23008 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23009 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23010 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23011 (the target insn is arm_target_insn).
23013 If the jump clobbers the conditions then we use states 2 and 4.
23015 A similar thing can be done with conditional return insns.
23017 XXX In case the `target' is an unconditional branch, this conditionalising
23018 of the instructions always reduces code size, but not always execution
23019 time. But then, I want to reduce the code size to somewhere near what
23020 /bin/cc produces. */
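/* As a rough example of the transformation this enables, the sequence

	cmp	r0, #0
	bne	.L1
	add	r1, r1, #1
   .L1:

   can be emitted as

	cmp	r0, #0
	addeq	r1, r1, #1

   replacing the branch with a conditionally executed instruction.  */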
23022 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23023 instructions. When a COND_EXEC instruction is seen the subsequent
23024 instructions are scanned so that multiple conditional instructions can be
23025 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23026 specify the length and true/false mask for the IT block. These will be
23027 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
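/* For example, a three-insn COND_EXEC run whose conditions are EQ, NE, EQ
   is emitted as a single IT block along the lines of

	itet	eq
	addeq	r0, r0, #1
	subne	r1, r1, #1
	moveq	r2, #0

   (the mnemonics are only illustrative; the mask letters come from
   arm_condexec_mask).  */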
23029 /* Returns the index of the ARM condition code string in
23030 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23031 COMPARISON should be an rtx like `(eq (...) (...))'. */
23033 enum arm_cond_code
23034 maybe_get_arm_condition_code (rtx comparison)
23036 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23037 enum arm_cond_code code;
23038 enum rtx_code comp_code = GET_CODE (comparison);
23040 if (GET_MODE_CLASS (mode) != MODE_CC)
23041 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23042 XEXP (comparison, 1));
23044 switch (mode)
23046 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23047 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23048 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23049 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23050 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23051 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23052 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23053 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23054 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23055 case E_CC_DLTUmode: code = ARM_CC;
23057 dominance:
23058 if (comp_code == EQ)
23059 return ARM_INVERSE_CONDITION_CODE (code);
23060 if (comp_code == NE)
23061 return code;
23062 return ARM_NV;
23064 case E_CC_NOOVmode:
23065 switch (comp_code)
23067 case NE: return ARM_NE;
23068 case EQ: return ARM_EQ;
23069 case GE: return ARM_PL;
23070 case LT: return ARM_MI;
23071 default: return ARM_NV;
23074 case E_CC_Zmode:
23075 switch (comp_code)
23077 case NE: return ARM_NE;
23078 case EQ: return ARM_EQ;
23079 default: return ARM_NV;
23082 case E_CC_Nmode:
23083 switch (comp_code)
23085 case NE: return ARM_MI;
23086 case EQ: return ARM_PL;
23087 default: return ARM_NV;
23090 case E_CCFPEmode:
23091 case E_CCFPmode:
23092 /* We can handle all cases except UNEQ and LTGT. */
23093 switch (comp_code)
23095 case GE: return ARM_GE;
23096 case GT: return ARM_GT;
23097 case LE: return ARM_LS;
23098 case LT: return ARM_MI;
23099 case NE: return ARM_NE;
23100 case EQ: return ARM_EQ;
23101 case ORDERED: return ARM_VC;
23102 case UNORDERED: return ARM_VS;
23103 case UNLT: return ARM_LT;
23104 case UNLE: return ARM_LE;
23105 case UNGT: return ARM_HI;
23106 case UNGE: return ARM_PL;
23107 /* UNEQ and LTGT do not have a representation. */
23108 case UNEQ: /* Fall through. */
23109 case LTGT: /* Fall through. */
23110 default: return ARM_NV;
23113 case E_CC_SWPmode:
23114 switch (comp_code)
23116 case NE: return ARM_NE;
23117 case EQ: return ARM_EQ;
23118 case GE: return ARM_LE;
23119 case GT: return ARM_LT;
23120 case LE: return ARM_GE;
23121 case LT: return ARM_GT;
23122 case GEU: return ARM_LS;
23123 case GTU: return ARM_CC;
23124 case LEU: return ARM_CS;
23125 case LTU: return ARM_HI;
23126 default: return ARM_NV;
23129 case E_CC_Cmode:
23130 switch (comp_code)
23132 case LTU: return ARM_CS;
23133 case GEU: return ARM_CC;
23134 case NE: return ARM_CS;
23135 case EQ: return ARM_CC;
23136 default: return ARM_NV;
23139 case E_CC_CZmode:
23140 switch (comp_code)
23142 case NE: return ARM_NE;
23143 case EQ: return ARM_EQ;
23144 case GEU: return ARM_CS;
23145 case GTU: return ARM_HI;
23146 case LEU: return ARM_LS;
23147 case LTU: return ARM_CC;
23148 default: return ARM_NV;
23151 case E_CC_NCVmode:
23152 switch (comp_code)
23154 case GE: return ARM_GE;
23155 case LT: return ARM_LT;
23156 case GEU: return ARM_CS;
23157 case LTU: return ARM_CC;
23158 default: return ARM_NV;
23161 case E_CC_Vmode:
23162 switch (comp_code)
23164 case NE: return ARM_VS;
23165 case EQ: return ARM_VC;
23166 default: return ARM_NV;
23169 case E_CCmode:
23170 switch (comp_code)
23172 case NE: return ARM_NE;
23173 case EQ: return ARM_EQ;
23174 case GE: return ARM_GE;
23175 case GT: return ARM_GT;
23176 case LE: return ARM_LE;
23177 case LT: return ARM_LT;
23178 case GEU: return ARM_CS;
23179 case GTU: return ARM_HI;
23180 case LEU: return ARM_LS;
23181 case LTU: return ARM_CC;
23182 default: return ARM_NV;
23185 default: gcc_unreachable ();
23189 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23190 static enum arm_cond_code
23191 get_arm_condition_code (rtx comparison)
23193 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23194 gcc_assert (code != ARM_NV);
23195 return code;
23198 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23199 code registers when not targeting Thumb1. The VFP condition register
23200 only exists when generating hard-float code. */
23201 static bool
23202 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23204 if (!TARGET_32BIT)
23205 return false;
23207 *p1 = CC_REGNUM;
23208 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23209 return true;
23212 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23213 instructions. */
23214 void
23215 thumb2_final_prescan_insn (rtx_insn *insn)
23217 rtx_insn *first_insn = insn;
23218 rtx body = PATTERN (insn);
23219 rtx predicate;
23220 enum arm_cond_code code;
23221 int n;
23222 int mask;
23223 int max;
23225 /* max_insns_skipped in the tuning was already taken into account in the
23226 cost model of the ifcvt pass when generating COND_EXEC insns. At this
23227 stage just emit IT blocks as large as we can; it does not make sense
23228 to split them. */
23229 max = MAX_INSN_PER_IT_BLOCK;
23231 /* Remove the previous insn from the count of insns to be output. */
23232 if (arm_condexec_count)
23233 arm_condexec_count--;
23235 /* Nothing to do if we are already inside a conditional block. */
23236 if (arm_condexec_count)
23237 return;
23239 if (GET_CODE (body) != COND_EXEC)
23240 return;
23242 /* Conditional jumps are implemented directly. */
23243 if (JUMP_P (insn))
23244 return;
23246 predicate = COND_EXEC_TEST (body);
23247 arm_current_cc = get_arm_condition_code (predicate);
23249 n = get_attr_ce_count (insn);
23250 arm_condexec_count = 1;
23251 arm_condexec_mask = (1 << n) - 1;
23252 arm_condexec_masklen = n;
23253 /* See if subsequent instructions can be combined into the same block. */
23254 for (;;)
23256 insn = next_nonnote_insn (insn);
23258 /* Jumping into the middle of an IT block is illegal, so a label or
23259 barrier terminates the block. */
23260 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23261 break;
23263 body = PATTERN (insn);
23264 /* USE and CLOBBER aren't really insns, so just skip them. */
23265 if (GET_CODE (body) == USE
23266 || GET_CODE (body) == CLOBBER)
23267 continue;
23269 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23270 if (GET_CODE (body) != COND_EXEC)
23271 break;
23272 /* Maximum number of conditionally executed instructions in a block. */
23273 n = get_attr_ce_count (insn);
23274 if (arm_condexec_masklen + n > max)
23275 break;
23277 predicate = COND_EXEC_TEST (body);
23278 code = get_arm_condition_code (predicate);
23279 mask = (1 << n) - 1;
23280 if (arm_current_cc == code)
23281 arm_condexec_mask |= (mask << arm_condexec_masklen);
23282 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23283 break;
23285 arm_condexec_count++;
23286 arm_condexec_masklen += n;
23288 /* A jump must be the last instruction in a conditional block. */
23289 if (JUMP_P (insn))
23290 break;
23292 /* Restore recog_data (getting the attributes of other insns can
23293 destroy this array, but final.c assumes that it remains intact
23294 across this call). */
23295 extract_constrain_insn_cached (first_insn);
23298 void
23299 arm_final_prescan_insn (rtx_insn *insn)
23301 /* BODY will hold the body of INSN. */
23302 rtx body = PATTERN (insn);
23304 /* This will be 1 if trying to repeat the trick, and things need to be
23305 reversed if it appears to fail. */
23306 int reverse = 0;
23308 /* If we start with a return insn, we only succeed if we find another one. */
23309 int seeking_return = 0;
23310 enum rtx_code return_code = UNKNOWN;
23312 /* START_INSN will hold the insn from where we start looking. This is the
23313 first insn after the following code_label if REVERSE is true. */
23314 rtx_insn *start_insn = insn;
23316 /* If in state 4, check if the target branch is reached, in order to
23317 change back to state 0. */
23318 if (arm_ccfsm_state == 4)
23320 if (insn == arm_target_insn)
23322 arm_target_insn = NULL;
23323 arm_ccfsm_state = 0;
23325 return;
23328 /* If in state 3, it is possible to repeat the trick, if this insn is an
23329 unconditional branch to a label, and immediately following this branch
23330 is the previous target label which is only used once, and the label this
23331 branch jumps to is not too far off. */
23332 if (arm_ccfsm_state == 3)
23334 if (simplejump_p (insn))
23336 start_insn = next_nonnote_insn (start_insn);
23337 if (BARRIER_P (start_insn))
23339 /* XXX Isn't this always a barrier? */
23340 start_insn = next_nonnote_insn (start_insn);
23342 if (LABEL_P (start_insn)
23343 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23344 && LABEL_NUSES (start_insn) == 1)
23345 reverse = TRUE;
23346 else
23347 return;
23349 else if (ANY_RETURN_P (body))
23351 start_insn = next_nonnote_insn (start_insn);
23352 if (BARRIER_P (start_insn))
23353 start_insn = next_nonnote_insn (start_insn);
23354 if (LABEL_P (start_insn)
23355 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23356 && LABEL_NUSES (start_insn) == 1)
23358 reverse = TRUE;
23359 seeking_return = 1;
23360 return_code = GET_CODE (body);
23362 else
23363 return;
23365 else
23366 return;
23369 gcc_assert (!arm_ccfsm_state || reverse);
23370 if (!JUMP_P (insn))
23371 return;
23373 /* This jump might be paralleled with a clobber of the condition codes;
23374 the jump should always come first. */
23375 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23376 body = XVECEXP (body, 0, 0);
23378 if (reverse
23379 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23380 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23382 int insns_skipped;
23383 int fail = FALSE, succeed = FALSE;
23384 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23385 int then_not_else = TRUE;
23386 rtx_insn *this_insn = start_insn;
23387 rtx label = 0;
23389 /* Register the insn jumped to. */
23390 if (reverse)
23392 if (!seeking_return)
23393 label = XEXP (SET_SRC (body), 0);
23395 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23396 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23397 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23399 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23400 then_not_else = FALSE;
23402 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23404 seeking_return = 1;
23405 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23407 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23409 seeking_return = 1;
23410 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23411 then_not_else = FALSE;
23413 else
23414 gcc_unreachable ();
23416 /* See how many insns this branch skips, and what kind of insns. If all
23417 insns are okay, and the label or unconditional branch to the same
23418 label is not too far away, succeed. */
23419 for (insns_skipped = 0;
23420 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23422 rtx scanbody;
23424 this_insn = next_nonnote_insn (this_insn);
23425 if (!this_insn)
23426 break;
23428 switch (GET_CODE (this_insn))
23430 case CODE_LABEL:
23431 /* Succeed if it is the target label, otherwise fail since
23432 control falls in from somewhere else. */
23433 if (this_insn == label)
23435 arm_ccfsm_state = 1;
23436 succeed = TRUE;
23438 else
23439 fail = TRUE;
23440 break;
23442 case BARRIER:
23443 /* Succeed if the following insn is the target label.
23444 Otherwise fail.
23445 If return insns are used then the last insn in a function
23446 will be a barrier. */
23447 this_insn = next_nonnote_insn (this_insn);
23448 if (this_insn && this_insn == label)
23450 arm_ccfsm_state = 1;
23451 succeed = TRUE;
23453 else
23454 fail = TRUE;
23455 break;
23457 case CALL_INSN:
23458 /* The AAPCS says that conditional calls should not be
23459 used since they make interworking inefficient (the
23460 linker can't transform BL<cond> into BLX). That's
23461 only a problem if the machine has BLX. */
23462 if (arm_arch5)
23464 fail = TRUE;
23465 break;
23468 /* Succeed if the following insn is the target label, or
23469 if the following two insns are a barrier and the
23470 target label. */
23471 this_insn = next_nonnote_insn (this_insn);
23472 if (this_insn && BARRIER_P (this_insn))
23473 this_insn = next_nonnote_insn (this_insn);
23475 if (this_insn && this_insn == label
23476 && insns_skipped < max_insns_skipped)
23478 arm_ccfsm_state = 1;
23479 succeed = TRUE;
23481 else
23482 fail = TRUE;
23483 break;
23485 case JUMP_INSN:
23486 /* If this is an unconditional branch to the same label, succeed.
23487 If it is to another label, do nothing. If it is conditional,
23488 fail. */
23489 /* XXX Probably, the tests for SET and the PC are
23490 unnecessary. */
23492 scanbody = PATTERN (this_insn);
23493 if (GET_CODE (scanbody) == SET
23494 && GET_CODE (SET_DEST (scanbody)) == PC)
23496 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23497 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23499 arm_ccfsm_state = 2;
23500 succeed = TRUE;
23502 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23503 fail = TRUE;
23505 /* Fail if a conditional return is undesirable (e.g. on a
23506 StrongARM), but still allow this if optimizing for size. */
23507 else if (GET_CODE (scanbody) == return_code
23508 && !use_return_insn (TRUE, NULL)
23509 && !optimize_size)
23510 fail = TRUE;
23511 else if (GET_CODE (scanbody) == return_code)
23513 arm_ccfsm_state = 2;
23514 succeed = TRUE;
23516 else if (GET_CODE (scanbody) == PARALLEL)
23518 switch (get_attr_conds (this_insn))
23520 case CONDS_NOCOND:
23521 break;
23522 default:
23523 fail = TRUE;
23524 break;
23527 else
23528 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23530 break;
23532 case INSN:
23533 /* Instructions using or affecting the condition codes make it
23534 fail. */
23535 scanbody = PATTERN (this_insn);
23536 if (!(GET_CODE (scanbody) == SET
23537 || GET_CODE (scanbody) == PARALLEL)
23538 || get_attr_conds (this_insn) != CONDS_NOCOND)
23539 fail = TRUE;
23540 break;
23542 default:
23543 break;
23546 if (succeed)
23548 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23549 arm_target_label = CODE_LABEL_NUMBER (label);
23550 else
23552 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23554 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23556 this_insn = next_nonnote_insn (this_insn);
23557 gcc_assert (!this_insn
23558 || (!BARRIER_P (this_insn)
23559 && !LABEL_P (this_insn)));
23561 if (!this_insn)
23563 /* Oh dear! We ran off the end; give up. */
23564 extract_constrain_insn_cached (insn);
23565 arm_ccfsm_state = 0;
23566 arm_target_insn = NULL;
23567 return;
23569 arm_target_insn = this_insn;
23572 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23573 what it was. */
23574 if (!reverse)
23575 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23577 if (reverse || then_not_else)
23578 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23581 /* Restore recog_data (getting the attributes of other insns can
23582 destroy this array, but final.c assumes that it remains intact
23583 across this call). */
23584 extract_constrain_insn_cached (insn);
23588 /* Output IT instructions. */
23589 void
23590 thumb2_asm_output_opcode (FILE * stream)
23592 char buff[5];
23593 int n;
23595 if (arm_condexec_mask)
23597 for (n = 0; n < arm_condexec_masklen; n++)
23598 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23599 buff[n] = 0;
23600 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23601 arm_condition_codes[arm_current_cc]);
23602 arm_condexec_mask = 0;
23606 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23607 UNITS_PER_WORD bytes wide. */
23608 static unsigned int
23609 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23611 if (TARGET_32BIT
23612 && regno > PC_REGNUM
23613 && regno != FRAME_POINTER_REGNUM
23614 && regno != ARG_POINTER_REGNUM
23615 && !IS_VFP_REGNUM (regno))
23616 return 1;
23618 return ARM_NUM_REGS (mode);
23621 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23622 static bool
23623 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23625 if (GET_MODE_CLASS (mode) == MODE_CC)
23626 return (regno == CC_REGNUM
23627 || (TARGET_HARD_FLOAT
23628 && regno == VFPCC_REGNUM));
23630 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23631 return false;
23633 if (TARGET_THUMB1)
23634 /* For the Thumb we only allow values bigger than SImode in
23635 registers 0 - 6, so that there is always a second low
23636 register available to hold the upper part of the value.
23637 We probably ought to ensure that the register is the
23638 start of an even numbered register pair. */
23639 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23641 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23643 if (mode == SFmode || mode == SImode)
23644 return VFP_REGNO_OK_FOR_SINGLE (regno);
23646 if (mode == DFmode)
23647 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23649 if (mode == HFmode)
23650 return VFP_REGNO_OK_FOR_SINGLE (regno);
23652 /* VFP registers can hold HImode values. */
23653 if (mode == HImode)
23654 return VFP_REGNO_OK_FOR_SINGLE (regno);
23656 if (TARGET_NEON)
23657 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23658 || (VALID_NEON_QREG_MODE (mode)
23659 && NEON_REGNO_OK_FOR_QUAD (regno))
23660 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23661 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23662 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23663 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23664 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23666 return false;
23669 if (TARGET_REALLY_IWMMXT)
23671 if (IS_IWMMXT_GR_REGNUM (regno))
23672 return mode == SImode;
23674 if (IS_IWMMXT_REGNUM (regno))
23675 return VALID_IWMMXT_REG_MODE (mode);
23678 /* We allow almost any value to be stored in the general registers.
23679 Restrict doubleword quantities to even register pairs in ARM state
23680 so that we can use ldrd. Do not allow very large Neon structure
23681 opaque modes in general registers; they would use too many. */
23682 if (regno <= LAST_ARM_REGNUM)
23684 if (ARM_NUM_REGS (mode) > 4)
23685 return false;
23687 if (TARGET_THUMB2)
23688 return true;
23690 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23693 if (regno == FRAME_POINTER_REGNUM
23694 || regno == ARG_POINTER_REGNUM)
23695 /* We only allow integers in the fake hard registers. */
23696 return GET_MODE_CLASS (mode) == MODE_INT;
23698 return false;
23701 /* Implement TARGET_MODES_TIEABLE_P. */
23703 static bool
23704 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23706 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23707 return true;
23709 /* We specifically want to allow elements of "structure" modes to
23710 be tieable to the structure. This more general condition allows
23711 other rarer situations too. */
23712 if (TARGET_NEON
23713 && (VALID_NEON_DREG_MODE (mode1)
23714 || VALID_NEON_QREG_MODE (mode1)
23715 || VALID_NEON_STRUCT_MODE (mode1))
23716 && (VALID_NEON_DREG_MODE (mode2)
23717 || VALID_NEON_QREG_MODE (mode2)
23718 || VALID_NEON_STRUCT_MODE (mode2)))
23719 return true;
23721 return false;
23724 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23725 not used in arm mode. */
23727 enum reg_class
23728 arm_regno_class (int regno)
23730 if (regno == PC_REGNUM)
23731 return NO_REGS;
23733 if (TARGET_THUMB1)
23735 if (regno == STACK_POINTER_REGNUM)
23736 return STACK_REG;
23737 if (regno == CC_REGNUM)
23738 return CC_REG;
23739 if (regno < 8)
23740 return LO_REGS;
23741 return HI_REGS;
23744 if (TARGET_THUMB2 && regno < 8)
23745 return LO_REGS;
23747 if ( regno <= LAST_ARM_REGNUM
23748 || regno == FRAME_POINTER_REGNUM
23749 || regno == ARG_POINTER_REGNUM)
23750 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23752 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23753 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23755 if (IS_VFP_REGNUM (regno))
23757 if (regno <= D7_VFP_REGNUM)
23758 return VFP_D0_D7_REGS;
23759 else if (regno <= LAST_LO_VFP_REGNUM)
23760 return VFP_LO_REGS;
23761 else
23762 return VFP_HI_REGS;
23765 if (IS_IWMMXT_REGNUM (regno))
23766 return IWMMXT_REGS;
23768 if (IS_IWMMXT_GR_REGNUM (regno))
23769 return IWMMXT_GR_REGS;
23771 return NO_REGS;
23774 /* Handle a special case when computing the offset
23775 of an argument from the frame pointer. */
23777 arm_debugger_arg_offset (int value, rtx addr)
23779 rtx_insn *insn;
23781 /* We are only interested if dbxout_parms() failed to compute the offset. */
23782 if (value != 0)
23783 return 0;
23785 /* We can only cope with the case where the address is held in a register. */
23786 if (!REG_P (addr))
23787 return 0;
23789 /* If we are using the frame pointer to point at the argument, then
23790 an offset of 0 is correct. */
23791 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23792 return 0;
23794 /* If we are using the stack pointer to point at the
23795 argument, then an offset of 0 is correct. */
23796 /* ??? Check this is consistent with thumb2 frame layout. */
23797 if ((TARGET_THUMB || !frame_pointer_needed)
23798 && REGNO (addr) == SP_REGNUM)
23799 return 0;
23801 /* Oh dear. The argument is pointed to by a register rather
23802 than being held in a register, or being stored at a known
23803 offset from the frame pointer. Since GDB only understands
23804 those two kinds of argument we must translate the address
23805 held in the register into an offset from the frame pointer.
23806 We do this by searching through the insns for the function
23807 looking to see where this register gets its value. If the
23808 register is initialized from the frame pointer plus an offset
23809 then we are in luck and we can continue, otherwise we give up.
23811 This code is exercised by producing debugging information
23812 for a function with arguments like this:
23814 double func (double a, double b, int c, double d) {return d;}
23816 Without this code the stab for parameter 'd' will be set to
23817 an offset of 0 from the frame pointer, rather than 8. */
23819 /* The if() statement says:
23821 If the insn is a normal instruction
23822 and if the insn is setting the value in a register
23823 and if the register being set is the register holding the address of the argument
23824 and if the address is computed by an addition
23825 that involves adding to a register
23826 which is the frame pointer
23827 a constant integer
23829 then... */
23831 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23833 if ( NONJUMP_INSN_P (insn)
23834 && GET_CODE (PATTERN (insn)) == SET
23835 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23836 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23837 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23838 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23839 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23842 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23844 break;
23848 if (value == 0)
23850 debug_rtx (addr);
23851 warning (0, "unable to compute real location of stacked parameter");
23852 value = 8; /* XXX magic hack */
23855 return value;
23858 /* Implement TARGET_PROMOTED_TYPE. */
23860 static tree
23861 arm_promoted_type (const_tree t)
23863 if (SCALAR_FLOAT_TYPE_P (t)
23864 && TYPE_PRECISION (t) == 16
23865 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23866 return float_type_node;
23867 return NULL_TREE;
23870 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23871 This simply adds HFmode as a supported mode; even though we don't
23872 implement arithmetic on this type directly, it's supported by
23873 optabs conversions, much the way the double-word arithmetic is
23874 special-cased in the default hook. */
23876 static bool
23877 arm_scalar_mode_supported_p (scalar_mode mode)
23879 if (mode == HFmode)
23880 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23881 else if (ALL_FIXED_POINT_MODE_P (mode))
23882 return true;
23883 else
23884 return default_scalar_mode_supported_p (mode);
23887 /* Set the value of FLT_EVAL_METHOD.
23888 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23890 0: evaluate all operations and constants, whose semantic type has at
23891 most the range and precision of type float, to the range and
23892 precision of float; evaluate all other operations and constants to
23893 the range and precision of the semantic type;
23895 N, where _FloatN is a supported interchange floating type
23896 evaluate all operations and constants, whose semantic type has at
23897 most the range and precision of _FloatN type, to the range and
23898 precision of the _FloatN type; evaluate all other operations and
23899 constants to the range and precision of the semantic type;
23901 If we have the ARMv8.2-A extensions then we support _Float16 in native
23902 precision, so we should set this to 16. Otherwise, we support the type,
23903 but want to evaluate expressions in float precision, so set this to
23904 0. */
23906 static enum flt_eval_method
23907 arm_excess_precision (enum excess_precision_type type)
23909 switch (type)
23911 case EXCESS_PRECISION_TYPE_FAST:
23912 case EXCESS_PRECISION_TYPE_STANDARD:
23913 /* We can calculate either in 16-bit range and precision or
23914 32-bit range and precision. Make that decision based on whether
23915 we have native support for the ARMv8.2-A 16-bit floating-point
23916 instructions or not. */
23917 return (TARGET_VFP_FP16INST
23918 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23919 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23920 case EXCESS_PRECISION_TYPE_IMPLICIT:
23921 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23922 default:
23923 gcc_unreachable ();
23925 return FLT_EVAL_METHOD_UNPREDICTABLE;
23929 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23930 _Float16 if we are using anything other than ieee format for 16-bit
23931 floating point. Otherwise, punt to the default implementation. */
23932 static opt_scalar_float_mode
23933 arm_floatn_mode (int n, bool extended)
23935 if (!extended && n == 16)
23937 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23938 return HFmode;
23939 return opt_scalar_float_mode ();
23942 return default_floatn_mode (n, extended);
23946 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23947 not to early-clobber SRC registers in the process.
23949 We assume that the operands described by SRC and DEST represent a
23950 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23951 number of components into which the copy has been decomposed. */
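/* For example, when copying {d1, d2} into {d2, d3}, emitting d2 := d1 first
   would clobber the source of the second move, so the moves are ordered
   d3 := d2 and then d2 := d1.  */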
23952 void
23953 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23955 unsigned int i;
23957 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23958 || REGNO (operands[0]) < REGNO (operands[1]))
23960 for (i = 0; i < count; i++)
23962 operands[2 * i] = dest[i];
23963 operands[2 * i + 1] = src[i];
23966 else
23968 for (i = 0; i < count; i++)
23970 operands[2 * i] = dest[count - i - 1];
23971 operands[2 * i + 1] = src[count - i - 1];
23976 /* Split operands into moves from op[1] + op[2] into op[0]. */
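/* Roughly: combining d0 and d1 into q0 needs no code at all (only a
   deleted-insn note), the reversed case (d1 then d0 into q0) becomes a
   single VSWP, and the general case is at most two moves ordered so that
   neither overwrites a source that is still needed.  */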
23978 void
23979 neon_split_vcombine (rtx operands[3])
23981 unsigned int dest = REGNO (operands[0]);
23982 unsigned int src1 = REGNO (operands[1]);
23983 unsigned int src2 = REGNO (operands[2]);
23984 machine_mode halfmode = GET_MODE (operands[1]);
23985 unsigned int halfregs = REG_NREGS (operands[1]);
23986 rtx destlo, desthi;
23988 if (src1 == dest && src2 == dest + halfregs)
23990 /* No-op move. Can't split to nothing; emit something. */
23991 emit_note (NOTE_INSN_DELETED);
23992 return;
23995 /* Preserve register attributes for variable tracking. */
23996 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23997 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23998 GET_MODE_SIZE (halfmode));
24000 /* Special case of reversed high/low parts. Use VSWP. */
24001 if (src2 == dest && src1 == dest + halfregs)
24003 rtx x = gen_rtx_SET (destlo, operands[1]);
24004 rtx y = gen_rtx_SET (desthi, operands[2]);
24005 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24006 return;
24009 if (!reg_overlap_mentioned_p (operands[2], destlo))
24011 /* Try to avoid unnecessary moves if part of the result
24012 is in the right place already. */
24013 if (src1 != dest)
24014 emit_move_insn (destlo, operands[1]);
24015 if (src2 != dest + halfregs)
24016 emit_move_insn (desthi, operands[2]);
24018 else
24020 if (src2 != dest + halfregs)
24021 emit_move_insn (desthi, operands[2]);
24022 if (src1 != dest)
24023 emit_move_insn (destlo, operands[1]);
24027 /* Return the number (counting from 0) of
24028 the least significant set bit in MASK. */
24030 inline static int
24031 number_of_first_bit_set (unsigned mask)
24033 return ctz_hwi (mask);
24036 /* Like emit_multi_reg_push, but allowing for a different set of
24037 registers to be described as saved. MASK is the set of registers
24038 to be saved; REAL_REGS is the set of registers to be described as
24039 saved. If REAL_REGS is 0, only describe the stack adjustment. */
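/* Typically MASK includes low scratch registers that temporarily hold
   copies of high registers, while REAL_REGS names the high registers
   actually being saved, so the unwind information describes the logical
   saves rather than the staging copies.  */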
24041 static rtx_insn *
24042 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24044 unsigned long regno;
24045 rtx par[10], tmp, reg;
24046 rtx_insn *insn;
24047 int i, j;
24049 /* Build the parallel of the registers actually being stored. */
24050 for (i = 0; mask; ++i, mask &= mask - 1)
24052 regno = ctz_hwi (mask);
24053 reg = gen_rtx_REG (SImode, regno);
24055 if (i == 0)
24056 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24057 else
24058 tmp = gen_rtx_USE (VOIDmode, reg);
24060 par[i] = tmp;
24063 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24064 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24065 tmp = gen_frame_mem (BLKmode, tmp);
24066 tmp = gen_rtx_SET (tmp, par[0]);
24067 par[0] = tmp;
24069 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24070 insn = emit_insn (tmp);
24072 /* Always build the stack adjustment note for unwind info. */
24073 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24074 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24075 par[0] = tmp;
24077 /* Build the parallel of the registers recorded as saved for unwind. */
24078 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24080 regno = ctz_hwi (real_regs);
24081 reg = gen_rtx_REG (SImode, regno);
24083 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24084 tmp = gen_frame_mem (SImode, tmp);
24085 tmp = gen_rtx_SET (tmp, reg);
24086 RTX_FRAME_RELATED_P (tmp) = 1;
24087 par[j + 1] = tmp;
24090 if (j == 0)
24091 tmp = par[0];
24092 else
24094 RTX_FRAME_RELATED_P (par[0]) = 1;
24095 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24098 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24100 return insn;
24103 /* Emit code to push or pop registers to or from the stack. F is the
24104 assembly file. MASK is the registers to pop. */
24105 static void
24106 thumb_pop (FILE *f, unsigned long mask)
24108 int regno;
24109 int lo_mask = mask & 0xFF;
24111 gcc_assert (mask);
24113 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24115 /* Special case. Do not generate a POP PC statement here, do it in
24116 thumb_exit(). */
24117 thumb_exit (f, -1);
24118 return;
24121 fprintf (f, "\tpop\t{");
24123 /* Look at the low registers first. */
24124 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24126 if (lo_mask & 1)
24128 asm_fprintf (f, "%r", regno);
24130 if ((lo_mask & ~1) != 0)
24131 fprintf (f, ", ");
24135 if (mask & (1 << PC_REGNUM))
24137 /* Catch popping the PC. */
24138 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24139 || IS_CMSE_ENTRY (arm_current_func_type ()))
24141 /* The PC is never popped directly; instead
24142 it is popped into r3 and then BX is used. */
24143 fprintf (f, "}\n");
24145 thumb_exit (f, -1);
24147 return;
24149 else
24151 if (mask & 0xFF)
24152 fprintf (f, ", ");
24154 asm_fprintf (f, "%r", PC_REGNUM);
24158 fprintf (f, "}\n");
24161 /* Generate code to return from a thumb function.
24162 If 'reg_containing_return_addr' is -1, then the return address is
24163 actually on the stack, at the stack pointer.
24165 Note: do not forget to update length attribute of corresponding insn pattern
24166 when changing assembly output (e.g. the length attribute of epilogue_insns when
24167 updating Armv8-M Baseline Security Extensions register clearing
24168 sequences). */
24169 static void
24170 thumb_exit (FILE *f, int reg_containing_return_addr)
24172 unsigned regs_available_for_popping;
24173 unsigned regs_to_pop;
24174 int pops_needed;
24175 unsigned available;
24176 unsigned required;
24177 machine_mode mode;
24178 int size;
24179 int restore_a4 = FALSE;
24181 /* Compute the registers we need to pop. */
24182 regs_to_pop = 0;
24183 pops_needed = 0;
24185 if (reg_containing_return_addr == -1)
24187 regs_to_pop |= 1 << LR_REGNUM;
24188 ++pops_needed;
24191 if (TARGET_BACKTRACE)
24193 /* Restore the (ARM) frame pointer and stack pointer. */
24194 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24195 pops_needed += 2;
24198 /* If there is nothing to pop then just emit the BX instruction and
24199 return. */
24200 if (pops_needed == 0)
24202 if (crtl->calls_eh_return)
24203 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24205 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24207 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24208 reg_containing_return_addr);
24209 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24211 else
24212 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24213 return;
24215 /* Otherwise if we are not supporting interworking and we have not created
24216 a backtrace structure and the function was not entered in ARM mode then
24217 just pop the return address straight into the PC. */
24218 else if (!TARGET_INTERWORK
24219 && !TARGET_BACKTRACE
24220 && !is_called_in_ARM_mode (current_function_decl)
24221 && !crtl->calls_eh_return
24222 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24224 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24225 return;
24228 /* Find out how many of the (return) argument registers we can corrupt. */
24229 regs_available_for_popping = 0;
24231 /* If returning via __builtin_eh_return, the bottom three registers
24232 all contain information needed for the return. */
24233 if (crtl->calls_eh_return)
24234 size = 12;
24235 else
24237 /* Deduce the registers used from the function's return value.
24238 This is more reliable than examining
24239 df_regs_ever_live_p () because that will be set if the register is
24240 ever used in the function, not just if the register is used
24241 to hold a return value. */
24243 if (crtl->return_rtx != 0)
24244 mode = GET_MODE (crtl->return_rtx);
24245 else
24246 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24248 size = GET_MODE_SIZE (mode);
24250 if (size == 0)
24252 /* In a void function we can use any argument register.
24253 In a function that returns a structure on the stack
24254 we can use the second and third argument registers. */
24255 if (mode == VOIDmode)
24256 regs_available_for_popping =
24257 (1 << ARG_REGISTER (1))
24258 | (1 << ARG_REGISTER (2))
24259 | (1 << ARG_REGISTER (3));
24260 else
24261 regs_available_for_popping =
24262 (1 << ARG_REGISTER (2))
24263 | (1 << ARG_REGISTER (3));
24265 else if (size <= 4)
24266 regs_available_for_popping =
24267 (1 << ARG_REGISTER (2))
24268 | (1 << ARG_REGISTER (3));
24269 else if (size <= 8)
24270 regs_available_for_popping =
24271 (1 << ARG_REGISTER (3));
24274 /* Match registers to be popped with registers into which we pop them. */
24275 for (available = regs_available_for_popping,
24276 required = regs_to_pop;
24277 required != 0 && available != 0;
24278 available &= ~(available & - available),
24279 required &= ~(required & - required))
24280 -- pops_needed;
24282 /* If we have any popping registers left over, remove them. */
24283 if (available > 0)
24284 regs_available_for_popping &= ~available;
24286 /* Otherwise if we need another popping register we can use
24287 the fourth argument register. */
24288 else if (pops_needed)
24290 /* If we have not found any free argument registers and
24291 reg a4 contains the return address, we must move it. */
24292 if (regs_available_for_popping == 0
24293 && reg_containing_return_addr == LAST_ARG_REGNUM)
24295 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24296 reg_containing_return_addr = LR_REGNUM;
24298 else if (size > 12)
24300 /* Register a4 is being used to hold part of the return value,
24301 but we have dire need of a free, low register. */
24302 restore_a4 = TRUE;
24304 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24307 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24309 /* The fourth argument register is available. */
24310 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24312 --pops_needed;
24316 /* Pop as many registers as we can. */
24317 thumb_pop (f, regs_available_for_popping);
24319 /* Process the registers we popped. */
24320 if (reg_containing_return_addr == -1)
24322 /* The return address was popped into the lowest numbered register. */
24323 regs_to_pop &= ~(1 << LR_REGNUM);
24325 reg_containing_return_addr =
24326 number_of_first_bit_set (regs_available_for_popping);
24328 /* Remove this register from the mask of available registers, so that
24329 the return address will not be corrupted by further pops. */
24330 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24333 /* If we popped other registers then handle them here. */
24334 if (regs_available_for_popping)
24336 int frame_pointer;
24338 /* Work out which register currently contains the frame pointer. */
24339 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24341 /* Move it into the correct place. */
24342 asm_fprintf (f, "\tmov\t%r, %r\n",
24343 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24345 /* (Temporarily) remove it from the mask of popped registers. */
24346 regs_available_for_popping &= ~(1 << frame_pointer);
24347 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24349 if (regs_available_for_popping)
24351 int stack_pointer;
24353 /* We popped the stack pointer as well,
24354 find the register that contains it. */
24355 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24357 /* Move it into the stack register. */
24358 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24360 /* At this point we have popped all necessary registers, so
24361 do not worry about restoring regs_available_for_popping
24362 to its correct value:
24364 assert (pops_needed == 0)
24365 assert (regs_available_for_popping == (1 << frame_pointer))
24366 assert (regs_to_pop == (1 << STACK_POINTER)) */
24368 else
24370 /* Since we have just moved the popped value into the frame
24371 pointer, the popping register is available for reuse, and
24372 we know that we still have the stack pointer left to pop. */
24373 regs_available_for_popping |= (1 << frame_pointer);
24377 /* If we still have registers left on the stack, but we no longer have
24378 any registers into which we can pop them, then we must move the return
24379 address into the link register and make available the register that
24380 contained it. */
24381 if (regs_available_for_popping == 0 && pops_needed > 0)
24383 regs_available_for_popping |= 1 << reg_containing_return_addr;
24385 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24386 reg_containing_return_addr);
24388 reg_containing_return_addr = LR_REGNUM;
24391 /* If we have registers left on the stack then pop some more.
24392 We know that at most we will want to pop FP and SP. */
24393 if (pops_needed > 0)
24395 int popped_into;
24396 int move_to;
24398 thumb_pop (f, regs_available_for_popping);
24400 /* We have popped either FP or SP.
24401 Move whichever one it is into the correct register. */
24402 popped_into = number_of_first_bit_set (regs_available_for_popping);
24403 move_to = number_of_first_bit_set (regs_to_pop);
24405 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24406 --pops_needed;
24409 /* If we still have not popped everything then we must have only
24410 had one register available to us and we are now popping the SP. */
24411 if (pops_needed > 0)
24413 int popped_into;
24415 thumb_pop (f, regs_available_for_popping);
24417 popped_into = number_of_first_bit_set (regs_available_for_popping);
24419 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24421 /* assert (regs_to_pop == (1 << STACK_POINTER))
24422 assert (pops_needed == 1) */
24426 /* If necessary restore the a4 register. */
24427 if (restore_a4)
24429 if (reg_containing_return_addr != LR_REGNUM)
24431 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24432 reg_containing_return_addr = LR_REGNUM;
24435 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24438 if (crtl->calls_eh_return)
24439 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24441 /* Return to caller. */
24442 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24444 /* This is for the cases where LR is not being used to contain the return
24445 address. It may therefore contain information that we might not want
24446 to leak, hence it must be cleared. The value in R0 will never be a
24447 secret at this point, so it is safe to use it, see the clearing code
24448 in 'cmse_nonsecure_entry_clear_before_return'. */
24449 if (reg_containing_return_addr != LR_REGNUM)
24450 asm_fprintf (f, "\tmov\tlr, r0\n");
24452 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24453 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24455 else
24456 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24459 /* Scan INSN just before assembler is output for it.
24460 For Thumb-1, we track the status of the condition codes; this
24461 information is used in the cbranchsi4_insn pattern. */
24462 void
24463 thumb1_final_prescan_insn (rtx_insn *insn)
24465 if (flag_print_asm_name)
24466 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24467 INSN_ADDRESSES (INSN_UID (insn)));
24468 /* Don't overwrite the previous setter when we get to a cbranch. */
24469 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24471 enum attr_conds conds;
24473 if (cfun->machine->thumb1_cc_insn)
24475 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24476 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24477 CC_STATUS_INIT;
24479 conds = get_attr_conds (insn);
24480 if (conds == CONDS_SET)
24482 rtx set = single_set (insn);
24483 cfun->machine->thumb1_cc_insn = insn;
24484 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24485 cfun->machine->thumb1_cc_op1 = const0_rtx;
24486 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24487 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24489 rtx src1 = XEXP (SET_SRC (set), 1);
24490 if (src1 == const0_rtx)
24491 cfun->machine->thumb1_cc_mode = CCmode;
24493 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24495 /* Record the src register operand instead of dest because
24496 the cprop_hardreg pass propagates src. */
24497 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24500 else if (conds != CONDS_NOCOND)
24501 cfun->machine->thumb1_cc_insn = NULL_RTX;
24504 /* Check if unexpected far jump is used. */
24505 if (cfun->machine->lr_save_eliminated
24506 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24507 internal_error("Unexpected thumb1 far jump");
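/* Return 1 if VAL (truncated to 32 bits) is an 8-bit value shifted left by
   at most 24 bits, i.e. a constant that Thumb-1 can build with a move
   immediate followed by a left shift; return 0 otherwise (including for
   zero, see below).  */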
24511 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24513 unsigned HOST_WIDE_INT mask = 0xff;
24514 int i;
24516 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24517 if (val == 0) /* XXX */
24518 return 0;
24520 for (i = 0; i < 25; i++)
24521 if ((val & (mask << i)) == val)
24522 return 1;
24524 return 0;
24527 /* Returns nonzero if the current function contains,
24528 or might contain, a far jump. */
24529 static int
24530 thumb_far_jump_used_p (void)
24532 rtx_insn *insn;
24533 bool far_jump = false;
24534 unsigned int func_size = 0;
24536 /* If we have already decided that far jumps may be used,
24537 do not bother checking again, and always return true even if
24538 it turns out that they are not being used. Once we have made
24539 the decision that far jumps are present (and hence that the link
24540 register will be pushed onto the stack) we cannot go back on it. */
24541 if (cfun->machine->far_jump_used)
24542 return 1;
24544 /* If this function is not being called from the prologue/epilogue
24545 generation code then it must be being called from the
24546 INITIAL_ELIMINATION_OFFSET macro. */
24547 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24549 /* In this case we know that we are being asked about the elimination
24550 of the arg pointer register. If that register is not being used,
24551 then there are no arguments on the stack, and we do not have to
24552 worry that a far jump might force the prologue to push the link
24553 register, changing the stack offsets. In this case we can just
24554 return false, since the presence of far jumps in the function will
24555 not affect stack offsets.
24557 If the arg pointer is live (or if it was live, but has now been
24558 eliminated and so set to dead) then we do have to test to see if
24559 the function might contain a far jump. This test can lead to some
24560 false negatives, since before reload is completed, the length of
24561 branch instructions is not known, so gcc defaults to returning their
24562 longest length, which in turn sets the far jump attribute to true.
24564 A false negative will not result in bad code being generated, but it
24565 will result in a needless push and pop of the link register. We
24566 hope that this does not occur too often.
24568 If we need doubleword stack alignment this could affect the other
24569 elimination offsets so we can't risk getting it wrong. */
24570 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24571 cfun->machine->arg_pointer_live = 1;
24572 else if (!cfun->machine->arg_pointer_live)
24573 return 0;
24576 /* We should not change far_jump_used during or after reload, as there is
24577 no chance to change stack frame layout. */
24578 if (reload_in_progress || reload_completed)
24579 return 0;
24581 /* Check to see if the function contains a branch
24582 insn with the far jump attribute set. */
24583 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24585 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24587 far_jump = true;
24589 func_size += get_attr_length (insn);
24592 /* The far_jump attribute will always be true for thumb1 before the
24593 shorten_branches pass, so checking it before that pass is not
24594 very useful.
24596 The following heuristic tries to estimate more accurately whether a far
24597 jump may finally be used. The heuristic is very conservative, as there
24598 is no chance to roll back the decision not to use a far jump.
24600 A Thumb-1 long branch offset is -2048 to 2046. In the worst case each
24601 2-byte insn is associated with a 4-byte constant pool entry. Using
24602 function size 2048/3 as the threshold is conservative enough. */
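/* In other words, func_size * 3 models insn bytes plus worst-case literal
   pool: a 2-byte insn plus its 4-byte pool entry occupies 6 bytes, so once
   the summed insn lengths reach roughly 2048/3 bytes the laid-out function
   may approach 2048 bytes and a branch across it could fall outside the
   -2048..2046 range.  */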
24603 if (far_jump)
24605 if ((func_size * 3) >= 2048)
24607 /* Record the fact that we have decided that
24608 the function does use far jumps. */
24609 cfun->machine->far_jump_used = 1;
24610 return 1;
24614 return 0;
24617 /* Return nonzero if FUNC must be entered in ARM mode. */
24618 static bool
24619 is_called_in_ARM_mode (tree func)
24621 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24623 /* Ignore the problem about functions whose address is taken. */
24624 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24625 return true;
24627 #ifdef ARM_PE
24628 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24629 #else
24630 return false;
24631 #endif
24634 /* Given the stack offsets and register mask in OFFSETS, decide how
24635 many additional registers to push instead of subtracting a constant
24636 from SP. For epilogues the principle is the same except we use pop.
24637 FOR_PROLOGUE indicates which we're generating. */
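/* For instance (illustrative only, assuming -Os and that r0 and r1 are
   dead), a prologue that would emit "push {r4, lr}" followed by
   "sub sp, #8" can instead emit "push {r0, r1, r4, lr}", letting the two
   dead-register slots provide the 8 bytes of local frame and saving the
   separate stack adjustment.  */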
24638 static int
24639 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24641 HOST_WIDE_INT amount;
24642 unsigned long live_regs_mask = offsets->saved_regs_mask;
24643 /* Extract a mask of the ones we can give to the Thumb's push/pop
24644 instruction. */
24645 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24646 /* Then count how many other high registers will need to be pushed. */
24647 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24648 int n_free, reg_base, size;
24650 if (!for_prologue && frame_pointer_needed)
24651 amount = offsets->locals_base - offsets->saved_regs;
24652 else
24653 amount = offsets->outgoing_args - offsets->saved_regs;
24655 /* If the stack frame size is 512 exactly, we can save one load
24656 instruction, which should make this a win even when optimizing
24657 for speed. */
24658 if (!optimize_size && amount != 512)
24659 return 0;
24661 /* Can't do this if there are high registers to push. */
24662 if (high_regs_pushed != 0)
24663 return 0;
24665 /* Shouldn't do it in the prologue if no registers would normally
24666 be pushed at all. In the epilogue, also allow it if we'll have
24667 a pop insn for the PC. */
24668 if (l_mask == 0
24669 && (for_prologue
24670 || TARGET_BACKTRACE
24671 || (live_regs_mask & 1 << LR_REGNUM) == 0
24672 || TARGET_INTERWORK
24673 || crtl->args.pretend_args_size != 0))
24674 return 0;
24676 /* Don't do this if thumb_expand_prologue wants to emit instructions
24677 between the push and the stack frame allocation. */
24678 if (for_prologue
24679 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24680 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24681 return 0;
24683 reg_base = 0;
24684 n_free = 0;
24685 if (!for_prologue)
24687 size = arm_size_return_regs ();
24688 reg_base = ARM_NUM_INTS (size);
24689 live_regs_mask >>= reg_base;
24692 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24693 && (for_prologue || call_used_regs[reg_base + n_free]))
24695 live_regs_mask >>= 1;
24696 n_free++;
24699 if (n_free == 0)
24700 return 0;
24701 gcc_assert (amount / 4 * 4 == amount);
24703 if (amount >= 512 && (amount - n_free * 4) < 512)
24704 return (amount - 508) / 4;
24705 if (amount <= n_free * 4)
24706 return amount / 4;
24707 return 0;
24710 /* The bits which aren't usefully expanded as rtl. */
24711 const char *
24712 thumb1_unexpanded_epilogue (void)
24714 arm_stack_offsets *offsets;
24715 int regno;
24716 unsigned long live_regs_mask = 0;
24717 int high_regs_pushed = 0;
24718 int extra_pop;
24719 int had_to_push_lr;
24720 int size;
24722 if (cfun->machine->return_used_this_function != 0)
24723 return "";
24725 if (IS_NAKED (arm_current_func_type ()))
24726 return "";
24728 offsets = arm_get_frame_offsets ();
24729 live_regs_mask = offsets->saved_regs_mask;
24730 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24732 	  /* We can deduce the registers used from the function's return value.
24733 	     This is more reliable than examining df_regs_ever_live_p () because that
24734 will be set if the register is ever used in the function, not just if
24735 the register is used to hold a return value. */
24736 size = arm_size_return_regs ();
24738 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24739 if (extra_pop > 0)
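	      /* The extra registers popped sit just above those used for the
	         return value, so shift the mask past the ARM_NUM_INTS (size)
	         result registers. */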
24741 unsigned long extra_mask = (1 << extra_pop) - 1;
24742 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24745 /* The prolog may have pushed some high registers to use as
24746 work registers. e.g. the testsuite file:
24747 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24748 compiles to produce:
24749 push {r4, r5, r6, r7, lr}
24750 mov r7, r9
24751 mov r6, r8
24752 push {r6, r7}
24753 as part of the prolog. We have to undo that pushing here. */
24755 if (high_regs_pushed)
24757 unsigned long mask = live_regs_mask & 0xff;
24758 int next_hi_reg;
24760 /* The available low registers depend on the size of the value we are
24761 returning. */
24762 if (size <= 12)
24763 mask |= 1 << 3;
24764 if (size <= 8)
24765 mask |= 1 << 2;
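	      /* A return value of up to 8 bytes lives in r0-r1, freeing r2 and
	         r3; up to 12 bytes uses r0-r2, freeing only r3. */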
24767 if (mask == 0)
24768 /* Oh dear! We have no low registers into which we can pop
24769 high registers! */
24770 internal_error
24771 ("no low registers available for popping high registers");
24773 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24774 if (live_regs_mask & (1 << next_hi_reg))
24775 break;
24777 while (high_regs_pushed)
24779 /* Find lo register(s) into which the high register(s) can
24780 be popped. */
24781 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24783 if (mask & (1 << regno))
24784 high_regs_pushed--;
24785 if (high_regs_pushed == 0)
24786 break;
24789 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24791 /* Pop the values into the low register(s). */
24792 thumb_pop (asm_out_file, mask);
24794 /* Move the value(s) into the high registers. */
24795 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24797 if (mask & (1 << regno))
24799 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24800 regno);
24802 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24803 if (live_regs_mask & (1 << next_hi_reg))
24804 break;
24808 live_regs_mask &= ~0x0f00;
24811 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24812 live_regs_mask &= 0xff;
24814 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24816 /* Pop the return address into the PC. */
24817 if (had_to_push_lr)
24818 live_regs_mask |= 1 << PC_REGNUM;
24820 /* Either no argument registers were pushed or a backtrace
24821 structure was created which includes an adjusted stack
24822 pointer, so just pop everything. */
24823 if (live_regs_mask)
24824 thumb_pop (asm_out_file, live_regs_mask);
24826 /* We have either just popped the return address into the
24827 	 PC or it was kept in LR for the entire function.
24828 Note that thumb_pop has already called thumb_exit if the
24829 PC was in the list. */
24830 if (!had_to_push_lr)
24831 thumb_exit (asm_out_file, LR_REGNUM);
24833 else
24835 /* Pop everything but the return address. */
24836 if (live_regs_mask)
24837 thumb_pop (asm_out_file, live_regs_mask);
24839 if (had_to_push_lr)
24841 if (size > 12)
24843 /* We have no free low regs, so save one. */
24844 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24845 LAST_ARG_REGNUM);
24848 /* Get the return address into a temporary register. */
24849 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24851 if (size > 12)
24853 /* Move the return address to lr. */
24854 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24855 LAST_ARG_REGNUM);
24856 /* Restore the low register. */
24857 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24858 IP_REGNUM);
24859 regno = LR_REGNUM;
24861 else
24862 regno = LAST_ARG_REGNUM;
24864 else
24865 regno = LR_REGNUM;
24867 /* Remove the argument registers that were pushed onto the stack. */
24868 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24869 SP_REGNUM, SP_REGNUM,
24870 crtl->args.pretend_args_size);
24872 thumb_exit (asm_out_file, regno);
24875 return "";
24878 /* Functions to save and restore machine-specific function data. */
24879 static struct machine_function *
24880 arm_init_machine_status (void)
24882 struct machine_function *machine;
24883 machine = ggc_cleared_alloc<machine_function> ();
24885 #if ARM_FT_UNKNOWN != 0
24886 machine->func_type = ARM_FT_UNKNOWN;
24887 #endif
24888 machine->static_chain_stack_bytes = -1;
24889 return machine;
24892 /* Return an RTX indicating where the return address to the
24893 calling function can be found. */
24895 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24897 if (count != 0)
24898 return NULL_RTX;
24900 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24903 /* Do anything needed before RTL is emitted for each function. */
24904 void
24905 arm_init_expanders (void)
24907 /* Arrange to initialize and mark the machine per-function status. */
24908 init_machine_status = arm_init_machine_status;
24910 /* This is to stop the combine pass optimizing away the alignment
24911 adjustment of va_arg. */
24912 /* ??? It is claimed that this should not be necessary. */
24913 if (cfun)
24914 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24917 /* Check that FUNC is called with a different mode. */
24919 bool
24920 arm_change_mode_p (tree func)
24922 if (TREE_CODE (func) != FUNCTION_DECL)
24923 return false;
24925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24927 if (!callee_tree)
24928 callee_tree = target_option_default_node;
24930 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24931 int flags = callee_opts->x_target_flags;
24933 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24936 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24937 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24938 to point at the base of the local variables after static stack
24939 space for a function has been allocated. */
24941 HOST_WIDE_INT
24942 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24944 arm_stack_offsets *offsets;
24946 offsets = arm_get_frame_offsets ();
24948 switch (from)
24950 case ARG_POINTER_REGNUM:
24951 switch (to)
24953 case STACK_POINTER_REGNUM:
24954 return offsets->outgoing_args - offsets->saved_args;
24956 case FRAME_POINTER_REGNUM:
24957 return offsets->soft_frame - offsets->saved_args;
24959 case ARM_HARD_FRAME_POINTER_REGNUM:
24960 return offsets->saved_regs - offsets->saved_args;
24962 case THUMB_HARD_FRAME_POINTER_REGNUM:
24963 return offsets->locals_base - offsets->saved_args;
24965 default:
24966 gcc_unreachable ();
24968 break;
24970 case FRAME_POINTER_REGNUM:
24971 switch (to)
24973 case STACK_POINTER_REGNUM:
24974 return offsets->outgoing_args - offsets->soft_frame;
24976 case ARM_HARD_FRAME_POINTER_REGNUM:
24977 return offsets->saved_regs - offsets->soft_frame;
24979 case THUMB_HARD_FRAME_POINTER_REGNUM:
24980 return offsets->locals_base - offsets->soft_frame;
24982 default:
24983 gcc_unreachable ();
24985 break;
24987 default:
24988 gcc_unreachable ();
24992 /* Generate the function's prologue. */
24994 void
24995 thumb1_expand_prologue (void)
24997 rtx_insn *insn;
24999 HOST_WIDE_INT amount;
25000 HOST_WIDE_INT size;
25001 arm_stack_offsets *offsets;
25002 unsigned long func_type;
25003 int regno;
25004 unsigned long live_regs_mask;
25005 unsigned long l_mask;
25006 unsigned high_regs_pushed = 0;
25007 bool lr_needs_saving;
25009 func_type = arm_current_func_type ();
25011 /* Naked functions don't have prologues. */
25012 if (IS_NAKED (func_type))
25014 if (flag_stack_usage_info)
25015 current_function_static_stack_size = 0;
25016 return;
25019 if (IS_INTERRUPT (func_type))
25021 error ("interrupt Service Routines cannot be coded in Thumb mode");
25022 return;
25025 if (is_called_in_ARM_mode (current_function_decl))
25026 emit_insn (gen_prologue_thumb1_interwork ());
25028 offsets = arm_get_frame_offsets ();
25029 live_regs_mask = offsets->saved_regs_mask;
25030 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25032 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25033 l_mask = live_regs_mask & 0x40ff;
25034 /* Then count how many other high registers will need to be pushed. */
25035 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25037 if (crtl->args.pretend_args_size)
25039 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25041 if (cfun->machine->uses_anonymous_args)
25043 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25044 unsigned long mask;
25046 mask = 1ul << (LAST_ARG_REGNUM + 1);
25047 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
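	      /* I.e. the highest NUM_PUSHES argument registers, up to and
	         including r3. */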
25049 insn = thumb1_emit_multi_reg_push (mask, 0);
25051 else
25053 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25054 stack_pointer_rtx, x));
25056 RTX_FRAME_RELATED_P (insn) = 1;
25059 if (TARGET_BACKTRACE)
25061 HOST_WIDE_INT offset = 0;
25062 unsigned work_register;
25063 rtx work_reg, x, arm_hfp_rtx;
25065 /* We have been asked to create a stack backtrace structure.
25066 The code looks like this:
25068 0 .align 2
25069 0 func:
25070 0 sub SP, #16 Reserve space for 4 registers.
25071 2 push {R7} Push low registers.
25072 4 add R7, SP, #20 Get the stack pointer before the push.
25073 6 str R7, [SP, #8] Store the stack pointer
25074 (before reserving the space).
25075 8 mov R7, PC Get hold of the start of this code + 12.
25076 10 str R7, [SP, #16] Store it.
25077 12 mov R7, FP Get hold of the current frame pointer.
25078 14 str R7, [SP, #4] Store it.
25079 16 mov R7, LR Get hold of the current return address.
25080 18 str R7, [SP, #12] Store it.
25081 20 add R7, SP, #16 Point at the start of the
25082 backtrace structure.
25083 22 mov FP, R7 Put this value into the frame pointer. */
25085 work_register = thumb_find_work_register (live_regs_mask);
25086 work_reg = gen_rtx_REG (SImode, work_register);
25087 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25089 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25090 stack_pointer_rtx, GEN_INT (-16)));
25091 RTX_FRAME_RELATED_P (insn) = 1;
25093 if (l_mask)
25095 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25096 RTX_FRAME_RELATED_P (insn) = 1;
25097 lr_needs_saving = false;
25099 offset = bit_count (l_mask) * UNITS_PER_WORD;
25102 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25103 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25105 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25106 x = gen_frame_mem (SImode, x);
25107 emit_move_insn (x, work_reg);
25109 /* Make sure that the instruction fetching the PC is in the right place
25110 to calculate "start of backtrace creation code + 12". */
25111 /* ??? The stores using the common WORK_REG ought to be enough to
25112 prevent the scheduler from doing anything weird. Failing that
25113 we could always move all of the following into an UNSPEC_VOLATILE. */
25114 if (l_mask)
25116 x = gen_rtx_REG (SImode, PC_REGNUM);
25117 emit_move_insn (work_reg, x);
25119 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25120 x = gen_frame_mem (SImode, x);
25121 emit_move_insn (x, work_reg);
25123 emit_move_insn (work_reg, arm_hfp_rtx);
25125 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25126 x = gen_frame_mem (SImode, x);
25127 emit_move_insn (x, work_reg);
25129 else
25131 emit_move_insn (work_reg, arm_hfp_rtx);
25133 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25134 x = gen_frame_mem (SImode, x);
25135 emit_move_insn (x, work_reg);
25137 x = gen_rtx_REG (SImode, PC_REGNUM);
25138 emit_move_insn (work_reg, x);
25140 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25141 x = gen_frame_mem (SImode, x);
25142 emit_move_insn (x, work_reg);
25145 x = gen_rtx_REG (SImode, LR_REGNUM);
25146 emit_move_insn (work_reg, x);
25148 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25149 x = gen_frame_mem (SImode, x);
25150 emit_move_insn (x, work_reg);
25152 x = GEN_INT (offset + 12);
25153 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25155 emit_move_insn (arm_hfp_rtx, work_reg);
25157 /* Optimization: If we are not pushing any low registers but we are going
25158 to push some high registers then delay our first push. This will just
25159 be a push of LR and we can combine it with the push of the first high
25160 register. */
25161 else if ((l_mask & 0xff) != 0
25162 || (high_regs_pushed == 0 && lr_needs_saving))
25164 unsigned long mask = l_mask;
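	      /* Fold in any low registers that thumb1_extra_regs_pushed wants
	         pushed purely to perform part of the stack adjustment. */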
25165 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25166 insn = thumb1_emit_multi_reg_push (mask, mask);
25167 RTX_FRAME_RELATED_P (insn) = 1;
25168 lr_needs_saving = false;
25171 if (high_regs_pushed)
25173 unsigned pushable_regs;
25174 unsigned next_hi_reg;
25175 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25176 : crtl->args.info.nregs;
25177 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25179 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25180 if (live_regs_mask & (1 << next_hi_reg))
25181 break;
25183       /* Here we need to mask out registers used for passing arguments
25184 	 even if they could be pushed.  This avoids using them to stash the high
25185 	 registers, since such a stash could clobber the argument values. */
25186 pushable_regs = l_mask & (~arg_regs_mask);
25187 if (lr_needs_saving)
25188 pushable_regs &= ~(1 << LR_REGNUM);
25190 if (pushable_regs == 0)
25191 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25193 while (high_regs_pushed > 0)
25195 unsigned long real_regs_mask = 0;
25196 unsigned long push_mask = 0;
25198 for (regno = LR_REGNUM; regno >= 0; regno --)
25200 if (pushable_regs & (1 << regno))
25202 emit_move_insn (gen_rtx_REG (SImode, regno),
25203 gen_rtx_REG (SImode, next_hi_reg));
25205 high_regs_pushed --;
25206 real_regs_mask |= (1 << next_hi_reg);
25207 push_mask |= (1 << regno);
25209 if (high_regs_pushed)
25211 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25212 next_hi_reg --)
25213 if (live_regs_mask & (1 << next_hi_reg))
25214 break;
25216 else
25217 break;
25221 /* If we had to find a work register and we have not yet
25222 saved the LR then add it to the list of regs to push. */
25223 if (lr_needs_saving)
25225 push_mask |= 1 << LR_REGNUM;
25226 real_regs_mask |= 1 << LR_REGNUM;
25227 lr_needs_saving = false;
25230 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25231 RTX_FRAME_RELATED_P (insn) = 1;
25235 /* Load the pic register before setting the frame pointer,
25236 so we can use r7 as a temporary work register. */
25237 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25238 arm_load_pic_register (live_regs_mask);
25240 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25241 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25242 stack_pointer_rtx);
25244 size = offsets->outgoing_args - offsets->saved_args;
25245 if (flag_stack_usage_info)
25246 current_function_static_stack_size = size;
25248 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25249 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25250 || flag_stack_clash_protection)
25251 && size)
25252 sorry ("-fstack-check=specific for Thumb-1");
25254 amount = offsets->outgoing_args - offsets->saved_regs;
25255 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25256 if (amount)
25258 if (amount < 512)
25260 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25261 GEN_INT (- amount)));
25262 RTX_FRAME_RELATED_P (insn) = 1;
25264 else
25266 rtx reg, dwarf;
25268 /* The stack decrement is too big for an immediate value in a single
25269 insn. In theory we could issue multiple subtracts, but after
25270 three of them it becomes more space efficient to place the full
25271 value in the constant pool and load into a register. (Also the
25272 ARM debugger really likes to see only one stack decrement per
25273 function). So instead we look for a scratch register into which
25274 we can load the decrement, and then we subtract this from the
25275 stack pointer. Unfortunately on the thumb the only available
25276 scratch registers are the argument registers, and we cannot use
25277 these as they may hold arguments to the function. Instead we
25278 attempt to locate a call preserved register which is used by this
25279 function. If we can find one, then we know that it will have
25280 been pushed at the start of the prologue and so we can corrupt
25281 it now. */
25282 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25283 if (live_regs_mask & (1 << regno))
25284 break;
25286 gcc_assert(regno <= LAST_LO_REGNUM);
25288 reg = gen_rtx_REG (SImode, regno);
25290 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25292 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25293 stack_pointer_rtx, reg));
25295 dwarf = gen_rtx_SET (stack_pointer_rtx,
25296 plus_constant (Pmode, stack_pointer_rtx,
25297 -amount));
25298 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25299 RTX_FRAME_RELATED_P (insn) = 1;
25303 if (frame_pointer_needed)
25304 thumb_set_frame_pointer (offsets);
25306 /* If we are profiling, make sure no instructions are scheduled before
25307 the call to mcount. Similarly if the user has requested no
25308 scheduling in the prolog. Similarly if we want non-call exceptions
25309 using the EABI unwinder, to prevent faulting instructions from being
25310 swapped with a stack adjustment. */
25311 if (crtl->profile || !TARGET_SCHED_PROLOG
25312 || (arm_except_unwind_info (&global_options) == UI_TARGET
25313 && cfun->can_throw_non_call_exceptions))
25314 emit_insn (gen_blockage ());
25316 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25317 if (live_regs_mask & 0xff)
25318 cfun->machine->lr_save_eliminated = 0;
25321 /* Clear caller saved registers not used to pass return values and leaked
25322 condition flags before exiting a cmse_nonsecure_entry function. */
25324 void
25325 cmse_nonsecure_entry_clear_before_return (void)
25327 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25328 uint32_t padding_bits_to_clear = 0;
25329 auto_sbitmap to_clear_bitmap (maxregno + 1);
25330 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25331 tree result_type;
25333 bitmap_clear (to_clear_bitmap);
25334 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25335 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25337 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25338 registers. */
25339 if (TARGET_HARD_FLOAT)
25341 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25343 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25345 /* Make sure we don't clear the two scratch registers used to clear the
25346 relevant FPSCR bits in output_return_instruction. */
25347 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25348 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25349 emit_use (gen_rtx_REG (SImode, 4));
25350 bitmap_clear_bit (to_clear_bitmap, 4);
25353 /* If the user has defined registers to be caller saved, these are no longer
25354 restored by the function before returning and must thus be cleared for
25355 security purposes. */
25356 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25358 /* We do not touch registers that can be used to pass arguments as per
25359 the AAPCS, since these should never be made callee-saved by user
25360 options. */
25361 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25362 continue;
25363 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25364 continue;
25365 if (call_used_regs[regno])
25366 bitmap_set_bit (to_clear_bitmap, regno);
25369 /* Make sure we do not clear the registers used to return the result in. */
25370 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25371 if (!VOID_TYPE_P (result_type))
25373 uint64_t to_clear_return_mask;
25374 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25376 /* No need to check that we return in registers, because we don't
25377 support returning on stack yet. */
25378 gcc_assert (REG_P (result_rtl));
25379 to_clear_return_mask
25380 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25381 &padding_bits_to_clear);
25382 if (to_clear_return_mask)
25384 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25385 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25387 if (to_clear_return_mask & (1ULL << regno))
25388 bitmap_clear_bit (to_clear_bitmap, regno);
25393 if (padding_bits_to_clear != 0)
25395 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25396 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25398 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25399 returning a composite type, which only uses r0. Let's make sure that
25400 	 r1-r3 are cleared too.  */
25401 bitmap_clear (to_clear_arg_regs_bitmap);
25402 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25403 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25406 /* Clear full registers that leak before returning. */
25407 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25408 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25409 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25410 clearing_reg);
25413 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25414    POP instruction can be generated.  LR should be replaced by PC.  All
25415    the checks required are already done by  USE_RETURN_INSN ().  Hence,
25416    all we really need to check here is whether a single register or
25417    multiple registers are being popped.  */
25418 void
25419 thumb2_expand_return (bool simple_return)
25421 int i, num_regs;
25422 unsigned long saved_regs_mask;
25423 arm_stack_offsets *offsets;
25425 offsets = arm_get_frame_offsets ();
25426 saved_regs_mask = offsets->saved_regs_mask;
25428 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25429 if (saved_regs_mask & (1 << i))
25430 num_regs++;
25432 if (!simple_return && saved_regs_mask)
25434 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25435 functions or adapt code to handle according to ACLE. This path should
25436 not be reachable for cmse_nonsecure_entry functions though we prefer
25437 to assert it for now to ensure that future code changes do not silently
25438 change this behavior. */
25439 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25440 if (num_regs == 1)
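	  /* Build a PARALLEL of the return and a single post-increment pop of
	     the PC, matching *pop_multiple_with_stack_update_and_return. */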
25442 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25443 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25444 rtx addr = gen_rtx_MEM (SImode,
25445 gen_rtx_POST_INC (SImode,
25446 stack_pointer_rtx));
25447 set_mem_alias_set (addr, get_frame_alias_set ());
25448 XVECEXP (par, 0, 0) = ret_rtx;
25449 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25450 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25451 emit_jump_insn (par);
25453 else
25455 saved_regs_mask &= ~ (1 << LR_REGNUM);
25456 saved_regs_mask |= (1 << PC_REGNUM);
25457 arm_emit_multi_reg_pop (saved_regs_mask);
25460 else
25462 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25463 cmse_nonsecure_entry_clear_before_return ();
25464 emit_jump_insn (simple_return_rtx);
25468 void
25469 thumb1_expand_epilogue (void)
25471 HOST_WIDE_INT amount;
25472 arm_stack_offsets *offsets;
25473 int regno;
25475   /* Naked functions don't have epilogues.  */
25476 if (IS_NAKED (arm_current_func_type ()))
25477 return;
25479 offsets = arm_get_frame_offsets ();
25480 amount = offsets->outgoing_args - offsets->saved_regs;
25482 if (frame_pointer_needed)
25484 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25485 amount = offsets->locals_base - offsets->saved_regs;
25487 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25489 gcc_assert (amount >= 0);
25490 if (amount)
25492 emit_insn (gen_blockage ());
25494 if (amount < 512)
25495 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25496 GEN_INT (amount)));
25497 else
25499 /* r3 is always free in the epilogue. */
25500 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25502 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25503 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25507 /* Emit a USE (stack_pointer_rtx), so that
25508 the stack adjustment will not be deleted. */
25509 emit_insn (gen_force_register_use (stack_pointer_rtx));
25511 if (crtl->profile || !TARGET_SCHED_PROLOG)
25512 emit_insn (gen_blockage ());
25514 /* Emit a clobber for each insn that will be restored in the epilogue,
25515 so that flow2 will get register lifetimes correct. */
25516 for (regno = 0; regno < 13; regno++)
25517 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25518 emit_clobber (gen_rtx_REG (SImode, regno));
25520 if (! df_regs_ever_live_p (LR_REGNUM))
25521 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25523 /* Clear all caller-saved regs that are not used to return. */
25524 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25525 cmse_nonsecure_entry_clear_before_return ();
25528 /* Epilogue code for APCS frame. */
25529 static void
25530 arm_expand_epilogue_apcs_frame (bool really_return)
25532 unsigned long func_type;
25533 unsigned long saved_regs_mask;
25534 int num_regs = 0;
25535 int i;
25536 int floats_from_frame = 0;
25537 arm_stack_offsets *offsets;
25539 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25540 func_type = arm_current_func_type ();
25542 /* Get frame offsets for ARM. */
25543 offsets = arm_get_frame_offsets ();
25544 saved_regs_mask = offsets->saved_regs_mask;
25546 /* Find the offset of the floating-point save area in the frame. */
25547 floats_from_frame
25548 = (offsets->saved_args
25549 + arm_compute_static_chain_stack_bytes ()
25550 - offsets->frame);
25552 /* Compute how many core registers saved and how far away the floats are. */
25553 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25554 if (saved_regs_mask & (1 << i))
25556 num_regs++;
25557 floats_from_frame += 4;
25560 if (TARGET_HARD_FLOAT)
25562 int start_reg;
25563 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25565 /* The offset is from IP_REGNUM. */
25566 int saved_size = arm_get_vfp_saved_size ();
25567 if (saved_size > 0)
25569 rtx_insn *insn;
25570 floats_from_frame += saved_size;
25571 insn = emit_insn (gen_addsi3 (ip_rtx,
25572 hard_frame_pointer_rtx,
25573 GEN_INT (-floats_from_frame)));
25574 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25575 ip_rtx, hard_frame_pointer_rtx);
25578 /* Generate VFP register multi-pop. */
25579 start_reg = FIRST_VFP_REGNUM;
25581 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25582 /* Look for a case where a reg does not need restoring. */
25583 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25584 && (!df_regs_ever_live_p (i + 1)
25585 || call_used_regs[i + 1]))
25587 if (start_reg != i)
25588 arm_emit_vfp_multi_reg_pop (start_reg,
25589 (i - start_reg) / 2,
25590 gen_rtx_REG (SImode,
25591 IP_REGNUM));
25592 start_reg = i + 2;
25595 /* Restore the remaining regs that we have discovered (or possibly
25596 even all of them, if the conditional in the for loop never
25597 fired). */
25598 if (start_reg != i)
25599 arm_emit_vfp_multi_reg_pop (start_reg,
25600 (i - start_reg) / 2,
25601 gen_rtx_REG (SImode, IP_REGNUM));
25604 if (TARGET_IWMMXT)
25606 /* The frame pointer is guaranteed to be non-double-word aligned, as
25607 it is set to double-word-aligned old_stack_pointer - 4. */
25608 rtx_insn *insn;
25609 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25611 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25612 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25614 rtx addr = gen_frame_mem (V2SImode,
25615 plus_constant (Pmode, hard_frame_pointer_rtx,
25616 - lrm_count * 4));
25617 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25618 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25619 gen_rtx_REG (V2SImode, i),
25620 NULL_RTX);
25621 lrm_count += 2;
25625   /* saved_regs_mask should contain IP, which holds the old stack pointer
25626      at the time of activation creation.  Since SP and IP are adjacent registers,
25627 we can restore the value directly into SP. */
25628 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25629 saved_regs_mask &= ~(1 << IP_REGNUM);
25630 saved_regs_mask |= (1 << SP_REGNUM);
25632 /* There are two registers left in saved_regs_mask - LR and PC. We
25633 only need to restore LR (the return address), but to
25634 save time we can load it directly into PC, unless we need a
25635 special function exit sequence, or we are not really returning. */
25636 if (really_return
25637 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25638 && !crtl->calls_eh_return)
25639 /* Delete LR from the register mask, so that LR on
25640 the stack is loaded into the PC in the register mask. */
25641 saved_regs_mask &= ~(1 << LR_REGNUM);
25642 else
25643 saved_regs_mask &= ~(1 << PC_REGNUM);
25645 num_regs = bit_count (saved_regs_mask);
25646 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25648 rtx_insn *insn;
25649 emit_insn (gen_blockage ());
25650 /* Unwind the stack to just below the saved registers. */
25651 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25652 hard_frame_pointer_rtx,
25653 GEN_INT (- 4 * num_regs)));
25655 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25656 stack_pointer_rtx, hard_frame_pointer_rtx);
25659 arm_emit_multi_reg_pop (saved_regs_mask);
25661 if (IS_INTERRUPT (func_type))
25663 /* Interrupt handlers will have pushed the
25664 IP onto the stack, so restore it now. */
25665 rtx_insn *insn;
25666 rtx addr = gen_rtx_MEM (SImode,
25667 gen_rtx_POST_INC (SImode,
25668 stack_pointer_rtx));
25669 set_mem_alias_set (addr, get_frame_alias_set ());
25670 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25671 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25672 gen_rtx_REG (SImode, IP_REGNUM),
25673 NULL_RTX);
25676 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25677 return;
25679 if (crtl->calls_eh_return)
25680 emit_insn (gen_addsi3 (stack_pointer_rtx,
25681 stack_pointer_rtx,
25682 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25684 if (IS_STACKALIGN (func_type))
25685 /* Restore the original stack pointer. Before prologue, the stack was
25686 realigned and the original stack pointer saved in r0. For details,
25687 see comment in arm_expand_prologue. */
25688 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25690 emit_jump_insn (simple_return_rtx);
25693 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25694 function is not a sibcall. */
25695 void
25696 arm_expand_epilogue (bool really_return)
25698 unsigned long func_type;
25699 unsigned long saved_regs_mask;
25700 int num_regs = 0;
25701 int i;
25702 int amount;
25703 arm_stack_offsets *offsets;
25705 func_type = arm_current_func_type ();
25707   /* Naked functions don't have epilogues.  Hence, generate return pattern, and
25708 let output_return_instruction take care of instruction emission if any. */
25709 if (IS_NAKED (func_type)
25710 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25712 if (really_return)
25713 emit_jump_insn (simple_return_rtx);
25714 return;
25717 /* If we are throwing an exception, then we really must be doing a
25718 return, so we can't tail-call. */
25719 gcc_assert (!crtl->calls_eh_return || really_return);
25721 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25723 arm_expand_epilogue_apcs_frame (really_return);
25724 return;
25727 /* Get frame offsets for ARM. */
25728 offsets = arm_get_frame_offsets ();
25729 saved_regs_mask = offsets->saved_regs_mask;
25730 num_regs = bit_count (saved_regs_mask);
25732 if (frame_pointer_needed)
25734 rtx_insn *insn;
25735 /* Restore stack pointer if necessary. */
25736 if (TARGET_ARM)
25738 /* In ARM mode, frame pointer points to first saved register.
25739 Restore stack pointer to last saved register. */
25740 amount = offsets->frame - offsets->saved_regs;
25742 /* Force out any pending memory operations that reference stacked data
25743 before stack de-allocation occurs. */
25744 emit_insn (gen_blockage ());
25745 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25746 hard_frame_pointer_rtx,
25747 GEN_INT (amount)));
25748 arm_add_cfa_adjust_cfa_note (insn, amount,
25749 stack_pointer_rtx,
25750 hard_frame_pointer_rtx);
25752 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25753 deleted. */
25754 emit_insn (gen_force_register_use (stack_pointer_rtx));
25756 else
25758 /* In Thumb-2 mode, the frame pointer points to the last saved
25759 register. */
25760 amount = offsets->locals_base - offsets->saved_regs;
25761 if (amount)
25763 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25764 hard_frame_pointer_rtx,
25765 GEN_INT (amount)));
25766 arm_add_cfa_adjust_cfa_note (insn, amount,
25767 hard_frame_pointer_rtx,
25768 hard_frame_pointer_rtx);
25771 /* Force out any pending memory operations that reference stacked data
25772 before stack de-allocation occurs. */
25773 emit_insn (gen_blockage ());
25774 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25775 hard_frame_pointer_rtx));
25776 arm_add_cfa_adjust_cfa_note (insn, 0,
25777 stack_pointer_rtx,
25778 hard_frame_pointer_rtx);
25779 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25780 deleted. */
25781 emit_insn (gen_force_register_use (stack_pointer_rtx));
25784 else
25786 /* Pop off outgoing args and local frame to adjust stack pointer to
25787 last saved register. */
25788 amount = offsets->outgoing_args - offsets->saved_regs;
25789 if (amount)
25791 rtx_insn *tmp;
25792 /* Force out any pending memory operations that reference stacked data
25793 before stack de-allocation occurs. */
25794 emit_insn (gen_blockage ());
25795 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25796 stack_pointer_rtx,
25797 GEN_INT (amount)));
25798 arm_add_cfa_adjust_cfa_note (tmp, amount,
25799 stack_pointer_rtx, stack_pointer_rtx);
25800 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25801 not deleted. */
25802 emit_insn (gen_force_register_use (stack_pointer_rtx));
25806 if (TARGET_HARD_FLOAT)
25808 /* Generate VFP register multi-pop. */
25809 int end_reg = LAST_VFP_REGNUM + 1;
25811 /* Scan the registers in reverse order. We need to match
25812 any groupings made in the prologue and generate matching
25813 vldm operations. The need to match groups is because,
25814 unlike pop, vldm can only do consecutive regs. */
25815 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25816 /* Look for a case where a reg does not need restoring. */
25817 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25818 && (!df_regs_ever_live_p (i + 1)
25819 || call_used_regs[i + 1]))
25821 /* Restore the regs discovered so far (from reg+2 to
25822 end_reg). */
25823 if (end_reg > i + 2)
25824 arm_emit_vfp_multi_reg_pop (i + 2,
25825 (end_reg - (i + 2)) / 2,
25826 stack_pointer_rtx);
25827 end_reg = i;
25830 /* Restore the remaining regs that we have discovered (or possibly
25831 even all of them, if the conditional in the for loop never
25832 fired). */
25833 if (end_reg > i + 2)
25834 arm_emit_vfp_multi_reg_pop (i + 2,
25835 (end_reg - (i + 2)) / 2,
25836 stack_pointer_rtx);
25839 if (TARGET_IWMMXT)
25840 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25841 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25843 rtx_insn *insn;
25844 rtx addr = gen_rtx_MEM (V2SImode,
25845 gen_rtx_POST_INC (SImode,
25846 stack_pointer_rtx));
25847 set_mem_alias_set (addr, get_frame_alias_set ());
25848 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25849 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25850 gen_rtx_REG (V2SImode, i),
25851 NULL_RTX);
25852 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25853 stack_pointer_rtx, stack_pointer_rtx);
25856 if (saved_regs_mask)
25858 rtx insn;
25859 bool return_in_pc = false;
25861 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25862 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25863 && !IS_CMSE_ENTRY (func_type)
25864 && !IS_STACKALIGN (func_type)
25865 && really_return
25866 && crtl->args.pretend_args_size == 0
25867 && saved_regs_mask & (1 << LR_REGNUM)
25868 && !crtl->calls_eh_return)
25870 saved_regs_mask &= ~(1 << LR_REGNUM);
25871 saved_regs_mask |= (1 << PC_REGNUM);
25872 return_in_pc = true;
25875 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25877 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25878 if (saved_regs_mask & (1 << i))
25880 rtx addr = gen_rtx_MEM (SImode,
25881 gen_rtx_POST_INC (SImode,
25882 stack_pointer_rtx));
25883 set_mem_alias_set (addr, get_frame_alias_set ());
25885 if (i == PC_REGNUM)
25887 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25888 XVECEXP (insn, 0, 0) = ret_rtx;
25889 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25890 addr);
25891 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25892 insn = emit_jump_insn (insn);
25894 else
25896 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25897 addr));
25898 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25899 gen_rtx_REG (SImode, i),
25900 NULL_RTX);
25901 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25902 stack_pointer_rtx,
25903 stack_pointer_rtx);
25907 else
25909 if (TARGET_LDRD
25910 && current_tune->prefer_ldrd_strd
25911 && !optimize_function_for_size_p (cfun))
25913 if (TARGET_THUMB2)
25914 thumb2_emit_ldrd_pop (saved_regs_mask);
25915 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25916 arm_emit_ldrd_pop (saved_regs_mask);
25917 else
25918 arm_emit_multi_reg_pop (saved_regs_mask);
25920 else
25921 arm_emit_multi_reg_pop (saved_regs_mask);
25924 if (return_in_pc)
25925 return;
25928 amount
25929 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25930 if (amount)
25932 int i, j;
25933 rtx dwarf = NULL_RTX;
25934 rtx_insn *tmp =
25935 emit_insn (gen_addsi3 (stack_pointer_rtx,
25936 stack_pointer_rtx,
25937 GEN_INT (amount)));
25939 RTX_FRAME_RELATED_P (tmp) = 1;
25941 if (cfun->machine->uses_anonymous_args)
25943       /* Restore pretend args.  See arm_expand_prologue for how the pretend
25944 	 args are saved on the stack.  */
25945 int num_regs = crtl->args.pretend_args_size / 4;
25946 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
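	      /* This selects the highest NUM_REGS argument registers, e.g. r2
	         and r3 when two words of pretend args were saved. */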
25947 for (j = 0, i = 0; j < num_regs; i++)
25948 if (saved_regs_mask & (1 << i))
25950 rtx reg = gen_rtx_REG (SImode, i);
25951 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25952 j++;
25954 REG_NOTES (tmp) = dwarf;
25956 arm_add_cfa_adjust_cfa_note (tmp, amount,
25957 stack_pointer_rtx, stack_pointer_rtx);
25960 /* Clear all caller-saved regs that are not used to return. */
25961 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25963 /* CMSE_ENTRY always returns. */
25964 gcc_assert (really_return);
25965 cmse_nonsecure_entry_clear_before_return ();
25968 if (!really_return)
25969 return;
25971 if (crtl->calls_eh_return)
25972 emit_insn (gen_addsi3 (stack_pointer_rtx,
25973 stack_pointer_rtx,
25974 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25976 if (IS_STACKALIGN (func_type))
25977 /* Restore the original stack pointer. Before prologue, the stack was
25978 realigned and the original stack pointer saved in r0. For details,
25979 see comment in arm_expand_prologue. */
25980 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25982 emit_jump_insn (simple_return_rtx);
25985 /* Implementation of insn prologue_thumb1_interwork. This is the first
25986 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25988 const char *
25989 thumb1_output_interwork (void)
25991 const char * name;
25992 FILE *f = asm_out_file;
25994 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25995 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25996 == SYMBOL_REF);
25997 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25999 /* Generate code sequence to switch us into Thumb mode. */
26000 /* The .code 32 directive has already been emitted by
26001 ASM_DECLARE_FUNCTION_NAME. */
26002 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26003 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26005 /* Generate a label, so that the debugger will notice the
26006 change in instruction sets. This label is also used by
26007 the assembler to bypass the ARM code when this function
26008 is called from a Thumb encoded function elsewhere in the
26009 same file. Hence the definition of STUB_NAME here must
26010 agree with the definition in gas/config/tc-arm.c. */
26012 #define STUB_NAME ".real_start_of"
26014 fprintf (f, "\t.code\t16\n");
26015 #ifdef ARM_PE
26016 if (arm_dllexport_name_p (name))
26017 name = arm_strip_name_encoding (name);
26018 #endif
26019 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26020 fprintf (f, "\t.thumb_func\n");
26021 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26023 return "";
26026 /* Handle the case of a double word load into a low register from
26027 a computed memory address. The computed address may involve a
26028 register which is overwritten by the load. */
26029 const char *
26030 thumb_load_double_from_address (rtx *operands)
26032 rtx addr;
26033 rtx base;
26034 rtx offset;
26035 rtx arg1;
26036 rtx arg2;
26038 gcc_assert (REG_P (operands[0]));
26039 gcc_assert (MEM_P (operands[1]));
26041 /* Get the memory address. */
26042 addr = XEXP (operands[1], 0);
26044 /* Work out how the memory address is computed. */
26045 switch (GET_CODE (addr))
26047 case REG:
26048 operands[2] = adjust_address (operands[1], SImode, 4);
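	      /* If the destination overlaps the base register, load the high
	         word first so the base is still intact for the second load. */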
26050 if (REGNO (operands[0]) == REGNO (addr))
26052 output_asm_insn ("ldr\t%H0, %2", operands);
26053 output_asm_insn ("ldr\t%0, %1", operands);
26055 else
26057 output_asm_insn ("ldr\t%0, %1", operands);
26058 output_asm_insn ("ldr\t%H0, %2", operands);
26060 break;
26062 case CONST:
26063 /* Compute <address> + 4 for the high order load. */
26064 operands[2] = adjust_address (operands[1], SImode, 4);
26066 output_asm_insn ("ldr\t%0, %1", operands);
26067 output_asm_insn ("ldr\t%H0, %2", operands);
26068 break;
26070 case PLUS:
26071 arg1 = XEXP (addr, 0);
26072 arg2 = XEXP (addr, 1);
26074 if (CONSTANT_P (arg1))
26075 base = arg2, offset = arg1;
26076 else
26077 base = arg1, offset = arg2;
26079 gcc_assert (REG_P (base));
26081 /* Catch the case of <address> = <reg> + <reg> */
26082 if (REG_P (offset))
26084 int reg_offset = REGNO (offset);
26085 int reg_base = REGNO (base);
26086 int reg_dest = REGNO (operands[0]);
26088 /* Add the base and offset registers together into the
26089 higher destination register. */
26090 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26091 reg_dest + 1, reg_base, reg_offset);
26093 /* Load the lower destination register from the address in
26094 the higher destination register. */
26095 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26096 reg_dest, reg_dest + 1);
26098 /* Load the higher destination register from its own address
26099 plus 4. */
26100 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26101 reg_dest + 1, reg_dest + 1);
26103 else
26105 /* Compute <address> + 4 for the high order load. */
26106 operands[2] = adjust_address (operands[1], SImode, 4);
26108 /* If the computed address is held in the low order register
26109 then load the high order register first, otherwise always
26110 load the low order register first. */
26111 if (REGNO (operands[0]) == REGNO (base))
26113 output_asm_insn ("ldr\t%H0, %2", operands);
26114 output_asm_insn ("ldr\t%0, %1", operands);
26116 else
26118 output_asm_insn ("ldr\t%0, %1", operands);
26119 output_asm_insn ("ldr\t%H0, %2", operands);
26122 break;
26124 case LABEL_REF:
26125 /* With no registers to worry about we can just load the value
26126 directly. */
26127 operands[2] = adjust_address (operands[1], SImode, 4);
26129 output_asm_insn ("ldr\t%H0, %2", operands);
26130 output_asm_insn ("ldr\t%0, %1", operands);
26131 break;
26133 default:
26134 gcc_unreachable ();
26137 return "";
26140 const char *
26141 thumb_output_move_mem_multiple (int n, rtx *operands)
26143 switch (n)
26145 case 2:
26146 if (REGNO (operands[4]) > REGNO (operands[5]))
26147 std::swap (operands[4], operands[5]);
26149 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26150 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26151 break;
26153 case 3:
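	      /* Sort the three scratch registers into ascending order, as
	         ldmia/stmia register lists must be ascending. */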
26154 if (REGNO (operands[4]) > REGNO (operands[5]))
26155 std::swap (operands[4], operands[5]);
26156 if (REGNO (operands[5]) > REGNO (operands[6]))
26157 std::swap (operands[5], operands[6]);
26158 if (REGNO (operands[4]) > REGNO (operands[5]))
26159 std::swap (operands[4], operands[5]);
26161 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26162 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26163 break;
26165 default:
26166 gcc_unreachable ();
26169 return "";
26172 /* Output a call-via instruction for thumb state. */
26173 const char *
26174 thumb_call_via_reg (rtx reg)
26176 int regno = REGNO (reg);
26177 rtx *labelp;
26179 gcc_assert (regno < LR_REGNUM);
26181 /* If we are in the normal text section we can use a single instance
26182 per compilation unit. If we are doing function sections, then we need
26183 an entry per section, since we can't rely on reachability. */
26184 if (in_section == text_section)
26186 thumb_call_reg_needed = 1;
26188 if (thumb_call_via_label[regno] == NULL)
26189 thumb_call_via_label[regno] = gen_label_rtx ();
26190 labelp = thumb_call_via_label + regno;
26192 else
26194 if (cfun->machine->call_via[regno] == NULL)
26195 cfun->machine->call_via[regno] = gen_label_rtx ();
26196 labelp = cfun->machine->call_via + regno;
26199 output_asm_insn ("bl\t%a0", labelp);
26200 return "";
26203 /* Routines for generating rtl. */
26204 void
26205 thumb_expand_movmemqi (rtx *operands)
26207 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26208 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26209 HOST_WIDE_INT len = INTVAL (operands[2]);
26210 HOST_WIDE_INT offset = 0;
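	  /* Copy the bulk in 12- and 8-byte ldmia/stmia blocks, then finish
	     with word, halfword and byte tails. */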
26212 while (len >= 12)
26214 emit_insn (gen_movmem12b (out, in, out, in));
26215 len -= 12;
26218 if (len >= 8)
26220 emit_insn (gen_movmem8b (out, in, out, in));
26221 len -= 8;
26224 if (len >= 4)
26226 rtx reg = gen_reg_rtx (SImode);
26227 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26228 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26229 len -= 4;
26230 offset += 4;
26233 if (len >= 2)
26235 rtx reg = gen_reg_rtx (HImode);
26236 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26237 plus_constant (Pmode, in,
26238 offset))));
26239 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26240 offset)),
26241 reg));
26242 len -= 2;
26243 offset += 2;
26246 if (len)
26248 rtx reg = gen_reg_rtx (QImode);
26249 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26250 plus_constant (Pmode, in,
26251 offset))));
26252 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26253 offset)),
26254 reg));
26258 void
26259 thumb_reload_out_hi (rtx *operands)
26261 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26264 /* Return the length of a function name prefix
26265 that starts with the character 'c'. */
26266 static int
26267 arm_get_strip_length (int c)
26269 switch (c)
26271 ARM_NAME_ENCODING_LENGTHS
26272 default: return 0;
26276 /* Return a pointer to a function's name with any
26277 and all prefix encodings stripped from it. */
26278 const char *
26279 arm_strip_name_encoding (const char *name)
26281 int skip;
26283 while ((skip = arm_get_strip_length (* name)))
26284 name += skip;
26286 return name;
26289 /* If there is a '*' anywhere in the name's prefix, then
26290 emit the stripped name verbatim, otherwise prepend an
26291 underscore if leading underscores are being used. */
26292 void
26293 arm_asm_output_labelref (FILE *stream, const char *name)
26295 int skip;
26296 int verbatim = 0;
26298 while ((skip = arm_get_strip_length (* name)))
26300 verbatim |= (*name == '*');
26301 name += skip;
26304 if (verbatim)
26305 fputs (name, stream);
26306 else
26307 asm_fprintf (stream, "%U%s", name);
26310 /* This function is used to emit an EABI tag and its associated value.
26311 We emit the numerical value of the tag in case the assembler does not
26312    support textual tags (e.g. gas prior to 2.20).  If requested we include
26313 the tag name in a comment so that anyone reading the assembler output
26314 will know which tag is being set.
26316 This function is not static because arm-c.c needs it too. */
26318 void
26319 arm_emit_eabi_attribute (const char *name, int num, int val)
26321 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26322 if (flag_verbose_asm || flag_debug_asm)
26323 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26324 asm_fprintf (asm_out_file, "\n");
26327 /* This function is used to print CPU tuning information as comment
26328 in assembler file. Pointers are not printed for now. */
26330 void
26331 arm_print_tune_info (void)
26333 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26334 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26335 current_tune->constant_limit);
26336 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26337 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26340 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26341 "prefetch.l1_cache_size:\t%d\n",
26342 current_tune->prefetch.l1_cache_size);
26343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26344 "prefetch.l1_cache_line_size:\t%d\n",
26345 current_tune->prefetch.l1_cache_line_size);
26346 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26347 "prefer_constant_pool:\t%d\n",
26348 (int) current_tune->prefer_constant_pool);
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26350 "branch_cost:\t(s:speed, p:predictable)\n");
26351 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26352 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26353 current_tune->branch_cost (false, false));
26354 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26355 current_tune->branch_cost (false, true));
26356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26357 current_tune->branch_cost (true, false));
26358 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26359 current_tune->branch_cost (true, true));
26360 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26361 "prefer_ldrd_strd:\t%d\n",
26362 (int) current_tune->prefer_ldrd_strd);
26363 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26364 "logical_op_non_short_circuit:\t[%d,%d]\n",
26365 (int) current_tune->logical_op_non_short_circuit_thumb,
26366 (int) current_tune->logical_op_non_short_circuit_arm);
26367 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26368 "prefer_neon_for_64bits:\t%d\n",
26369 (int) current_tune->prefer_neon_for_64bits);
26370 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26371 "disparage_flag_setting_t16_encodings:\t%d\n",
26372 (int) current_tune->disparage_flag_setting_t16_encodings);
26373 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26374 "string_ops_prefer_neon:\t%d\n",
26375 (int) current_tune->string_ops_prefer_neon);
26376 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26377 "max_insns_inline_memset:\t%d\n",
26378 current_tune->max_insns_inline_memset);
26379 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26380 current_tune->fusible_ops);
26381 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26382 (int) current_tune->sched_autopref);
26385 /* Print .arch and .arch_extension directives corresponding to the
26386 current architecture configuration. */
26387 static void
26388 arm_print_asm_arch_directives ()
26390 const arch_option *arch
26391 = arm_parse_arch_option_name (all_architectures, "-march",
26392 arm_active_target.arch_name);
26393 auto_sbitmap opt_bits (isa_num_bits);
26395 gcc_assert (arch);
26397 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26398 arm_last_printed_arch_string = arm_active_target.arch_name;
26399 if (!arch->common.extensions)
26400 return;
26402 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26403 opt->name != NULL;
26404 opt++)
26406 if (!opt->remove)
26408 arm_initialize_isa (opt_bits, opt->isa_bits);
26410 /* If every feature bit of this option is set in the target
26411 ISA specification, print out the option name. However,
26412 don't print anything if all the bits are part of the
26413 FPU specification. */
26414 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26415 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26416 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26421 static void
26422 arm_file_start (void)
26424 int val;
26426 if (TARGET_BPABI)
26428 /* We don't have a specified CPU. Use the architecture to
26429 generate the tags.
26431 Note: it might be better to do this unconditionally, then the
26432 assembler would not need to know about all new CPU names as
26433 they are added. */
26434 if (!arm_active_target.core_name)
26436 /* armv7ve doesn't support any extensions. */
26437 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26439 	      /* Keep backward compatibility for assemblers
26440 which don't support armv7ve. */
26441 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26442 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26443 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26444 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26445 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26446 arm_last_printed_arch_string = "armv7ve";
26448 else
26449 arm_print_asm_arch_directives ();
26451 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26453 asm_fprintf (asm_out_file, "\t.arch %s\n",
26454 arm_active_target.core_name + 8);
26455 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26457 else
26459 const char* truncated_name
26460 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26461 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26464 if (print_tune_info)
26465 arm_print_tune_info ();
26467 if (! TARGET_SOFT_FLOAT)
26469 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26470 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26472 if (TARGET_HARD_FLOAT_ABI)
26473 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26476 /* Some of these attributes only apply when the corresponding features
26477 are used. However we don't have any easy way of figuring this out.
26478 Conservatively record the setting that would have been used. */
26480 if (flag_rounding_math)
26481 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26483 if (!flag_unsafe_math_optimizations)
26485 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26486 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26488 if (flag_signaling_nans)
26489 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26491 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26492 flag_finite_math_only ? 1 : 3);
26494 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26495 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26496 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26497 flag_short_enums ? 1 : 2);
26499 /* Tag_ABI_optimization_goals. */
26500 if (optimize_size)
26501 val = 4;
26502 else if (optimize >= 2)
26503 val = 2;
26504 else if (optimize)
26505 val = 1;
26506 else
26507 val = 6;
26508 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26510 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26511 unaligned_access);
26513 if (arm_fp16_format)
26514 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26515 (int) arm_fp16_format);
26517 if (arm_lang_output_object_attributes_hook)
26518 arm_lang_output_object_attributes_hook();
26521 default_file_start ();
26524 static void
26525 arm_file_end (void)
26527 int regno;
26529 if (NEED_INDICATE_EXEC_STACK)
26530 /* Add .note.GNU-stack. */
26531 file_end_indicate_exec_stack ();
26533 if (! thumb_call_reg_needed)
26534 return;
26536 switch_to_section (text_section);
26537 asm_fprintf (asm_out_file, "\t.code 16\n");
26538 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26540 for (regno = 0; regno < LR_REGNUM; regno++)
26542 rtx label = thumb_call_via_label[regno];
26544 if (label != 0)
26546 targetm.asm_out.internal_label (asm_out_file, "L",
26547 CODE_LABEL_NUMBER (label));
26548 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26553 #ifndef ARM_PE
26554 /* Symbols in the text segment can be accessed without indirecting via the
26555 constant pool; it may take an extra binary operation, but this is still
26556 faster than indirecting via memory. Don't do this when not optimizing,
26557 since we won't be calculating all of the offsets necessary to do this
26558 simplification. */
26560 static void
26561 arm_encode_section_info (tree decl, rtx rtl, int first)
26563 if (optimize > 0 && TREE_CONSTANT (decl))
26564 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26566 default_encode_section_info (decl, rtl, first);
26568 #endif /* !ARM_PE */
26570 static void
26571 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26573 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26574 && !strcmp (prefix, "L"))
26576 arm_ccfsm_state = 0;
26577 arm_target_insn = NULL;
26579 default_internal_label (stream, prefix, labelno);
26582 /* Output code to add DELTA to the first argument, and then jump
26583 to FUNCTION. Used for C++ multiple inheritance. */
26585 static void
26586 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26587 HOST_WIDE_INT, tree function)
26589 static int thunk_label = 0;
26590 char label[256];
26591 char labelpc[256];
26592 int mi_delta = delta;
26593 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26594 int shift = 0;
26595 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26596 ? 1 : 0);
26597 if (mi_delta < 0)
26598 mi_delta = - mi_delta;
26600 final_start_function (emit_barrier (), file, 1);
26602 if (TARGET_THUMB1)
26604 int labelno = thunk_label++;
26605 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26606 /* Thunks are entered in arm mode when available. */
26607 if (TARGET_THUMB1_ONLY)
26609 /* push r3 so we can use it as a temporary. */
26610 /* TODO: Omit this save if r3 is not used. */
26611 fputs ("\tpush {r3}\n", file);
26612 fputs ("\tldr\tr3, ", file);
26614 else
26616 fputs ("\tldr\tr12, ", file);
26618 assemble_name (file, label);
26619 fputc ('\n', file);
26620 if (flag_pic)
26622 /* If we are generating PIC, the ldr instruction below loads
26623 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26624 the address of the add + 8, so we have:
26626 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26627 = target + 1.
26629 Note that we have "+ 1" because some versions of GNU ld
26630 don't set the low bit of the result for R_ARM_REL32
26631 relocations against thumb function symbols.
26632 On ARMv6M this is +4, not +8. */
26633 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26634 assemble_name (file, labelpc);
26635 fputs (":\n", file);
26636 if (TARGET_THUMB1_ONLY)
26638 /* This is 2 insns after the start of the thunk, so we know it
26639 is 4-byte aligned. */
26640 fputs ("\tadd\tr3, pc, r3\n", file);
26641 fputs ("\tmov r12, r3\n", file);
26643 else
26644 fputs ("\tadd\tr12, pc, r12\n", file);
26646 else if (TARGET_THUMB1_ONLY)
26647 fputs ("\tmov r12, r3\n", file);
26649 if (TARGET_THUMB1_ONLY)
26651 if (mi_delta > 255)
26653 fputs ("\tldr\tr3, ", file);
26654 assemble_name (file, label);
26655 fputs ("+4\n", file);
26656 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26657 mi_op, this_regno, this_regno);
26659 else if (mi_delta != 0)
26661 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26662 when one of the operands is an immediate. */
26663 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26664 mi_op, this_regno, this_regno,
26665 mi_delta);
26668 else
26670 /* TODO: Use movw/movt for large constants when available. */
26671 while (mi_delta != 0)
26673 if ((mi_delta & (3 << shift)) == 0)
26674 shift += 2;
26675 else
26677 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26678 mi_op, this_regno, this_regno,
26679 mi_delta & (0xff << shift));
26680 mi_delta &= ~(0xff << shift);
26681 shift += 8;
26685 if (TARGET_THUMB1)
26687 if (TARGET_THUMB1_ONLY)
26688 fputs ("\tpop\t{r3}\n", file);
26690 fprintf (file, "\tbx\tr12\n");
26691 ASM_OUTPUT_ALIGN (file, 2);
26692 assemble_name (file, label);
26693 fputs (":\n", file);
26694 if (flag_pic)
26696 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26697 rtx tem = XEXP (DECL_RTL (function), 0);
26698 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26699 pipeline offset is four rather than eight. Adjust the offset
26700 accordingly. */
26701 tem = plus_constant (GET_MODE (tem), tem,
26702 TARGET_THUMB1_ONLY ? -3 : -7);
26703 tem = gen_rtx_MINUS (GET_MODE (tem),
26704 tem,
26705 gen_rtx_SYMBOL_REF (Pmode,
26706 ggc_strdup (labelpc)));
26707 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26709 else
26710 /* Output ".word .LTHUNKn". */
26711 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26713 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26714 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26716 else
26718 fputs ("\tb\t", file);
26719 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26720 if (NEED_PLT_RELOC)
26721 fputs ("(PLT)", file);
26722 fputc ('\n', file);
26725 final_end_function ();
26728 /* MI thunk handling for TARGET_32BIT. */
26730 static void
26731 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26732 HOST_WIDE_INT vcall_offset, tree function)
26734 /* On ARM, this_regno is R0 or R1 depending on
26735 whether the function returns an aggregate or not. */
26737 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26738 function)
26739 ? R1_REGNUM : R0_REGNUM);
26741 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26742 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26743 reload_completed = 1;
26744 emit_note (NOTE_INSN_PROLOGUE_END);
26746 /* Add DELTA to THIS_RTX. */
26747 if (delta != 0)
26748 arm_split_constant (PLUS, Pmode, NULL_RTX,
26749 delta, this_rtx, this_rtx, false);
26751 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26752 if (vcall_offset != 0)
26754 /* Load *THIS_RTX. */
26755 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26756 /* Compute *THIS_RTX + VCALL_OFFSET. */
26757 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26758 false);
26759 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26760 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26761 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26764 /* Generate a tail call to the target function. */
26765 if (!TREE_USED (function))
26767 assemble_external (function);
26768 TREE_USED (function) = 1;
26770 rtx funexp = XEXP (DECL_RTL (function), 0);
26771 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26772 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26773 SIBLING_CALL_P (insn) = 1;
26775 insn = get_insns ();
26776 shorten_branches (insn);
26777 final_start_function (insn, file, 1);
26778 final (insn, file, 1);
26779 final_end_function ();
26781 /* Stop pretending this is a post-reload pass. */
26782 reload_completed = 0;
26785 /* Output code to add DELTA to the first argument, and then jump
26786 to FUNCTION. Used for C++ multiple inheritance. */
26788 static void
26789 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26790 HOST_WIDE_INT vcall_offset, tree function)
26792 if (TARGET_32BIT)
26793 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26794 else
26795 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26799 arm_emit_vector_const (FILE *file, rtx x)
26801 int i;
26802 const char * pattern;
26804 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26806 switch (GET_MODE (x))
26808 case E_V2SImode: pattern = "%08x"; break;
26809 case E_V4HImode: pattern = "%04x"; break;
26810 case E_V8QImode: pattern = "%02x"; break;
26811 default: gcc_unreachable ();
26814 fprintf (file, "0x");
26815 for (i = CONST_VECTOR_NUNITS (x); i--;)
26817 rtx element;
26819 element = CONST_VECTOR_ELT (x, i);
26820 fprintf (file, pattern, INTVAL (element));
26823 return 1;
26826 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26827 HFmode constant pool entries are actually loaded with ldr. */
26828 void
26829 arm_emit_fp16_const (rtx c)
26831 long bits;
26833 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26834 if (WORDS_BIG_ENDIAN)
26835 assemble_zeros (2);
26836 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26837 if (!WORDS_BIG_ENDIAN)
26838 assemble_zeros (2);
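/* For example, on a little-endian target the two significant bytes of the
   HFmode value are emitted first, followed by two bytes of zero padding, so
   the 32-bit ldr that loads the pool entry finds the constant in the low half
   of the word; big-endian targets emit the padding first instead. */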
26841 const char *
26842 arm_output_load_gr (rtx *operands)
26844 rtx reg;
26845 rtx offset;
26846 rtx wcgr;
26847 rtx sum;
26849 if (!MEM_P (operands [1])
26850 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26851 || !REG_P (reg = XEXP (sum, 0))
26852 || !CONST_INT_P (offset = XEXP (sum, 1))
26853 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26854 return "wldrw%?\t%0, %1";
26856 /* Fix up an out-of-range load of a GR register. */
26857 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26858 wcgr = operands[0];
26859 operands[0] = reg;
26860 output_asm_insn ("ldr%?\t%0, %1", operands);
26862 operands[0] = wcgr;
26863 operands[1] = reg;
26864 output_asm_insn ("tmcr%?\t%0, %1", operands);
26865 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26867 return "";
26870 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26872 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26873 named arg and all anonymous args onto the stack.
26874 XXX I know the prologue shouldn't be pushing registers, but it is faster
26875 that way. */
26877 static void
26878 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26879 machine_mode mode,
26880 tree type,
26881 int *pretend_size,
26882 int second_time ATTRIBUTE_UNUSED)
26884 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26885 int nregs;
26887 cfun->machine->uses_anonymous_args = 1;
26888 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26890 nregs = pcum->aapcs_ncrn;
26891 if (nregs & 1)
26893 int res = arm_needs_doubleword_align (mode, type);
26894 if (res < 0 && warn_psabi)
26895 inform (input_location, "parameter passing for argument of "
26896 "type %qT changed in GCC 7.1", type);
26897 else if (res > 0)
26898 nregs++;
26901 else
26902 nregs = pcum->nregs;
26904 if (nregs < NUM_ARG_REGS)
26905 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
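/* For example, a variadic AAPCS function whose named arguments occupy only r0
   and r1 leaves nregs == 2, so *pretend_size becomes 2 * UNITS_PER_WORD (8
   bytes) and the prologue also pushes r2 and r3 behind the named arguments. */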
26908 /* We can't rely on the caller doing the proper promotion when
26909 using APCS or ATPCS. */
26911 static bool
26912 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26914 return !TARGET_AAPCS_BASED;
26917 static machine_mode
26918 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26919 machine_mode mode,
26920 int *punsignedp ATTRIBUTE_UNUSED,
26921 const_tree fntype ATTRIBUTE_UNUSED,
26922 int for_return ATTRIBUTE_UNUSED)
26924 if (GET_MODE_CLASS (mode) == MODE_INT
26925 && GET_MODE_SIZE (mode) < 4)
26926 return SImode;
26928 return mode;
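/* For example, a QImode or HImode scalar argument or return value is widened
   to SImode here, while SImode, DImode and floating-point modes pass through
   unchanged. */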
26932 static bool
26933 arm_default_short_enums (void)
26935 return ARM_DEFAULT_SHORT_ENUMS;
26939 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26941 static bool
26942 arm_align_anon_bitfield (void)
26944 return TARGET_AAPCS_BASED;
26948 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26950 static tree
26951 arm_cxx_guard_type (void)
26953 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26957 /* The EABI says test the least significant bit of a guard variable. */
26959 static bool
26960 arm_cxx_guard_mask_bit (void)
26962 return TARGET_AAPCS_BASED;
26966 /* The EABI specifies that all array cookies are 8 bytes long. */
26968 static tree
26969 arm_get_cookie_size (tree type)
26971 tree size;
26973 if (!TARGET_AAPCS_BASED)
26974 return default_cxx_get_cookie_size (type);
26976 size = build_int_cst (sizetype, 8);
26977 return size;
26981 /* The EABI says that array cookies should also contain the element size. */
26983 static bool
26984 arm_cookie_has_size (void)
26986 return TARGET_AAPCS_BASED;
26990 /* The EABI says constructors and destructors should return a pointer to
26991 the object constructed/destroyed. */
26993 static bool
26994 arm_cxx_cdtor_returns_this (void)
26996 return TARGET_AAPCS_BASED;
26999 /* The EABI says that an inline function may never be the key
27000 method. */
27002 static bool
27003 arm_cxx_key_method_may_be_inline (void)
27005 return !TARGET_AAPCS_BASED;
27008 static void
27009 arm_cxx_determine_class_data_visibility (tree decl)
27011 if (!TARGET_AAPCS_BASED
27012 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27013 return;
27015 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27016 is exported. However, on systems without dynamic vague linkage,
27017 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27018 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27019 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27020 else
27021 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27022 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27025 static bool
27026 arm_cxx_class_data_always_comdat (void)
27028 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27029 vague linkage if the class has no key function. */
27030 return !TARGET_AAPCS_BASED;
27034 /* The EABI says __aeabi_atexit should be used to register static
27035 destructors. */
27037 static bool
27038 arm_cxx_use_aeabi_atexit (void)
27040 return TARGET_AAPCS_BASED;
27044 void
27045 arm_set_return_address (rtx source, rtx scratch)
27047 arm_stack_offsets *offsets;
27048 HOST_WIDE_INT delta;
27049 rtx addr, mem;
27050 unsigned long saved_regs;
27052 offsets = arm_get_frame_offsets ();
27053 saved_regs = offsets->saved_regs_mask;
27055 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27056 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27057 else
27059 if (frame_pointer_needed)
27060 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27061 else
27063 /* LR will be the first saved register. */
27064 delta = offsets->outgoing_args - (offsets->frame + 4);
27067 if (delta >= 4096)
27069 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27070 GEN_INT (delta & ~4095)));
27071 addr = scratch;
27072 delta &= 4095;
27074 else
27075 addr = stack_pointer_rtx;
27077 addr = plus_constant (Pmode, addr, delta);
27080 /* The store needs to be marked to prevent DSE from deleting
27081 it as dead if it is based on fp. */
27082 mem = gen_frame_mem (Pmode, addr);
27083 MEM_VOLATILE_P (mem) = true;
27084 emit_move_insn (mem, source);
27089 void
27090 thumb_set_return_address (rtx source, rtx scratch)
27092 arm_stack_offsets *offsets;
27093 HOST_WIDE_INT delta;
27094 HOST_WIDE_INT limit;
27095 int reg;
27096 rtx addr, mem;
27097 unsigned long mask;
27099 emit_use (source);
27101 offsets = arm_get_frame_offsets ();
27102 mask = offsets->saved_regs_mask;
27103 if (mask & (1 << LR_REGNUM))
27105 limit = 1024;
27106 /* Find the saved regs. */
27107 if (frame_pointer_needed)
27109 delta = offsets->soft_frame - offsets->saved_args;
27110 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27111 if (TARGET_THUMB1)
27112 limit = 128;
27114 else
27116 delta = offsets->outgoing_args - offsets->saved_args;
27117 reg = SP_REGNUM;
27119 /* Allow for the stack frame. */
27120 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27121 delta -= 16;
27122 /* The link register is always the first saved register. */
27123 delta -= 4;
27125 /* Construct the address. */
27126 addr = gen_rtx_REG (SImode, reg);
27127 if (delta > limit)
27129 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27130 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27131 addr = scratch;
27133 else
27134 addr = plus_constant (Pmode, addr, delta);
27136 /* The store needs to be marked to prevent DSE from deleting
27137 it as dead if it is based on fp. */
27138 mem = gen_frame_mem (Pmode, addr);
27139 MEM_VOLATILE_P (mem) = true;
27140 emit_move_insn (mem, source);
27142 else
27143 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27146 /* Implements target hook vector_mode_supported_p. */
27147 bool
27148 arm_vector_mode_supported_p (machine_mode mode)
27150 /* Neon also supports V2SImode, etc. listed in the clause below. */
27151 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27152 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27153 || mode == V2DImode || mode == V8HFmode))
27154 return true;
27156 if ((TARGET_NEON || TARGET_IWMMXT)
27157 && ((mode == V2SImode)
27158 || (mode == V4HImode)
27159 || (mode == V8QImode)))
27160 return true;
27162 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27163 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27164 || mode == V2HAmode))
27165 return true;
27167 return false;
27170 /* Implements target hook array_mode_supported_p. */
27172 static bool
27173 arm_array_mode_supported_p (machine_mode mode,
27174 unsigned HOST_WIDE_INT nelems)
27176 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27177 for now, as the lane-swapping logic needs to be extended in the expanders.
27178 See PR target/82518. */
27179 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27180 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27181 && (nelems >= 2 && nelems <= 4))
27182 return true;
27184 return false;
27187 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27188 registers when autovectorizing for Neon, at least until multiple vector
27189 widths are supported properly by the middle-end. */
27191 static machine_mode
27192 arm_preferred_simd_mode (scalar_mode mode)
27194 if (TARGET_NEON)
27195 switch (mode)
27197 case E_SFmode:
27198 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27199 case E_SImode:
27200 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27201 case E_HImode:
27202 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27203 case E_QImode:
27204 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27205 case E_DImode:
27206 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27207 return V2DImode;
27208 break;
27210 default:;
27213 if (TARGET_REALLY_IWMMXT)
27214 switch (mode)
27216 case E_SImode:
27217 return V2SImode;
27218 case E_HImode:
27219 return V4HImode;
27220 case E_QImode:
27221 return V8QImode;
27223 default:;
27226 return word_mode;
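/* For example, autovectorizing SImode data with Neon yields V4SImode, or
   V2SImode under -mvectorize-with-neon-double; iWMMXt only offers the 64-bit
   V2SImode here. */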
27229 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27231 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27232 using r0-r4 for function arguments, r7 for the stack frame and don't have
27233 enough left over to do doubleword arithmetic. For Thumb-2 all the
27234 potentially problematic instructions accept high registers so this is not
27235 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27236 that require many low registers. */
27237 static bool
27238 arm_class_likely_spilled_p (reg_class_t rclass)
27240 if ((TARGET_THUMB1 && rclass == LO_REGS)
27241 || rclass == CC_REG)
27242 return true;
27244 return false;
27247 /* Implements target hook small_register_classes_for_mode_p. */
27248 bool
27249 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27251 return TARGET_THUMB1;
27254 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27255 ARM insns and therefore guarantee that the shift count is modulo 256.
27256 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27257 guarantee no particular behavior for out-of-range counts. */
27259 static unsigned HOST_WIDE_INT
27260 arm_shift_truncation_mask (machine_mode mode)
27262 return mode == SImode ? 255 : 0;
27266 /* Map internal gcc register numbers to DWARF2 register numbers. */
27268 unsigned int
27269 arm_dbx_register_number (unsigned int regno)
27271 if (regno < 16)
27272 return regno;
27274 if (IS_VFP_REGNUM (regno))
27276 /* See comment in arm_dwarf_register_span. */
27277 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27278 return 64 + regno - FIRST_VFP_REGNUM;
27279 else
27280 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27283 if (IS_IWMMXT_GR_REGNUM (regno))
27284 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27286 if (IS_IWMMXT_REGNUM (regno))
27287 return 112 + regno - FIRST_IWMMXT_REGNUM;
27289 return DWARF_FRAME_REGISTERS;
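/* For example, s0 (FIRST_VFP_REGNUM) maps to DWARF register 64, whereas d16,
   which has no single-precision view, lands in the 256-287 range as
   256 + 32 / 2 = 272. */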
27292 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27293 GCC models them as 64 32-bit registers, so we need to describe this to
27294 the DWARF generation code. Other registers can use the default. */
27295 static rtx
27296 arm_dwarf_register_span (rtx rtl)
27298 machine_mode mode;
27299 unsigned regno;
27300 rtx parts[16];
27301 int nregs;
27302 int i;
27304 regno = REGNO (rtl);
27305 if (!IS_VFP_REGNUM (regno))
27306 return NULL_RTX;
27308 /* XXX FIXME: The EABI defines two VFP register ranges:
27309 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27310 256-287: D0-D31
27311 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27312 corresponding D register. Until GDB supports this, we shall use the
27313 legacy encodings. We also use these encodings for D0-D15 for
27314 compatibility with older debuggers. */
27315 mode = GET_MODE (rtl);
27316 if (GET_MODE_SIZE (mode) < 8)
27317 return NULL_RTX;
27319 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27321 nregs = GET_MODE_SIZE (mode) / 4;
27322 for (i = 0; i < nregs; i += 2)
27323 if (TARGET_BIG_END)
27325 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27326 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27328 else
27330 parts[i] = gen_rtx_REG (SImode, regno + i);
27331 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27334 else
27336 nregs = GET_MODE_SIZE (mode) / 8;
27337 for (i = 0; i < nregs; i++)
27338 parts[i] = gen_rtx_REG (DImode, regno + i);
27341 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27344 #if ARM_UNWIND_INFO
27345 /* Emit unwind directives for a store-multiple instruction or stack pointer
27346 push during alignment.
27347 These should only ever be generated by the function prologue code, so
27348 expect them to have a particular form.
27349 The store-multiple instruction sometimes pushes pc as the last register,
27350 although it should not be tracked into unwind information, or for -Os
27351 sometimes pushes some dummy registers before the first register that needs
27352 to be tracked in unwind information; such dummy registers are there just
27353 to avoid separate stack adjustment, and will not be restored in the
27354 epilogue. */
27356 static void
27357 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27359 int i;
27360 HOST_WIDE_INT offset;
27361 HOST_WIDE_INT nregs;
27362 int reg_size;
27363 unsigned reg;
27364 unsigned lastreg;
27365 unsigned padfirst = 0, padlast = 0;
27366 rtx e;
27368 e = XVECEXP (p, 0, 0);
27369 gcc_assert (GET_CODE (e) == SET);
27371 /* First insn will adjust the stack pointer. */
27372 gcc_assert (GET_CODE (e) == SET
27373 && REG_P (SET_DEST (e))
27374 && REGNO (SET_DEST (e)) == SP_REGNUM
27375 && GET_CODE (SET_SRC (e)) == PLUS);
27377 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27378 nregs = XVECLEN (p, 0) - 1;
27379 gcc_assert (nregs);
27381 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27382 if (reg < 16)
27384 /* For -Os dummy registers can be pushed at the beginning to
27385 avoid separate stack pointer adjustment. */
27386 e = XVECEXP (p, 0, 1);
27387 e = XEXP (SET_DEST (e), 0);
27388 if (GET_CODE (e) == PLUS)
27389 padfirst = INTVAL (XEXP (e, 1));
27390 gcc_assert (padfirst == 0 || optimize_size);
27391 /* The function prologue may also push pc, but not annotate it as it is
27392 never restored. We turn this into a stack pointer adjustment. */
27393 e = XVECEXP (p, 0, nregs);
27394 e = XEXP (SET_DEST (e), 0);
27395 if (GET_CODE (e) == PLUS)
27396 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27397 else
27398 padlast = offset - 4;
27399 gcc_assert (padlast == 0 || padlast == 4);
27400 if (padlast == 4)
27401 fprintf (asm_out_file, "\t.pad #4\n");
27402 reg_size = 4;
27403 fprintf (asm_out_file, "\t.save {");
27405 else if (IS_VFP_REGNUM (reg))
27407 reg_size = 8;
27408 fprintf (asm_out_file, "\t.vsave {");
27410 else
27411 /* Unknown register type. */
27412 gcc_unreachable ();
27414 /* If the stack increment doesn't match the size of the saved registers,
27415 something has gone horribly wrong. */
27416 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27418 offset = padfirst;
27419 lastreg = 0;
27420 /* The remaining insns will describe the stores. */
27421 for (i = 1; i <= nregs; i++)
27423 /* Expect (set (mem <addr>) (reg)).
27424 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27425 e = XVECEXP (p, 0, i);
27426 gcc_assert (GET_CODE (e) == SET
27427 && MEM_P (SET_DEST (e))
27428 && REG_P (SET_SRC (e)));
27430 reg = REGNO (SET_SRC (e));
27431 gcc_assert (reg >= lastreg);
27433 if (i != 1)
27434 fprintf (asm_out_file, ", ");
27435 /* We can't use %r for vfp because we need to use the
27436 double precision register names. */
27437 if (IS_VFP_REGNUM (reg))
27438 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27439 else
27440 asm_fprintf (asm_out_file, "%r", reg);
27442 if (flag_checking)
27444 /* Check that the addresses are consecutive. */
27445 e = XEXP (SET_DEST (e), 0);
27446 if (GET_CODE (e) == PLUS)
27447 gcc_assert (REG_P (XEXP (e, 0))
27448 && REGNO (XEXP (e, 0)) == SP_REGNUM
27449 && CONST_INT_P (XEXP (e, 1))
27450 && offset == INTVAL (XEXP (e, 1)));
27451 else
27452 gcc_assert (i == 1
27453 && REG_P (e)
27454 && REGNO (e) == SP_REGNUM);
27455 offset += reg_size;
27458 fprintf (asm_out_file, "}\n");
27459 if (padfirst)
27460 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27463 /* Emit unwind directives for a SET. */
27465 static void
27466 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27468 rtx e0;
27469 rtx e1;
27470 unsigned reg;
27472 e0 = XEXP (p, 0);
27473 e1 = XEXP (p, 1);
27474 switch (GET_CODE (e0))
27476 case MEM:
27477 /* Pushing a single register. */
27478 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27479 || !REG_P (XEXP (XEXP (e0, 0), 0))
27480 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27481 abort ();
27483 asm_fprintf (asm_out_file, "\t.save ");
27484 if (IS_VFP_REGNUM (REGNO (e1)))
27485 asm_fprintf(asm_out_file, "{d%d}\n",
27486 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27487 else
27488 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27489 break;
27491 case REG:
27492 if (REGNO (e0) == SP_REGNUM)
27494 /* A stack increment. */
27495 if (GET_CODE (e1) != PLUS
27496 || !REG_P (XEXP (e1, 0))
27497 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27498 || !CONST_INT_P (XEXP (e1, 1)))
27499 abort ();
27501 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27502 -INTVAL (XEXP (e1, 1)));
27504 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27506 HOST_WIDE_INT offset;
27508 if (GET_CODE (e1) == PLUS)
27510 if (!REG_P (XEXP (e1, 0))
27511 || !CONST_INT_P (XEXP (e1, 1)))
27512 abort ();
27513 reg = REGNO (XEXP (e1, 0));
27514 offset = INTVAL (XEXP (e1, 1));
27515 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27516 HARD_FRAME_POINTER_REGNUM, reg,
27517 offset);
27519 else if (REG_P (e1))
27521 reg = REGNO (e1);
27522 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27523 HARD_FRAME_POINTER_REGNUM, reg);
27525 else
27526 abort ();
27528 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27530 /* Move from sp to reg. */
27531 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27533 else if (GET_CODE (e1) == PLUS
27534 && REG_P (XEXP (e1, 0))
27535 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27536 && CONST_INT_P (XEXP (e1, 1)))
27538 /* Set reg to offset from sp. */
27539 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27540 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27542 else
27543 abort ();
27544 break;
27546 default:
27547 abort ();
27552 /* Emit unwind directives for the given insn. */
27554 static void
27555 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27557 rtx note, pat;
27558 bool handled_one = false;
27560 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27561 return;
27563 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27564 && (TREE_NOTHROW (current_function_decl)
27565 || crtl->all_throwers_are_sibcalls))
27566 return;
27568 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27569 return;
27571 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27573 switch (REG_NOTE_KIND (note))
27575 case REG_FRAME_RELATED_EXPR:
27576 pat = XEXP (note, 0);
27577 goto found;
27579 case REG_CFA_REGISTER:
27580 pat = XEXP (note, 0);
27581 if (pat == NULL)
27583 pat = PATTERN (insn);
27584 if (GET_CODE (pat) == PARALLEL)
27585 pat = XVECEXP (pat, 0, 0);
27588 /* Only emitted for IS_STACKALIGN re-alignment. */
27590 rtx dest, src;
27591 unsigned reg;
27593 src = SET_SRC (pat);
27594 dest = SET_DEST (pat);
27596 gcc_assert (src == stack_pointer_rtx);
27597 reg = REGNO (dest);
27598 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27599 reg + 0x90, reg);
27601 handled_one = true;
27602 break;
27604 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27605 to get correct DWARF information for shrink-wrapping. We should not
27606 emit unwind information for it because these notes are used either for
27607 pretend arguments or to adjust sp and restore registers from the
27608 stack. */
27609 case REG_CFA_DEF_CFA:
27610 case REG_CFA_ADJUST_CFA:
27611 case REG_CFA_RESTORE:
27612 return;
27614 case REG_CFA_EXPRESSION:
27615 case REG_CFA_OFFSET:
27616 /* ??? Only handling here what we actually emit. */
27617 gcc_unreachable ();
27619 default:
27620 break;
27623 if (handled_one)
27624 return;
27625 pat = PATTERN (insn);
27626 found:
27628 switch (GET_CODE (pat))
27630 case SET:
27631 arm_unwind_emit_set (asm_out_file, pat);
27632 break;
27634 case SEQUENCE:
27635 /* Store multiple. */
27636 arm_unwind_emit_sequence (asm_out_file, pat);
27637 break;
27639 default:
27640 abort();
27645 /* Output a reference from a function exception table to the type_info
27646 object X. The EABI specifies that the symbol should be relocated by
27647 an R_ARM_TARGET2 relocation. */
27649 static bool
27650 arm_output_ttype (rtx x)
27652 fputs ("\t.word\t", asm_out_file);
27653 output_addr_const (asm_out_file, x);
27654 /* Use special relocations for symbol references. */
27655 if (!CONST_INT_P (x))
27656 fputs ("(TARGET2)", asm_out_file);
27657 fputc ('\n', asm_out_file);
27659 return TRUE;
27662 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27664 static void
27665 arm_asm_emit_except_personality (rtx personality)
27667 fputs ("\t.personality\t", asm_out_file);
27668 output_addr_const (asm_out_file, personality);
27669 fputc ('\n', asm_out_file);
27671 #endif /* ARM_UNWIND_INFO */
27673 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27675 static void
27676 arm_asm_init_sections (void)
27678 #if ARM_UNWIND_INFO
27679 exception_section = get_unnamed_section (0, output_section_asm_op,
27680 "\t.handlerdata");
27681 #endif /* ARM_UNWIND_INFO */
27683 #ifdef OBJECT_FORMAT_ELF
27684 if (target_pure_code)
27685 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27686 #endif
27689 /* Output unwind directives for the start/end of a function. */
27691 void
27692 arm_output_fn_unwind (FILE * f, bool prologue)
27694 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27695 return;
27697 if (prologue)
27698 fputs ("\t.fnstart\n", f);
27699 else
27701 /* If this function will never be unwound, then mark it as such.
27702 The same condition is used in arm_unwind_emit to suppress
27703 the frame annotations. */
27704 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27705 && (TREE_NOTHROW (current_function_decl)
27706 || crtl->all_throwers_are_sibcalls))
27707 fputs("\t.cantunwind\n", f);
27709 fputs ("\t.fnend\n", f);
27713 static bool
27714 arm_emit_tls_decoration (FILE *fp, rtx x)
27716 enum tls_reloc reloc;
27717 rtx val;
27719 val = XVECEXP (x, 0, 0);
27720 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27722 output_addr_const (fp, val);
27724 switch (reloc)
27726 case TLS_GD32:
27727 fputs ("(tlsgd)", fp);
27728 break;
27729 case TLS_LDM32:
27730 fputs ("(tlsldm)", fp);
27731 break;
27732 case TLS_LDO32:
27733 fputs ("(tlsldo)", fp);
27734 break;
27735 case TLS_IE32:
27736 fputs ("(gottpoff)", fp);
27737 break;
27738 case TLS_LE32:
27739 fputs ("(tpoff)", fp);
27740 break;
27741 case TLS_DESCSEQ:
27742 fputs ("(tlsdesc)", fp);
27743 break;
27744 default:
27745 gcc_unreachable ();
27748 switch (reloc)
27750 case TLS_GD32:
27751 case TLS_LDM32:
27752 case TLS_IE32:
27753 case TLS_DESCSEQ:
27754 fputs (" + (. - ", fp);
27755 output_addr_const (fp, XVECEXP (x, 0, 2));
27756 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27757 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27758 output_addr_const (fp, XVECEXP (x, 0, 3));
27759 fputc (')', fp);
27760 break;
27761 default:
27762 break;
27765 return TRUE;
27768 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27770 static void
27771 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27773 gcc_assert (size == 4);
27774 fputs ("\t.word\t", file);
27775 output_addr_const (file, x);
27776 fputs ("(tlsldo)", file);
27779 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27781 static bool
27782 arm_output_addr_const_extra (FILE *fp, rtx x)
27784 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27785 return arm_emit_tls_decoration (fp, x);
27786 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27788 char label[256];
27789 int labelno = INTVAL (XVECEXP (x, 0, 0));
27791 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27792 assemble_name_raw (fp, label);
27794 return TRUE;
27796 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27798 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27799 if (GOT_PCREL)
27800 fputs ("+.", fp);
27801 fputs ("-(", fp);
27802 output_addr_const (fp, XVECEXP (x, 0, 0));
27803 fputc (')', fp);
27804 return TRUE;
27806 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27808 output_addr_const (fp, XVECEXP (x, 0, 0));
27809 if (GOT_PCREL)
27810 fputs ("+.", fp);
27811 fputs ("-(", fp);
27812 output_addr_const (fp, XVECEXP (x, 0, 1));
27813 fputc (')', fp);
27814 return TRUE;
27816 else if (GET_CODE (x) == CONST_VECTOR)
27817 return arm_emit_vector_const (fp, x);
27819 return FALSE;
27822 /* Output assembly for a shift instruction.
27823 SET_FLAGS determines how the instruction modifies the condition codes.
27824 0 - Do not set condition codes.
27825 1 - Set condition codes.
27826 2 - Use smallest instruction. */
27827 const char *
27828 arm_output_shift(rtx * operands, int set_flags)
27830 char pattern[100];
27831 static const char flag_chars[3] = {'?', '.', '!'};
27832 const char *shift;
27833 HOST_WIDE_INT val;
27834 char c;
27836 c = flag_chars[set_flags];
27837 shift = shift_op(operands[3], &val);
27838 if (shift)
27840 if (val != -1)
27841 operands[2] = GEN_INT(val);
27842 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27844 else
27845 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27847 output_asm_insn (pattern, operands);
27848 return "";
27851 /* Output assembly for a WMMX immediate shift instruction. */
27852 const char *
27853 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27855 int shift = INTVAL (operands[2]);
27856 char templ[50];
27857 machine_mode opmode = GET_MODE (operands[0]);
27859 gcc_assert (shift >= 0);
27861 /* Handle shift values larger than the maximum for the mode: > 63 (for the D
27862 qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27863 if (((opmode == V4HImode) && (shift > 15))
27864 || ((opmode == V2SImode) && (shift > 31))
27865 || ((opmode == DImode) && (shift > 63)))
27867 if (wror_or_wsra)
27869 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27870 output_asm_insn (templ, operands);
27871 if (opmode == DImode)
27873 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27874 output_asm_insn (templ, operands);
27877 else
27879 /* The destination register will contain all zeros. */
27880 sprintf (templ, "wzero\t%%0");
27881 output_asm_insn (templ, operands);
27883 return "";
27886 if ((opmode == DImode) && (shift > 32))
27888 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27889 output_asm_insn (templ, operands);
27890 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27891 output_asm_insn (templ, operands);
27893 else
27895 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27896 output_asm_insn (templ, operands);
27898 return "";
27901 /* Output assembly for a WMMX tinsr instruction. */
27902 const char *
27903 arm_output_iwmmxt_tinsr (rtx *operands)
27905 int mask = INTVAL (operands[3]);
27906 int i;
27907 char templ[50];
27908 int units = mode_nunits[GET_MODE (operands[0])];
27909 gcc_assert ((mask & (mask - 1)) == 0);
27910 for (i = 0; i < units; ++i)
27912 if ((mask & 0x01) == 1)
27914 break;
27916 mask >>= 1;
27918 gcc_assert (i < units);
27920 switch (GET_MODE (operands[0]))
27922 case E_V8QImode:
27923 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27924 break;
27925 case E_V4HImode:
27926 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27927 break;
27928 case E_V2SImode:
27929 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27930 break;
27931 default:
27932 gcc_unreachable ();
27933 break;
27935 output_asm_insn (templ, operands);
27937 return "";
27940 /* Output a Thumb-1 casesi dispatch sequence. */
27941 const char *
27942 thumb1_output_casesi (rtx *operands)
27944 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27946 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27948 switch (GET_MODE(diff_vec))
27950 case E_QImode:
27951 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27952 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27953 case E_HImode:
27954 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27955 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27956 case E_SImode:
27957 return "bl\t%___gnu_thumb1_case_si";
27958 default:
27959 gcc_unreachable ();
27963 /* Output a Thumb-2 casesi instruction. */
27964 const char *
27965 thumb2_output_casesi (rtx *operands)
27967 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27969 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27971 output_asm_insn ("cmp\t%0, %1", operands);
27972 output_asm_insn ("bhi\t%l3", operands);
27973 switch (GET_MODE(diff_vec))
27975 case E_QImode:
27976 return "tbb\t[%|pc, %0]";
27977 case E_HImode:
27978 return "tbh\t[%|pc, %0, lsl #1]";
27979 case E_SImode:
27980 if (flag_pic)
27982 output_asm_insn ("adr\t%4, %l2", operands);
27983 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27984 output_asm_insn ("add\t%4, %4, %5", operands);
27985 return "bx\t%4";
27987 else
27989 output_asm_insn ("adr\t%4, %l2", operands);
27990 return "ldr\t%|pc, [%4, %0, lsl #2]";
27992 default:
27993 gcc_unreachable ();
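/* For example, a QImode (byte-offset) dispatch table is emitted as the
   cmp/bhi pair followed by "tbb [pc, rN]", while an SImode table under -fpic
   needs the adr/ldr/add/bx sequence above. */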
27997 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27998 per-core tuning structs. */
27999 static int
28000 arm_issue_rate (void)
28002 return current_tune->issue_rate;
28005 /* Return how many instructions the scheduler should look ahead to choose
28006 the best one. */
28007 static int
28008 arm_first_cycle_multipass_dfa_lookahead (void)
28010 int issue_rate = arm_issue_rate ();
28012 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28015 /* Enable modeling of L2 auto-prefetcher. */
28016 static int
28017 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28019 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28022 const char *
28023 arm_mangle_type (const_tree type)
28025 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28026 has to be mangled as if it is in the "std" namespace. */
28027 if (TARGET_AAPCS_BASED
28028 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28029 return "St9__va_list";
28031 /* Half-precision float. */
28032 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28033 return "Dh";
28035 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28036 builtin type. */
28037 if (TYPE_NAME (type) != NULL)
28038 return arm_mangle_builtin_type (type);
28040 /* Use the default mangling. */
28041 return NULL;
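/* For example, the half-precision __fp16 type mangles to "Dh", and a va_list
   parameter on an AAPCS target mangles as if it were std::__va_list, i.e.
   "St9__va_list". */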
28044 /* Order of allocation of core registers for Thumb: this allocation is
28045 written over the corresponding initial entries of the array
28046 initialized with REG_ALLOC_ORDER. We allocate all low registers
28047 first. Saving and restoring a low register is usually cheaper than
28048 using a call-clobbered high register. */
28050 static const int thumb_core_reg_alloc_order[] =
28052 3, 2, 1, 0, 4, 5, 6, 7,
28053 12, 14, 8, 9, 10, 11
28056 /* Adjust register allocation order when compiling for Thumb. */
28058 void
28059 arm_order_regs_for_local_alloc (void)
28061 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28062 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28063 if (TARGET_THUMB)
28064 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28065 sizeof (thumb_core_reg_alloc_order));
28068 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28070 bool
28071 arm_frame_pointer_required (void)
28073 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28074 return true;
28076 /* If the function receives nonlocal gotos, it needs to save the frame
28077 pointer in the nonlocal_goto_save_area object. */
28078 if (cfun->has_nonlocal_label)
28079 return true;
28081 /* The frame pointer is required for non-leaf APCS frames. */
28082 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28083 return true;
28085 /* If we are probing the stack in the prologue, we will have a faulting
28086 instruction prior to the stack adjustment and this requires a frame
28087 pointer if we want to catch the exception using the EABI unwinder. */
28088 if (!IS_INTERRUPT (arm_current_func_type ())
28089 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28090 || flag_stack_clash_protection)
28091 && arm_except_unwind_info (&global_options) == UI_TARGET
28092 && cfun->can_throw_non_call_exceptions)
28094 HOST_WIDE_INT size = get_frame_size ();
28096 /* That's irrelevant if there is no stack adjustment. */
28097 if (size <= 0)
28098 return false;
28100 /* That's relevant only if there is a stack probe. */
28101 if (crtl->is_leaf && !cfun->calls_alloca)
28103 /* We don't have the final size of the frame so adjust. */
28104 size += 32 * UNITS_PER_WORD;
28105 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28106 return true;
28108 else
28109 return true;
28112 return false;
28115 /* Only thumb1 can't support conditional execution, so return true if
28116 the target is not thumb1. */
28117 static bool
28118 arm_have_conditional_execution (void)
28120 return !TARGET_THUMB1;
28123 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28124 static HOST_WIDE_INT
28125 arm_vector_alignment (const_tree type)
28127 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28129 if (TARGET_AAPCS_BASED)
28130 align = MIN (align, 64);
28132 return align;
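/* For example, a 128-bit Neon vector type is reported with an alignment of
   only 64 bits on AAPCS-based targets, but keeps its natural 128-bit alignment
   otherwise. */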
28135 static void
28136 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28138 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28140 sizes->safe_push (16);
28141 sizes->safe_push (8);
28145 static bool
28146 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28148 /* Vectors which aren't in packed structures will not be less aligned than
28149 the natural alignment of their element type, so this is safe. */
28150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28151 return !is_packed;
28153 return default_builtin_vector_alignment_reachable (type, is_packed);
28156 static bool
28157 arm_builtin_support_vector_misalignment (machine_mode mode,
28158 const_tree type, int misalignment,
28159 bool is_packed)
28161 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28163 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28165 if (is_packed)
28166 return align == 1;
28168 /* If the misalignment is unknown, we should be able to handle the access
28169 so long as it is not to a member of a packed data structure. */
28170 if (misalignment == -1)
28171 return true;
28173 /* Return true if the misalignment is a multiple of the natural alignment
28174 of the vector's element type. This is probably always going to be
28175 true in practice, since we've already established that this isn't a
28176 packed access. */
28177 return ((misalignment % align) == 0);
28180 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28181 is_packed);
28184 static void
28185 arm_conditional_register_usage (void)
28187 int regno;
28189 if (TARGET_THUMB1 && optimize_size)
28191 /* When optimizing for size on Thumb-1, it's better not
28192 to use the HI regs, because of the overhead of
28193 stacking them. */
28194 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28195 fixed_regs[regno] = call_used_regs[regno] = 1;
28198 /* The link register can be clobbered by any branch insn,
28199 but we have no way to track that at present, so mark
28200 it as unavailable. */
28201 if (TARGET_THUMB1)
28202 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28204 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28206 /* VFPv3 registers are disabled when earlier VFP
28207 versions are selected due to the definition of
28208 LAST_VFP_REGNUM. */
28209 for (regno = FIRST_VFP_REGNUM;
28210 regno <= LAST_VFP_REGNUM; ++ regno)
28212 fixed_regs[regno] = 0;
28213 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28214 || regno >= FIRST_VFP_REGNUM + 32;
28218 if (TARGET_REALLY_IWMMXT)
28220 regno = FIRST_IWMMXT_GR_REGNUM;
28221 /* The 2002/10/09 revision of the XScale ABI has wCG0
28222 and wCG1 as call-preserved registers. The 2002/11/21
28223 revision changed this so that all wCG registers are
28224 scratch registers. */
28225 for (regno = FIRST_IWMMXT_GR_REGNUM;
28226 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28227 fixed_regs[regno] = 0;
28228 /* The XScale ABI has wR0 - wR9 as scratch registers,
28229 the rest as call-preserved registers. */
28230 for (regno = FIRST_IWMMXT_REGNUM;
28231 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28233 fixed_regs[regno] = 0;
28234 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28238 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28240 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28241 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28243 else if (TARGET_APCS_STACK)
28245 fixed_regs[10] = 1;
28246 call_used_regs[10] = 1;
28248 /* -mcaller-super-interworking reserves r11 for calls to
28249 _interwork_r11_call_via_rN(). Making the register global
28250 is an easy way of ensuring that it remains valid for all
28251 calls. */
28252 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28253 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28255 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28256 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28257 if (TARGET_CALLER_INTERWORKING)
28258 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28260 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28263 static reg_class_t
28264 arm_preferred_rename_class (reg_class_t rclass)
28266 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28267 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28268 and code size can be reduced. */
28269 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28270 return LO_REGS;
28271 else
28272 return NO_REGS;
28275 /* Compute the attribute "length" of insn "*push_multi".
28276 So this function MUST be kept in sync with that insn pattern. */
28278 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28280 int i, regno, hi_reg;
28281 int num_saves = XVECLEN (parallel_op, 0);
28283 /* ARM mode. */
28284 if (TARGET_ARM)
28285 return 4;
28286 /* Thumb1 mode. */
28287 if (TARGET_THUMB1)
28288 return 2;
28290 /* Thumb2 mode. */
28291 regno = REGNO (first_op);
28292 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
28293 list fits in 8 bits. Normally this means all registers in the list must be
28294 LO_REGS, that is (R0-R7). If any HI_REGS register is used, we must use the
28295 32-bit encodings. The one exception is PUSH, where LR in HI_REGS can still be
28296 used with a 16-bit encoding. */
28297 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28298 for (i = 1; i < num_saves && !hi_reg; i++)
28300 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28301 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28304 if (!hi_reg)
28305 return 2;
28306 return 4;
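/* For example, under Thumb-2 "push {r0-r7, lr}" fits the 16-bit encoding
   (length 2), whereas a register list containing r8, or any high register
   other than LR, forces the 32-bit encoding (length 4). */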
28309 /* Compute the attribute "length" of insn. Currently, this function is used
28310 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28311 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28312 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28313 is true if OPERANDS contains an insn that explicitly updates the base register. */
28316 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28318 /* ARM mode. */
28319 if (TARGET_ARM)
28320 return 4;
28321 /* Thumb1 mode. */
28322 if (TARGET_THUMB1)
28323 return 2;
28325 rtx parallel_op = operands[0];
28326 /* Start indx at the last element of the PARALLEL. */
28327 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28328 /* Initialize regno to the base register. */
28329 unsigned regno = REGNO (operands[1]);
28330 /* Skip return and write back pattern.
28331 We only need register pop pattern for later analysis. */
28332 unsigned first_indx = 0;
28333 first_indx += return_pc ? 1 : 0;
28334 first_indx += write_back_p ? 1 : 0;
28336 /* A pop operation can be done through LDM or POP. If the base register is SP
28337 and write back is enabled, then the LDM is an alias of POP. */
28338 bool pop_p = (regno == SP_REGNUM && write_back_p);
28339 bool ldm_p = !pop_p;
28341 /* Check base register for LDM. */
28342 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28343 return 4;
28345 /* Check each register in the list. */
28346 for (; indx >= first_indx; indx--)
28348 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28349 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28350 comment in arm_attr_length_push_multi. */
28351 if (REGNO_REG_CLASS (regno) == HI_REGS
28352 && (regno != PC_REGNUM || ldm_p))
28353 return 4;
28356 return 2;
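/* For example, "pop {r4-r7, pc}" (SP base with write back) still uses the
   16-bit encoding, while an LDM whose base register is a high register, or a
   list containing a high register other than PC in the POP case, needs the
   32-bit encoding. */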
28359 /* Compute the number of instructions emitted by output_move_double. */
28361 arm_count_output_move_double_insns (rtx *operands)
28363 int count;
28364 rtx ops[2];
28365 /* output_move_double may modify the operands array, so call it
28366 here on a copy of the array. */
28367 ops[0] = operands[0];
28368 ops[1] = operands[1];
28369 output_move_double (ops, false, &count);
28370 return count;
28374 vfp3_const_double_for_fract_bits (rtx operand)
28376 REAL_VALUE_TYPE r0;
28378 if (!CONST_DOUBLE_P (operand))
28379 return 0;
28381 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28382 if (exact_real_inverse (DFmode, &r0)
28383 && !REAL_VALUE_NEGATIVE (r0))
28385 if (exact_real_truncate (DFmode, &r0))
28387 HOST_WIDE_INT value = real_to_integer (&r0);
28388 value = value & 0xffffffff;
28389 if ((value != 0) && ( (value & (value - 1)) == 0))
28391 int ret = exact_log2 (value);
28392 gcc_assert (IN_RANGE (ret, 0, 31));
28393 return ret;
28397 return 0;
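/* For example, the constant 0.125 has exact inverse 8.0 == 2^3, so this
   returns 3: the number of fractional bits to encode in a fixed-point vcvt
   conversion. */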
28400 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28401 log2 is in [1, 32], return that log2. Otherwise return -1.
28402 This is used in the patterns for vcvt.s32.f32 floating-point to
28403 fixed-point conversions. */
28406 vfp3_const_double_for_bits (rtx x)
28408 const REAL_VALUE_TYPE *r;
28410 if (!CONST_DOUBLE_P (x))
28411 return -1;
28413 r = CONST_DOUBLE_REAL_VALUE (x);
28415 if (REAL_VALUE_NEGATIVE (*r)
28416 || REAL_VALUE_ISNAN (*r)
28417 || REAL_VALUE_ISINF (*r)
28418 || !real_isinteger (r, SFmode))
28419 return -1;
28421 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28423 /* The exact_log2 above will have returned -1 if this is
28424 not an exact log2. */
28425 if (!IN_RANGE (hwint, 1, 32))
28426 return -1;
28428 return hwint;
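/* For example, 65536.0 is 2^16, so this returns 16; constants such as 1.0 or
   3.0 fall outside the accepted range or are not powers of 2 and return -1. */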
28432 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28434 static void
28435 arm_pre_atomic_barrier (enum memmodel model)
28437 if (need_atomic_barrier_p (model, true))
28438 emit_insn (gen_memory_barrier ());
28441 static void
28442 arm_post_atomic_barrier (enum memmodel model)
28444 if (need_atomic_barrier_p (model, false))
28445 emit_insn (gen_memory_barrier ());
28448 /* Emit the load-exclusive and store-exclusive instructions.
28449 Use acquire and release versions if necessary. */
28451 static void
28452 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28454 rtx (*gen) (rtx, rtx);
28456 if (acq)
28458 switch (mode)
28460 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28461 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28462 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28463 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28464 default:
28465 gcc_unreachable ();
28468 else
28470 switch (mode)
28472 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28473 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28474 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28475 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28476 default:
28477 gcc_unreachable ();
28481 emit_insn (gen (rval, mem));
28484 static void
28485 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28486 rtx mem, bool rel)
28488 rtx (*gen) (rtx, rtx, rtx);
28490 if (rel)
28492 switch (mode)
28494 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28495 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28496 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28497 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28498 default:
28499 gcc_unreachable ();
28502 else
28504 switch (mode)
28506 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28507 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28508 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28509 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28510 default:
28511 gcc_unreachable ();
28515 emit_insn (gen (bval, rval, mem));
28518 /* Mark the previous jump instruction as unlikely. */
28520 static void
28521 emit_unlikely_jump (rtx insn)
28523 rtx_insn *jump = emit_jump_insn (insn);
28524 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28527 /* Expand a compare and swap pattern. */
28529 void
28530 arm_expand_compare_and_swap (rtx operands[])
28532 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28533 machine_mode mode;
28534 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28536 bval = operands[0];
28537 rval = operands[1];
28538 mem = operands[2];
28539 oldval = operands[3];
28540 newval = operands[4];
28541 is_weak = operands[5];
28542 mod_s = operands[6];
28543 mod_f = operands[7];
28544 mode = GET_MODE (mem);
28546 /* Normally the succ memory model must be stronger than fail, but in the
28547 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28548 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28550 if (TARGET_HAVE_LDACQ
28551 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28552 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28553 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28555 switch (mode)
28557 case E_QImode:
28558 case E_HImode:
28559 /* For narrow modes, we're going to perform the comparison in SImode,
28560 so do the zero-extension now. */
28561 rval = gen_reg_rtx (SImode);
28562 oldval = convert_modes (SImode, mode, oldval, true);
28563 /* FALLTHRU */
28565 case E_SImode:
28566 /* Force the value into a register if needed. We waited until after
28567 the zero-extension above to do this properly. */
28568 if (!arm_add_operand (oldval, SImode))
28569 oldval = force_reg (SImode, oldval);
28570 break;
28572 case E_DImode:
28573 if (!cmpdi_operand (oldval, mode))
28574 oldval = force_reg (mode, oldval);
28575 break;
28577 default:
28578 gcc_unreachable ();
28581 if (TARGET_THUMB1)
28583 switch (mode)
28585 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28586 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28587 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28588 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28589 default:
28590 gcc_unreachable ();
28593 else
28595 switch (mode)
28597 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28598 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28599 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28600 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28601 default:
28602 gcc_unreachable ();
28606 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28607 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28609 if (mode == QImode || mode == HImode)
28610 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28612 /* In all cases, we arrange for success to be signaled by Z set.
28613 This arrangement allows for the boolean result to be used directly
28614 in a subsequent branch, post optimization. For Thumb-1 targets, the
28615 boolean negation of the result is also stored in bval, because the
28616 Thumb-1 backend lacks dependency tracking for the CC flag, since
28617 flag setting is not represented at the RTL level. */
28618 if (TARGET_THUMB1)
28619 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28620 else
28622 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28623 emit_insn (gen_rtx_SET (bval, x));
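/* As a rough illustration only: this expander is what typically services
   the __atomic_compare_exchange family of builtins.  A C fragment such as
   the following (the variable names are purely illustrative) would usually
   be routed through it:

     int counter, expected = 0;
     int ok = __atomic_compare_exchange_n (&counter, &expected, 1,
                                           0, __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);

   with success signaled through the Z flag (or bval on Thumb-1) as
   described above.  */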
28627 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28628 another memory store between the load-exclusive and store-exclusive can
28629 reset the monitor from Exclusive to Open state. This means we must wait
28630 until after reload to split the pattern, lest we get a register spill in
28631 the middle of the atomic sequence. Success of the compare and swap is
28632 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28633 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28634 atomic_compare_and_swap<mode> standard pattern in operand 0). */
28636 void
28637 arm_split_compare_and_swap (rtx operands[])
28639 rtx rval, mem, oldval, newval, neg_bval;
28640 machine_mode mode;
28641 enum memmodel mod_s, mod_f;
28642 bool is_weak;
28643 rtx_code_label *label1, *label2;
28644 rtx x, cond;
28646 rval = operands[1];
28647 mem = operands[2];
28648 oldval = operands[3];
28649 newval = operands[4];
28650 is_weak = (operands[5] != const0_rtx);
28651 mod_s = memmodel_from_int (INTVAL (operands[6]));
28652 mod_f = memmodel_from_int (INTVAL (operands[7]));
28653 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28654 mode = GET_MODE (mem);
28656 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28658 bool use_acquire = TARGET_HAVE_LDACQ
28659 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28660 || is_mm_release (mod_s));
28662 bool use_release = TARGET_HAVE_LDACQ
28663 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28664 || is_mm_acquire (mod_s));
28666 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28667 a full barrier is emitted after the store-release. */
28668 if (is_armv8_sync)
28669 use_acquire = false;
28671 /* Checks whether a barrier is needed and emits one accordingly. */
28672 if (!(use_acquire || use_release))
28673 arm_pre_atomic_barrier (mod_s);
28675 label1 = NULL;
28676 if (!is_weak)
28678 label1 = gen_label_rtx ();
28679 emit_label (label1);
28681 label2 = gen_label_rtx ();
28683 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28685 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
28686 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28687 if (TARGET_32BIT)
28689 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28690 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28691 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28692 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28693 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28695 else
28697 emit_move_insn (neg_bval, const1_rtx);
28698 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28699 if (thumb1_cmpneg_operand (oldval, SImode))
28700 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28701 label2, cond));
28702 else
28703 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28706 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28708 /* Weak or strong, we want EQ to be true for success, so that we
28709 match the flags that we got from the compare above. */
28710 if (TARGET_32BIT)
28712 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28713 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28714 emit_insn (gen_rtx_SET (cond, x));
28717 if (!is_weak)
28719 /* Z is set to boolean value of !neg_bval, as required to communicate
28720 with arm_expand_compare_and_swap. */
28721 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28722 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28725 if (!is_mm_relaxed (mod_f))
28726 emit_label (label2);
28728 /* Checks whether a barrier is needed and emits one accordingly. */
28729 if (is_armv8_sync
28730 || !(use_acquire || use_release))
28731 arm_post_atomic_barrier (mod_s);
28733 if (is_mm_relaxed (mod_f))
28734 emit_label (label2);
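/* Sketch only: for a strong compare-and-swap on an LDREX/STREX target with
   relaxed ordering (barriers and acquire/release variants omitted), the split
   above produces a loop roughly along these lines, where the register names
   are placeholders:

     1: ldrex   rval, [mem]
        cmp     rval, oldval          @ Z becomes the success flag
        bne     2f
        strex   neg_bval, newval, [mem]
        cmp     neg_bval, #0          @ retry if the store-exclusive failed
        bne     1b
     2:                               @ failure target (label2)   */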
28737 /* Split an atomic operation pattern. Operation is given by CODE and is one
28738 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28739 operation). Operation is performed on the content at MEM and on VALUE
28740 following the memory model MODEL_RTX. The content at MEM before and after
28741 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28742 success of the operation is returned in COND. Using a scratch register or
28743 an operand register for these determines what result is returned for that
28744 pattern. */
28746 void
28747 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28748 rtx value, rtx model_rtx, rtx cond)
28750 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28751 machine_mode mode = GET_MODE (mem);
28752 machine_mode wmode = (mode == DImode ? DImode : SImode);
28753 rtx_code_label *label;
28754 bool all_low_regs, bind_old_new;
28755 rtx x;
28757 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28759 bool use_acquire = TARGET_HAVE_LDACQ
28760 && !(is_mm_relaxed (model) || is_mm_consume (model)
28761 || is_mm_release (model));
28763 bool use_release = TARGET_HAVE_LDACQ
28764 && !(is_mm_relaxed (model) || is_mm_consume (model)
28765 || is_mm_acquire (model));
28767 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28768 a full barrier is emitted after the store-release. */
28769 if (is_armv8_sync)
28770 use_acquire = false;
28772 /* Checks whether a barrier is needed and emits one accordingly. */
28773 if (!(use_acquire || use_release))
28774 arm_pre_atomic_barrier (model);
28776 label = gen_label_rtx ();
28777 emit_label (label);
28779 if (new_out)
28780 new_out = gen_lowpart (wmode, new_out);
28781 if (old_out)
28782 old_out = gen_lowpart (wmode, old_out);
28783 else
28784 old_out = new_out;
28785 value = simplify_gen_subreg (wmode, value, mode, 0);
28787 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28789 /* Does the operation require destination and first operand to use the same
28790 register? This is decided by register constraints of relevant insn
28791 patterns in thumb1.md. */
28792 gcc_assert (!new_out || REG_P (new_out));
28793 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28794 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28795 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28796 bind_old_new =
28797 (TARGET_THUMB1
28798 && code != SET
28799 && code != MINUS
28800 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28802 /* We want to return the old value while putting the result of the operation
28803 in the same register as the old value, so copy the old value over to the
28804 destination register and use that register for the operation. */
28805 if (old_out && bind_old_new)
28807 emit_move_insn (new_out, old_out);
28808 old_out = new_out;
28811 switch (code)
28813 case SET:
28814 new_out = value;
28815 break;
28817 case NOT:
28818 x = gen_rtx_AND (wmode, old_out, value);
28819 emit_insn (gen_rtx_SET (new_out, x));
28820 x = gen_rtx_NOT (wmode, new_out);
28821 emit_insn (gen_rtx_SET (new_out, x));
28822 break;
28824 case MINUS:
28825 if (CONST_INT_P (value))
28827 value = GEN_INT (-INTVAL (value));
28828 code = PLUS;
28830 /* FALLTHRU */
28832 case PLUS:
28833 if (mode == DImode)
28835 /* DImode plus/minus need to clobber flags. */
28836 /* The adddi3 and subdi3 patterns are incorrectly written so that
28837 they require matching operands, even when we could easily support
28838 three operands. Thankfully, this can be fixed up post-splitting,
28839 as the individual add+adc patterns do accept three operands and
28840 post-reload cprop can make these moves go away. */
28841 emit_move_insn (new_out, old_out);
28842 if (code == PLUS)
28843 x = gen_adddi3 (new_out, new_out, value);
28844 else
28845 x = gen_subdi3 (new_out, new_out, value);
28846 emit_insn (x);
28847 break;
28849 /* FALLTHRU */
28851 default:
28852 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28853 emit_insn (gen_rtx_SET (new_out, x));
28854 break;
28857 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28858 use_release);
28860 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28861 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28863 /* Checks whether a barrier is needed and emits one accordingly. */
28864 if (is_armv8_sync
28865 || !(use_acquire || use_release))
28866 arm_post_atomic_barrier (model);
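/* For orientation only: with CODE == PLUS this split implements the loop
   behind __atomic_fetch_add, roughly as follows (register names are
   placeholders, barriers omitted):

     1: ldrex   old_out, [mem]
        add     new_out, old_out, value
        strex   cond, new_out, [mem]
        cmp     cond, #0
        bne     1b

   A C-level use that would typically land here is
     __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);
   for some int counter (the name is illustrative).  */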
28869 #define MAX_VECT_LEN 16
28871 struct expand_vec_perm_d
28873 rtx target, op0, op1;
28874 vec_perm_indices perm;
28875 machine_mode vmode;
28876 bool one_vector_p;
28877 bool testing_p;
28880 /* Generate a variable permutation. */
28882 static void
28883 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28885 machine_mode vmode = GET_MODE (target);
28886 bool one_vector_p = rtx_equal_p (op0, op1);
28888 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28889 gcc_checking_assert (GET_MODE (op0) == vmode);
28890 gcc_checking_assert (GET_MODE (op1) == vmode);
28891 gcc_checking_assert (GET_MODE (sel) == vmode);
28892 gcc_checking_assert (TARGET_NEON);
28894 if (one_vector_p)
28896 if (vmode == V8QImode)
28897 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28898 else
28899 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28901 else
28903 rtx pair;
28905 if (vmode == V8QImode)
28907 pair = gen_reg_rtx (V16QImode);
28908 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28909 pair = gen_lowpart (TImode, pair);
28910 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28912 else
28914 pair = gen_reg_rtx (OImode);
28915 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28916 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28921 void
28922 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28924 machine_mode vmode = GET_MODE (target);
28925 unsigned int nelt = GET_MODE_NUNITS (vmode);
28926 bool one_vector_p = rtx_equal_p (op0, op1);
28927 rtx mask;
28929 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28930 numbering of elements for big-endian, we must reverse the order. */
28931 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28933 /* The VTBL instruction does not use a modulo index, so we must take care
28934 of that ourselves. */
28935 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28936 mask = gen_const_vec_duplicate (vmode, mask);
28937 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28939 arm_expand_vec_perm_1 (target, op0, op1, sel);
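/* A small usage sketch (names are illustrative only): a generic variable
   vector shuffle such as

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));
     v8qi shuffle (v8qi a, v8qi b, v8qi sel)
     {
       return __builtin_shuffle (a, b, sel);
     }

   would typically reach this expander; the AND with the mask above supplies
   the modulo-NELT semantics that VEC_PERM_EXPR requires but VTBL does not
   provide by itself.  */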
28942 /* Map between architectural lane order and GCC lane order, taking the
28943 ABI into account. See the comment above output_move_neon for details. */
28945 static int
28946 neon_endian_lane_map (machine_mode mode, int lane)
28948 if (BYTES_BIG_ENDIAN)
28950 int nelems = GET_MODE_NUNITS (mode);
28951 /* Reverse lane order. */
28952 lane = (nelems - 1 - lane);
28953 /* Reverse D register order, to match ABI. */
28954 if (GET_MODE_SIZE (mode) == 16)
28955 lane = lane ^ (nelems / 2);
28957 return lane;
28960 /* Some permutations index into pairs of vectors; this is a helper function
28961 to map indexes into those pairs of vectors. */
28963 static int
28964 neon_pair_endian_lane_map (machine_mode mode, int lane)
28966 int nelem = GET_MODE_NUNITS (mode);
28967 if (BYTES_BIG_ENDIAN)
28968 lane =
28969 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28970 return lane;
28973 /* Generate or test for an insn that supports a constant permutation. */
28975 /* Recognize patterns for the VUZP insns. */
28977 static bool
28978 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28980 unsigned int i, odd, mask, nelt = d->perm.length ();
28981 rtx out0, out1, in0, in1;
28982 rtx (*gen)(rtx, rtx, rtx, rtx);
28983 int first_elem;
28984 int swap_nelt;
28986 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28987 return false;
28989 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28990 big-endian pattern on 64-bit vectors, so we correct for that. */
28991 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28992 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28994 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28996 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28997 odd = 0;
28998 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28999 odd = 1;
29000 else
29001 return false;
29002 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29004 for (i = 0; i < nelt; i++)
29006 unsigned elt =
29007 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29008 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29009 return false;
29012 /* Success! */
29013 if (d->testing_p)
29014 return true;
29016 switch (d->vmode)
29018 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29019 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29020 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29021 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29022 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29023 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29024 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29025 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29026 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29027 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29028 default:
29029 gcc_unreachable ();
29032 in0 = d->op0;
29033 in1 = d->op1;
29034 if (swap_nelt != 0)
29035 std::swap (in0, in1);
29037 out0 = d->target;
29038 out1 = gen_reg_rtx (d->vmode);
29039 if (odd)
29040 std::swap (out0, out1);
29042 emit_insn (gen (out0, in0, in1, out1));
29043 return true;
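/* Illustrative example (little-endian, V8QImode, two operands): the selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } picks the even lanes of the concatenation of
   the two inputs, which is exactly what one output of VUZP provides, so the
   check above accepts it with odd == 0.  */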
29046 /* Recognize patterns for the VZIP insns. */
29048 static bool
29049 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29051 unsigned int i, high, mask, nelt = d->perm.length ();
29052 rtx out0, out1, in0, in1;
29053 rtx (*gen)(rtx, rtx, rtx, rtx);
29054 int first_elem;
29055 bool is_swapped;
29057 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29058 return false;
29060 is_swapped = BYTES_BIG_ENDIAN;
29062 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29064 high = nelt / 2;
29065 if (first_elem == neon_endian_lane_map (d->vmode, high))
29067 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29068 high = 0;
29069 else
29070 return false;
29071 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29073 for (i = 0; i < nelt / 2; i++)
29075 unsigned elt =
29076 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29077 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29078 != elt)
29079 return false;
29080 elt =
29081 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29082 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29083 != elt)
29084 return false;
29087 /* Success! */
29088 if (d->testing_p)
29089 return true;
29091 switch (d->vmode)
29093 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29094 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29095 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29096 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29097 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29098 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29099 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29100 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29101 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29102 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29103 default:
29104 gcc_unreachable ();
29107 in0 = d->op0;
29108 in1 = d->op1;
29109 if (is_swapped)
29110 std::swap (in0, in1);
29112 out0 = d->target;
29113 out1 = gen_reg_rtx (d->vmode);
29114 if (high)
29115 std::swap (out0, out1);
29117 emit_insn (gen (out0, in0, in1, out1));
29118 return true;
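/* Illustrative example (little-endian, V8QImode, two operands): the selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two inputs,
   matching the first output of VZIP, so the check above accepts it with
   high == 0.  */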
29121 /* Recognize patterns for the VREV insns. */
29123 static bool
29124 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29126 unsigned int i, j, diff, nelt = d->perm.length ();
29127 rtx (*gen)(rtx, rtx);
29129 if (!d->one_vector_p)
29130 return false;
29132 diff = d->perm[0];
29133 switch (diff)
29135 case 7:
29136 switch (d->vmode)
29138 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29139 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29140 default:
29141 return false;
29143 break;
29144 case 3:
29145 switch (d->vmode)
29147 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29148 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29149 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29150 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29151 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29152 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29153 default:
29154 return false;
29156 break;
29157 case 1:
29158 switch (d->vmode)
29160 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29161 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29162 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29163 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29164 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29165 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29166 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29167 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29168 default:
29169 return false;
29171 break;
29172 default:
29173 return false;
29176 for (i = 0; i < nelt ; i += diff + 1)
29177 for (j = 0; j <= diff; j += 1)
29179 /* This is guaranteed to be true, as DIFF is 7, 3 or 1 and we
29180 should have enough elements in the queue to generate this.
29181 Getting a vector mask with a DIFF value other than these
29182 implies that something has gone wrong by the time we get
29183 here. */
29184 gcc_assert (i + j < nelt);
29185 if (d->perm[i + j] != i + diff - j)
29186 return false;
29189 /* Success! */
29190 if (d->testing_p)
29191 return true;
29193 emit_insn (gen (d->target, d->op0));
29194 return true;
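/* Illustrative example (V8QImode): diff == 3 corresponds to the selector
   { 3, 2, 1, 0, 7, 6, 5, 4 }, i.e. reversing the bytes within each 32-bit
   chunk, which is handled by VREV32.8 as selected in the switch above.  */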
29197 /* Recognize patterns for the VTRN insns. */
29199 static bool
29200 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29202 unsigned int i, odd, mask, nelt = d->perm.length ();
29203 rtx out0, out1, in0, in1;
29204 rtx (*gen)(rtx, rtx, rtx, rtx);
29206 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29207 return false;
29209 /* Note that these are little-endian tests. Adjust for big-endian later. */
29210 if (d->perm[0] == 0)
29211 odd = 0;
29212 else if (d->perm[0] == 1)
29213 odd = 1;
29214 else
29215 return false;
29216 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29218 for (i = 0; i < nelt; i += 2)
29220 if (d->perm[i] != i + odd)
29221 return false;
29222 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29223 return false;
29226 /* Success! */
29227 if (d->testing_p)
29228 return true;
29230 switch (d->vmode)
29232 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29233 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29234 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29235 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29236 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29237 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29238 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29239 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29240 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29241 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29242 default:
29243 gcc_unreachable ();
29246 in0 = d->op0;
29247 in1 = d->op1;
29248 if (BYTES_BIG_ENDIAN)
29250 std::swap (in0, in1);
29251 odd = !odd;
29254 out0 = d->target;
29255 out1 = gen_reg_rtx (d->vmode);
29256 if (odd)
29257 std::swap (out0, out1);
29259 emit_insn (gen (out0, in0, in1, out1));
29260 return true;
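/* Illustrative example (little-endian, V8QImode, two operands): the selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } transposes the even lanes of the two inputs,
   matching the first output of VTRN, so the check above accepts it with
   odd == 0.  */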
29263 /* Recognize patterns for the VEXT insns. */
29265 static bool
29266 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29268 unsigned int i, nelt = d->perm.length ();
29269 rtx (*gen) (rtx, rtx, rtx, rtx);
29270 rtx offset;
29272 unsigned int location;
29274 unsigned int next = d->perm[0] + 1;
29276 /* TODO: Handle GCC's numbering of elements for big-endian. */
29277 if (BYTES_BIG_ENDIAN)
29278 return false;
29280 /* Check if the extracted indexes are increasing by one. */
29281 for (i = 1; i < nelt; next++, i++)
29283 /* If we hit the most significant element of the 2nd vector in
29284 the previous iteration, no need to test further. */
29285 if (next == 2 * nelt)
29286 return false;
29288 /* If we are operating on only one vector, it could be a
29289 rotation. If there are only two elements of size < 64, let
29290 arm_evpc_neon_vrev catch it. */
29291 if (d->one_vector_p && (next == nelt))
29293 if ((nelt == 2) && (d->vmode != V2DImode))
29294 return false;
29295 else
29296 next = 0;
29299 if (d->perm[i] != next)
29300 return false;
29303 location = d->perm[0];
29305 switch (d->vmode)
29307 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29308 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29309 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29310 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29311 case E_V2SImode: gen = gen_neon_vextv2si; break;
29312 case E_V4SImode: gen = gen_neon_vextv4si; break;
29313 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29314 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29315 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29316 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29317 case E_V2DImode: gen = gen_neon_vextv2di; break;
29318 default:
29319 return false;
29322 /* Success! */
29323 if (d->testing_p)
29324 return true;
29326 offset = GEN_INT (location);
29327 emit_insn (gen (d->target, d->op0, d->op1, offset));
29328 return true;
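/* Illustrative examples: with two V8QImode operands the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } extracts a window starting at element 3 of the
   concatenated inputs (VEXT #3); with a single operand the rotation
   { 1, 2, 3, 4, 5, 6, 7, 0 } is also accepted by the loop above.  */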
29331 /* The NEON VTBL instruction is a fully variable permutation that's even
29332 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29333 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29334 can do slightly better by expanding this as a constant where we don't
29335 have to apply a mask. */
29337 static bool
29338 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29340 rtx rperm[MAX_VECT_LEN], sel;
29341 machine_mode vmode = d->vmode;
29342 unsigned int i, nelt = d->perm.length ();
29344 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29345 numbering of elements for big-endian, we must reverse the order. */
29346 if (BYTES_BIG_ENDIAN)
29347 return false;
29349 if (d->testing_p)
29350 return true;
29352 /* Generic code will try constant permutation twice: once with the
29353 original mode and again with the elements lowered to QImode.
29354 So wait, and don't do the selector expansion ourselves. */
29355 if (vmode != V8QImode && vmode != V16QImode)
29356 return false;
29358 for (i = 0; i < nelt; ++i)
29359 rperm[i] = GEN_INT (d->perm[i]);
29360 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29361 sel = force_reg (vmode, sel);
29363 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29364 return true;
29367 static bool
29368 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29370 /* Check if the input mask matches vext before reordering the
29371 operands. */
29372 if (TARGET_NEON)
29373 if (arm_evpc_neon_vext (d))
29374 return true;
29376 /* The pattern matching functions above are written to look for a small
29377 number to begin the sequence (0, 1, N/2). If we begin with an index
29378 from the second operand, we can swap the operands. */
29379 unsigned int nelt = d->perm.length ();
29380 if (d->perm[0] >= nelt)
29382 d->perm.rotate_inputs (1);
29383 std::swap (d->op0, d->op1);
29386 if (TARGET_NEON)
29388 if (arm_evpc_neon_vuzp (d))
29389 return true;
29390 if (arm_evpc_neon_vzip (d))
29391 return true;
29392 if (arm_evpc_neon_vrev (d))
29393 return true;
29394 if (arm_evpc_neon_vtrn (d))
29395 return true;
29396 return arm_evpc_neon_vtbl (d);
29398 return false;
29401 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29403 static bool
29404 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29405 const vec_perm_indices &sel)
29407 struct expand_vec_perm_d d;
29408 int i, nelt, which;
29410 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29411 return false;
29413 d.target = target;
29414 d.op0 = op0;
29415 d.op1 = op1;
29417 d.vmode = vmode;
29418 gcc_assert (VECTOR_MODE_P (d.vmode));
29419 d.testing_p = !target;
29421 nelt = GET_MODE_NUNITS (d.vmode);
29422 for (i = which = 0; i < nelt; ++i)
29424 int ei = sel[i] & (2 * nelt - 1);
29425 which |= (ei < nelt ? 1 : 2);
29428 switch (which)
29430 default:
29431 gcc_unreachable();
29433 case 3:
29434 d.one_vector_p = false;
29435 if (d.testing_p || !rtx_equal_p (op0, op1))
29436 break;
29438 /* The elements of PERM do not suggest that only the first operand
29439 is used, but both operands are identical. Allow easier matching
29440 of the permutation by folding the permutation into the single
29441 input vector. */
29442 /* FALLTHRU */
29443 case 2:
29444 d.op0 = op1;
29445 d.one_vector_p = true;
29446 break;
29448 case 1:
29449 d.op1 = op0;
29450 d.one_vector_p = true;
29451 break;
29454 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29456 if (!d.testing_p)
29457 return arm_expand_vec_perm_const_1 (&d);
29459 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29460 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29461 if (!d.one_vector_p)
29462 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29464 start_sequence ();
29465 bool ret = arm_expand_vec_perm_const_1 (&d);
29466 end_sequence ();
29468 return ret;
29471 bool
29472 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29474 /* If we are soft float and we either have LDRD or the mode is no
29475 wider than a word, then all auto-increment forms are OK. */
29476 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29477 return true;
29479 switch (code)
29481 /* Post-increment and pre-decrement are supported for all
29482 instruction forms except for vector forms. */
29483 case ARM_POST_INC:
29484 case ARM_PRE_DEC:
29485 if (VECTOR_MODE_P (mode))
29487 if (code != ARM_PRE_DEC)
29488 return true;
29489 else
29490 return false;
29493 return true;
29495 case ARM_POST_DEC:
29496 case ARM_PRE_INC:
29497 /* Without LDRD, when the mode size is greater than
29498 word size there is no point in auto-incrementing,
29499 because ldm and stm will not have these forms. */
29500 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29501 return false;
29503 /* Vector and floating-point modes do not support
29504 these auto-increment forms. */
29505 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29506 return false;
29508 return true;
29510 default:
29511 return false;
29515 return false;
29518 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29519 on ARM, since we know that shifts by negative amounts are no-ops.
29520 Additionally, the default expansion code is not available or suitable
29521 for post-reload insn splits (this can occur when the register allocator
29522 chooses not to do a shift in NEON).
29524 This function is used in both initial expand and post-reload splits, and
29525 handles all kinds of 64-bit shifts.
29527 Input requirements:
29528 - It is safe for the input and output to be the same register, but
29529 early-clobber rules apply for the shift amount and scratch registers.
29530 - Shift by register requires both scratch registers. In all other cases
29531 the scratch registers may be NULL.
29532 - Ashiftrt by a register also clobbers the CC register. */
29533 void
29534 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29535 rtx amount, rtx scratch1, rtx scratch2)
29537 rtx out_high = gen_highpart (SImode, out);
29538 rtx out_low = gen_lowpart (SImode, out);
29539 rtx in_high = gen_highpart (SImode, in);
29540 rtx in_low = gen_lowpart (SImode, in);
29542 /* Terminology:
29543 in = the register pair containing the input value.
29544 out = the destination register pair.
29545 up = the high- or low-part of each pair.
29546 down = the opposite part to "up".
29547 In a shift, we can consider bits to shift from "up"-stream to
29548 "down"-stream, so in a left-shift "up" is the low-part and "down"
29549 is the high-part of each register pair. */
29551 rtx out_up = code == ASHIFT ? out_low : out_high;
29552 rtx out_down = code == ASHIFT ? out_high : out_low;
29553 rtx in_up = code == ASHIFT ? in_low : in_high;
29554 rtx in_down = code == ASHIFT ? in_high : in_low;
29556 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29557 gcc_assert (out
29558 && (REG_P (out) || GET_CODE (out) == SUBREG)
29559 && GET_MODE (out) == DImode);
29560 gcc_assert (in
29561 && (REG_P (in) || GET_CODE (in) == SUBREG)
29562 && GET_MODE (in) == DImode);
29563 gcc_assert (amount
29564 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29565 && GET_MODE (amount) == SImode)
29566 || CONST_INT_P (amount)));
29567 gcc_assert (scratch1 == NULL
29568 || (GET_CODE (scratch1) == SCRATCH)
29569 || (GET_MODE (scratch1) == SImode
29570 && REG_P (scratch1)));
29571 gcc_assert (scratch2 == NULL
29572 || (GET_CODE (scratch2) == SCRATCH)
29573 || (GET_MODE (scratch2) == SImode
29574 && REG_P (scratch2)));
29575 gcc_assert (!REG_P (out) || !REG_P (amount)
29576 || !HARD_REGISTER_P (out)
29577 || (REGNO (out) != REGNO (amount)
29578 && REGNO (out) + 1 != REGNO (amount)));
29580 /* Macros to make following code more readable. */
29581 #define SUB_32(DEST,SRC) \
29582 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29583 #define RSB_32(DEST,SRC) \
29584 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29585 #define SUB_S_32(DEST,SRC) \
29586 gen_addsi3_compare0 ((DEST), (SRC), \
29587 GEN_INT (-32))
29588 #define SET(DEST,SRC) \
29589 gen_rtx_SET ((DEST), (SRC))
29590 #define SHIFT(CODE,SRC,AMOUNT) \
29591 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29592 #define LSHIFT(CODE,SRC,AMOUNT) \
29593 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29594 SImode, (SRC), (AMOUNT))
29595 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29596 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29597 SImode, (SRC), (AMOUNT))
29598 #define ORR(A,B) \
29599 gen_rtx_IOR (SImode, (A), (B))
29600 #define BRANCH(COND,LABEL) \
29601 gen_arm_cond_branch ((LABEL), \
29602 gen_rtx_ ## COND (CCmode, cc_reg, \
29603 const0_rtx), \
29604 cc_reg)
29606 /* Shifts by register and shifts by constant are handled separately. */
29607 if (CONST_INT_P (amount))
29609 /* We have a shift-by-constant. */
29611 /* First, handle out-of-range shift amounts.
29612 In both cases we try to match the result that an ARM instruction in a
29613 shift-by-register would give. This helps reduce execution
29614 differences between optimization levels, but it won't stop other
29615 parts of the compiler doing different things. This is "undefined
29616 behavior", in any case. */
29617 if (INTVAL (amount) <= 0)
29618 emit_insn (gen_movdi (out, in));
29619 else if (INTVAL (amount) >= 64)
29621 if (code == ASHIFTRT)
29623 rtx const31_rtx = GEN_INT (31);
29624 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29625 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29627 else
29628 emit_insn (gen_movdi (out, const0_rtx));
29631 /* Now handle valid shifts. */
29632 else if (INTVAL (amount) < 32)
29634 /* Shifts by a constant less than 32. */
29635 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29637 /* Clearing the out register in DImode first avoids lots
29638 of spilling and results in less stack usage.
29639 Later this redundant insn is completely removed.
29640 Do that only if "in" and "out" are different registers. */
29641 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29642 emit_insn (SET (out, const0_rtx));
29643 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29644 emit_insn (SET (out_down,
29645 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29646 out_down)));
29647 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29649 else
29651 /* Shifts by a constant greater than 31. */
29652 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29654 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29655 emit_insn (SET (out, const0_rtx));
29656 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29657 if (code == ASHIFTRT)
29658 emit_insn (gen_ashrsi3 (out_up, in_up,
29659 GEN_INT (31)));
29660 else
29661 emit_insn (SET (out_up, const0_rtx));
29664 else
29666 /* We have a shift-by-register. */
29667 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29669 /* This alternative requires the scratch registers. */
29670 gcc_assert (scratch1 && REG_P (scratch1));
29671 gcc_assert (scratch2 && REG_P (scratch2));
29673 /* We will need the values "amount-32" and "32-amount" later.
29674 Swapping them around now allows the later code to be more general. */
29675 switch (code)
29677 case ASHIFT:
29678 emit_insn (SUB_32 (scratch1, amount));
29679 emit_insn (RSB_32 (scratch2, amount));
29680 break;
29681 case ASHIFTRT:
29682 emit_insn (RSB_32 (scratch1, amount));
29683 /* Also set CC = amount > 32. */
29684 emit_insn (SUB_S_32 (scratch2, amount));
29685 break;
29686 case LSHIFTRT:
29687 emit_insn (RSB_32 (scratch1, amount));
29688 emit_insn (SUB_32 (scratch2, amount));
29689 break;
29690 default:
29691 gcc_unreachable ();
29694 /* Emit code like this:
29696 arithmetic-left:
29697 out_down = in_down << amount;
29698 out_down = (in_up << (amount - 32)) | out_down;
29699 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29700 out_up = in_up << amount;
29702 arithmetic-right:
29703 out_down = in_down >> amount;
29704 out_down = (in_up << (32 - amount)) | out_down;
29705 if (amount < 32)
29706 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29707 out_up = in_up >> amount;
29709 logical-right:
29710 out_down = in_down >> amount;
29711 out_down = (in_up << (32 - amount)) | out_down;
29712 if (amount < 32)
29713 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29714 out_up = in_up >> amount;
29716 The ARM and Thumb2 variants are the same but implemented slightly
29717 differently. If this were only called during expand we could just
29718 use the Thumb2 case and let combine do the right thing, but this
29719 can also be called from post-reload splitters. */
29721 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29723 if (!TARGET_THUMB2)
29725 /* Emit code for ARM mode. */
29726 emit_insn (SET (out_down,
29727 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29728 if (code == ASHIFTRT)
29730 rtx_code_label *done_label = gen_label_rtx ();
29731 emit_jump_insn (BRANCH (LT, done_label));
29732 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29733 out_down)));
29734 emit_label (done_label);
29736 else
29737 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29738 out_down)));
29740 else
29742 /* Emit code for Thumb2 mode.
29743 Thumb2 can't do a shift and an OR in one insn. */
29744 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29745 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29747 if (code == ASHIFTRT)
29749 rtx_code_label *done_label = gen_label_rtx ();
29750 emit_jump_insn (BRANCH (LT, done_label));
29751 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29752 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29753 emit_label (done_label);
29755 else
29757 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29758 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29762 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29765 #undef SUB_32
29766 #undef RSB_32
29767 #undef SUB_S_32
29768 #undef SET
29769 #undef SHIFT
29770 #undef LSHIFT
29771 #undef REV_LSHIFT
29772 #undef ORR
29773 #undef BRANCH
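/* As a worked example of the constant-shift path above (purely a sketch in
   C-like pseudo-code, ignoring the optional clearing of OUT): shifting a
   DImode value left by 5 emits, in effect,

     out_high  = in_high << 5;
     out_high |= (unsigned) in_low >> 27;      (27 being 32 - 5)
     out_low   = in_low << 5;

   which matches the "up"/"down" terminology described before the macros.  */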
29776 /* Returns true if the pattern is a valid symbolic address, which is either a
29777 symbol_ref or (symbol_ref + addend).
29779 According to the ARM ELF ABI, the initial addend of REL-type relocations
29780 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29781 literal field of the instruction as a 16-bit signed value in the range
29782 -32768 <= A < 32768. */
29784 bool
29785 arm_valid_symbolic_address_p (rtx addr)
29787 rtx xop0, xop1 = NULL_RTX;
29788 rtx tmp = addr;
29790 if (target_word_relocations)
29791 return false;
29793 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29794 return true;
29796 /* (const (plus: symbol_ref const_int)) */
29797 if (GET_CODE (addr) == CONST)
29798 tmp = XEXP (addr, 0);
29800 if (GET_CODE (tmp) == PLUS)
29802 xop0 = XEXP (tmp, 0);
29803 xop1 = XEXP (tmp, 1);
29805 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29806 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29809 return false;
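/* For example, under this test (ignoring the target_word_relocations
   early-out) a plain symbol_ref and (const (plus symbol_ref 100)) are valid,
   while (const (plus symbol_ref 40000)) is rejected because the addend does
   not fit the signed 16-bit range noted above.  */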
29812 /* Return true if *COMPARISON is a valid comparison operation, and
29813 put the operands into a form that is valid for it. */
29814 bool
29815 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29817 enum rtx_code code = GET_CODE (*comparison);
29818 int code_int;
29819 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29820 ? GET_MODE (*op2) : GET_MODE (*op1);
29822 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29824 if (code == UNEQ || code == LTGT)
29825 return false;
29827 code_int = (int)code;
29828 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29829 PUT_CODE (*comparison, (enum rtx_code)code_int);
29831 switch (mode)
29833 case E_SImode:
29834 if (!arm_add_operand (*op1, mode))
29835 *op1 = force_reg (mode, *op1);
29836 if (!arm_add_operand (*op2, mode))
29837 *op2 = force_reg (mode, *op2);
29838 return true;
29840 case E_DImode:
29841 if (!cmpdi_operand (*op1, mode))
29842 *op1 = force_reg (mode, *op1);
29843 if (!cmpdi_operand (*op2, mode))
29844 *op2 = force_reg (mode, *op2);
29845 return true;
29847 case E_HFmode:
29848 if (!TARGET_VFP_FP16INST)
29849 break;
29850 /* FP16 comparisons are done in SF mode. */
29851 mode = SFmode;
29852 *op1 = convert_to_mode (mode, *op1, 1);
29853 *op2 = convert_to_mode (mode, *op2, 1);
29854 /* Fall through. */
29855 case E_SFmode:
29856 case E_DFmode:
29857 if (!vfp_compare_operand (*op1, mode))
29858 *op1 = force_reg (mode, *op1);
29859 if (!vfp_compare_operand (*op2, mode))
29860 *op2 = force_reg (mode, *op2);
29861 return true;
29862 default:
29863 break;
29866 return false;
29870 /* Maximum number of instructions to set a block of memory. */
29871 static int
29872 arm_block_set_max_insns (void)
29874 if (optimize_function_for_size_p (cfun))
29875 return 4;
29876 else
29877 return current_tune->max_insns_inline_memset;
29880 /* Return TRUE if it's profitable to set a block of memory in the
29881 non-vectorized case. VAL is the value to set the memory
29882 with. LENGTH is the number of bytes to set. ALIGN is the
29883 alignment of the destination memory in bytes. UNALIGNED_P
29884 is TRUE if we can only set the memory with instructions
29885 meeting alignment requirements. USE_STRD_P is TRUE if we
29886 can use strd to set the memory. */
29887 static bool
29888 arm_block_set_non_vect_profit_p (rtx val,
29889 unsigned HOST_WIDE_INT length,
29890 unsigned HOST_WIDE_INT align,
29891 bool unaligned_p, bool use_strd_p)
29893 int num = 0;
29894 /* For a leftover of 0-7 bytes, we can set the memory block using
29895 strb/strh/str with the minimum number of instructions. */
29896 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29898 if (unaligned_p)
29900 num = arm_const_inline_cost (SET, val);
29901 num += length / align + length % align;
29903 else if (use_strd_p)
29905 num = arm_const_double_inline_cost (val);
29906 num += (length >> 3) + leftover[length & 7];
29908 else
29910 num = arm_const_inline_cost (SET, val);
29911 num += (length >> 2) + leftover[length & 3];
29914 /* We may be able to combine the last STRH/STRB pair into a single STR
29915 by shifting one byte back. */
29916 if (unaligned_access && length > 3 && (length & 3) == 3)
29917 num--;
29919 return (num <= arm_block_set_max_insns ());
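/* A rough worked example of the count above: for a 15-byte aligned block set
   without strd, the word loop needs 15 >> 2 == 3 stores and
   leftover[15 & 3] == 2 more (strh + strb), plus the instructions needed to
   build the constant; when unaligned access is available, the final
   strh/strb pair may be folded into one str, as accounted for just above.  */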
29922 /* Return TRUE if it's profitable to set a block of memory in the
29923 vectorized case. LENGTH is the number of bytes to set.
29924 ALIGN is the alignment of destination memory in bytes.
29925 MODE is the vector mode used to set the memory. */
29926 static bool
29927 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29928 unsigned HOST_WIDE_INT align,
29929 machine_mode mode)
29931 int num;
29932 bool unaligned_p = ((align & 3) != 0);
29933 unsigned int nelt = GET_MODE_NUNITS (mode);
29935 /* Instruction loading constant value. */
29936 num = 1;
29937 /* Instructions storing the memory. */
29938 num += (length + nelt - 1) / nelt;
29939 /* Instructions adjusting the address expression. We only need to
29940 adjust the address expression if it's 4-byte aligned and the
29941 leftover bytes can only be stored by a misaligned store instruction. */
29942 if (!unaligned_p && (length & 3) != 0)
29943 num++;
29945 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29946 if (!unaligned_p && mode == V16QImode)
29947 num--;
29949 return (num <= arm_block_set_max_insns ());
29952 /* Set a block of memory using vectorization instructions for the
29953 unaligned case. We fill the first LENGTH bytes of the memory
29954 area starting from DSTBASE with byte constant VALUE. ALIGN is
29955 the alignment requirement of memory. Return TRUE if succeeded. */
29956 static bool
29957 arm_block_set_unaligned_vect (rtx dstbase,
29958 unsigned HOST_WIDE_INT length,
29959 unsigned HOST_WIDE_INT value,
29960 unsigned HOST_WIDE_INT align)
29962 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29963 rtx dst, mem;
29964 rtx val_vec, reg;
29965 rtx (*gen_func) (rtx, rtx);
29966 machine_mode mode;
29967 unsigned HOST_WIDE_INT v = value;
29968 unsigned int offset = 0;
29969 gcc_assert ((align & 0x3) != 0);
29970 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29971 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29972 if (length >= nelt_v16)
29974 mode = V16QImode;
29975 gen_func = gen_movmisalignv16qi;
29977 else
29979 mode = V8QImode;
29980 gen_func = gen_movmisalignv8qi;
29982 nelt_mode = GET_MODE_NUNITS (mode);
29983 gcc_assert (length >= nelt_mode);
29984 /* Skip if it isn't profitable. */
29985 if (!arm_block_set_vect_profit_p (length, align, mode))
29986 return false;
29988 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29989 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29991 v = sext_hwi (v, BITS_PER_WORD);
29993 reg = gen_reg_rtx (mode);
29994 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29995 /* Emit instruction loading the constant value. */
29996 emit_move_insn (reg, val_vec);
29998 /* Handle nelt_mode bytes in a vector. */
29999 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30001 emit_insn ((*gen_func) (mem, reg));
30002 if (i + 2 * nelt_mode <= length)
30004 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30005 offset += nelt_mode;
30006 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30010 /* If at least nelt_v8 bytes are left over, we must be in
30011 V16QImode. */
30012 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30014 /* Handle (8, 16) bytes leftover. */
30015 if (i + nelt_v8 < length)
30017 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30018 offset += length - i;
30019 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30021 /* We are shifting bytes back, set the alignment accordingly. */
30022 if ((length & 1) != 0 && align >= 2)
30023 set_mem_align (mem, BITS_PER_UNIT);
30025 emit_insn (gen_movmisalignv16qi (mem, reg));
30027 /* Handle (0, 8] bytes leftover. */
30028 else if (i < length && i + nelt_v8 >= length)
30030 if (mode == V16QImode)
30031 reg = gen_lowpart (V8QImode, reg);
30033 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30034 + (nelt_mode - nelt_v8))));
30035 offset += (length - i) + (nelt_mode - nelt_v8);
30036 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30038 /* We are shifting bytes back, set the alignment accordingly. */
30039 if ((length & 1) != 0 && align >= 2)
30040 set_mem_align (mem, BITS_PER_UNIT);
30042 emit_insn (gen_movmisalignv8qi (mem, reg));
30045 return true;
30048 /* Set a block of memory using vectorization instructions for the
30049 aligned case. We fill the first LENGTH bytes of the memory area
30050 starting from DSTBASE with byte constant VALUE. ALIGN is the
30051 alignment requirement of memory. Return TRUE if succeeded. */
30052 static bool
30053 arm_block_set_aligned_vect (rtx dstbase,
30054 unsigned HOST_WIDE_INT length,
30055 unsigned HOST_WIDE_INT value,
30056 unsigned HOST_WIDE_INT align)
30058 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30059 rtx dst, addr, mem;
30060 rtx val_vec, reg;
30061 machine_mode mode;
30062 unsigned HOST_WIDE_INT v = value;
30063 unsigned int offset = 0;
30065 gcc_assert ((align & 0x3) == 0);
30066 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30067 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30068 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30069 mode = V16QImode;
30070 else
30071 mode = V8QImode;
30073 nelt_mode = GET_MODE_NUNITS (mode);
30074 gcc_assert (length >= nelt_mode);
30075 /* Skip if it isn't profitable. */
30076 if (!arm_block_set_vect_profit_p (length, align, mode))
30077 return false;
30079 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30081 v = sext_hwi (v, BITS_PER_WORD);
30083 reg = gen_reg_rtx (mode);
30084 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30085 /* Emit instruction loading the constant value. */
30086 emit_move_insn (reg, val_vec);
30088 i = 0;
30089 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30090 if (mode == V16QImode)
30092 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30093 emit_insn (gen_movmisalignv16qi (mem, reg));
30094 i += nelt_mode;
30095 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30096 if (i + nelt_v8 < length && i + nelt_v16 > length)
30098 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30099 offset += length - nelt_mode;
30100 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30101 /* We are shifting bytes back, set the alignment accordingly. */
30102 if ((length & 0x3) == 0)
30103 set_mem_align (mem, BITS_PER_UNIT * 4);
30104 else if ((length & 0x1) == 0)
30105 set_mem_align (mem, BITS_PER_UNIT * 2);
30106 else
30107 set_mem_align (mem, BITS_PER_UNIT);
30109 emit_insn (gen_movmisalignv16qi (mem, reg));
30110 return true;
30112 /* Fall through for bytes leftover. */
30113 mode = V8QImode;
30114 nelt_mode = GET_MODE_NUNITS (mode);
30115 reg = gen_lowpart (V8QImode, reg);
30118 /* Handle 8 bytes in a vector. */
30119 for (; (i + nelt_mode <= length); i += nelt_mode)
30121 addr = plus_constant (Pmode, dst, i);
30122 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30123 emit_move_insn (mem, reg);
30126 /* Handle single word leftover by shifting 4 bytes back. We can
30127 use aligned access for this case. */
30128 if (i + UNITS_PER_WORD == length)
30130 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30131 offset += i - UNITS_PER_WORD;
30132 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30133 /* We are shifting 4 bytes back, set the alignment accordingly. */
30134 if (align > UNITS_PER_WORD)
30135 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30137 emit_move_insn (mem, reg);
30139 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30140 We have to use unaligned access for this case. */
30141 else if (i < length)
30143 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30144 offset += length - nelt_mode;
30145 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30146 /* We are shifting bytes back, set the alignment accordingly. */
30147 if ((length & 1) == 0)
30148 set_mem_align (mem, BITS_PER_UNIT * 2);
30149 else
30150 set_mem_align (mem, BITS_PER_UNIT);
30152 emit_insn (gen_movmisalignv8qi (mem, reg));
30155 return true;
30158 /* Set a block of memory using plain strh/strb instructions, only
30159 using instructions allowed by ALIGN on the processor. We fill the
30160 first LENGTH bytes of the memory area starting from DSTBASE
30161 with byte constant VALUE. ALIGN is the alignment requirement
30162 of memory. */
30163 static bool
30164 arm_block_set_unaligned_non_vect (rtx dstbase,
30165 unsigned HOST_WIDE_INT length,
30166 unsigned HOST_WIDE_INT value,
30167 unsigned HOST_WIDE_INT align)
30169 unsigned int i;
30170 rtx dst, addr, mem;
30171 rtx val_exp, val_reg, reg;
30172 machine_mode mode;
30173 HOST_WIDE_INT v = value;
30175 gcc_assert (align == 1 || align == 2);
30177 if (align == 2)
30178 v |= (value << BITS_PER_UNIT);
30180 v = sext_hwi (v, BITS_PER_WORD);
30181 val_exp = GEN_INT (v);
30182 /* Skip if it isn't profitable. */
30183 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30184 align, true, false))
30185 return false;
30187 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30188 mode = (align == 2 ? HImode : QImode);
30189 val_reg = force_reg (SImode, val_exp);
30190 reg = gen_lowpart (mode, val_reg);
30192 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30194 addr = plus_constant (Pmode, dst, i);
30195 mem = adjust_automodify_address (dstbase, mode, addr, i);
30196 emit_move_insn (mem, reg);
30199 /* Handle single byte leftover. */
30200 if (i + 1 == length)
30202 reg = gen_lowpart (QImode, val_reg);
30203 addr = plus_constant (Pmode, dst, i);
30204 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30205 emit_move_insn (mem, reg);
30206 i++;
30209 gcc_assert (i == length);
30210 return true;
30213 /* Set a block of memory using plain strd/str/strh/strb instructions,
30214 to permit unaligned stores on processors that support unaligned
30215 semantics for those instructions. We fill the first LENGTH bytes
30216 of the memory area starting from DSTBASE with byte constant VALUE.
30217 ALIGN is the alignment requirement of memory. */
30218 static bool
30219 arm_block_set_aligned_non_vect (rtx dstbase,
30220 unsigned HOST_WIDE_INT length,
30221 unsigned HOST_WIDE_INT value,
30222 unsigned HOST_WIDE_INT align)
30224 unsigned int i;
30225 rtx dst, addr, mem;
30226 rtx val_exp, val_reg, reg;
30227 unsigned HOST_WIDE_INT v;
30228 bool use_strd_p;
30230 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30231 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30233 v = (value | (value << 8) | (value << 16) | (value << 24));
30234 if (length < UNITS_PER_WORD)
30235 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30237 if (use_strd_p)
30238 v |= (v << BITS_PER_WORD);
30239 else
30240 v = sext_hwi (v, BITS_PER_WORD);
30242 val_exp = GEN_INT (v);
30243 /* Skip if it isn't profitable. */
30244 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30245 align, false, use_strd_p))
30247 if (!use_strd_p)
30248 return false;
30250 /* Try without strd. */
30251 v = (v >> BITS_PER_WORD);
30252 v = sext_hwi (v, BITS_PER_WORD);
30253 val_exp = GEN_INT (v);
30254 use_strd_p = false;
30255 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30256 align, false, use_strd_p))
30257 return false;
30260 i = 0;
30261 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30262 /* Handle double words using strd if possible. */
30263 if (use_strd_p)
30265 val_reg = force_reg (DImode, val_exp);
30266 reg = val_reg;
30267 for (; (i + 8 <= length); i += 8)
30269 addr = plus_constant (Pmode, dst, i);
30270 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30271 emit_move_insn (mem, reg);
30274 else
30275 val_reg = force_reg (SImode, val_exp);
30277 /* Handle words. */
30278 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30279 for (; (i + 4 <= length); i += 4)
30281 addr = plus_constant (Pmode, dst, i);
30282 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30283 if ((align & 3) == 0)
30284 emit_move_insn (mem, reg);
30285 else
30286 emit_insn (gen_unaligned_storesi (mem, reg));
30289 /* Merge last pair of STRH and STRB into a STR if possible. */
30290 if (unaligned_access && i > 0 && (i + 3) == length)
30292 addr = plus_constant (Pmode, dst, i - 1);
30293 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30294 /* We are shifting one byte back, set the alignment accordingly. */
30295 if ((align & 1) == 0)
30296 set_mem_align (mem, BITS_PER_UNIT);
30298 /* Most likely this is an unaligned access, and we can't tell at
30299 compile time. */
30300 emit_insn (gen_unaligned_storesi (mem, reg));
30301 return true;
30304 /* Handle half word leftover. */
30305 if (i + 2 <= length)
30307 reg = gen_lowpart (HImode, val_reg);
30308 addr = plus_constant (Pmode, dst, i);
30309 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30310 if ((align & 1) == 0)
30311 emit_move_insn (mem, reg);
30312 else
30313 emit_insn (gen_unaligned_storehi (mem, reg));
30315 i += 2;
30318 /* Handle single byte leftover. */
30319 if (i + 1 == length)
30321 reg = gen_lowpart (QImode, val_reg);
30322 addr = plus_constant (Pmode, dst, i);
30323 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30324 emit_move_insn (mem, reg);
30327 return true;
30330 /* Set a block of memory using vectorization instructions for both
30331 aligned and unaligned cases. We fill the first LENGTH bytes of
30332 the memory area starting from DSTBASE with byte constant VALUE.
30333 ALIGN is the alignment requirement of memory. */
30334 static bool
30335 arm_block_set_vect (rtx dstbase,
30336 unsigned HOST_WIDE_INT length,
30337 unsigned HOST_WIDE_INT value,
30338 unsigned HOST_WIDE_INT align)
30340 /* Check whether we need to use unaligned store instruction. */
30341 if (((align & 3) != 0 || (length & 3) != 0)
30342 /* Check whether unaligned store instruction is available. */
30343 && (!unaligned_access || BYTES_BIG_ENDIAN))
30344 return false;
30346 if ((align & 3) == 0)
30347 return arm_block_set_aligned_vect (dstbase, length, value, align);
30348 else
30349 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30352 /* Expand a string store operation. First we try to do it using
30353 vectorization instructions, then with ARM unaligned access and
30354 double-word stores if profitable. OPERANDS[0] is the destination,
30355 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30356 initialize the memory with, and OPERANDS[3] is the known alignment
30357 of the destination. */
30358 bool
30359 arm_gen_setmem (rtx *operands)
30361 rtx dstbase = operands[0];
30362 unsigned HOST_WIDE_INT length;
30363 unsigned HOST_WIDE_INT value;
30364 unsigned HOST_WIDE_INT align;
30366 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30367 return false;
30369 length = UINTVAL (operands[1]);
30370 if (length > 64)
30371 return false;
30373 value = (UINTVAL (operands[2]) & 0xFF);
30374 align = UINTVAL (operands[3]);
30375 if (TARGET_NEON && length >= 8
30376 && current_tune->string_ops_prefer_neon
30377 && arm_block_set_vect (dstbase, length, value, align))
30378 return true;
30380 if (!unaligned_access && (align & 3) != 0)
30381 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30383 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
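/* A minimal sketch of C code that can end up here (the buffer name and size
   are illustrative only):

     char buf[15];
     memset (buf, 0xAB, sizeof buf);

   Calls with a non-constant length or with more than 64 bytes fall back to
   the generic expansion, per the early-outs above.  */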
30387 static bool
30388 arm_macro_fusion_p (void)
30390 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30393 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30394 for MOVW / MOVT macro fusion. */
30396 static bool
30397 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30399 /* We are trying to fuse
30400 movw imm / movt imm
30401 instructions as a group that gets scheduled together. */
30403 rtx set_dest = SET_DEST (curr_set);
30405 if (GET_MODE (set_dest) != SImode)
30406 return false;
30408 /* We are trying to match:
30409 prev (movw) == (set (reg r0) (const_int imm16))
30410 curr (movt) == (set (zero_extract (reg r0)
30411 (const_int 16)
30412 (const_int 16))
30413 (const_int imm16_1))
30415 prev (movw) == (set (reg r1)
30416 (high (symbol_ref ("SYM"))))
30417 curr (movt) == (set (reg r0)
30418 (lo_sum (reg r1)
30419 (symbol_ref ("SYM")))) */
30421 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30423 if (CONST_INT_P (SET_SRC (curr_set))
30424 && CONST_INT_P (SET_SRC (prev_set))
30425 && REG_P (XEXP (set_dest, 0))
30426 && REG_P (SET_DEST (prev_set))
30427 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30428 return true;
30431 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30432 && REG_P (SET_DEST (curr_set))
30433 && REG_P (SET_DEST (prev_set))
30434 && GET_CODE (SET_SRC (prev_set)) == HIGH
30435 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30436 return true;
30438 return false;
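/* Scheduler macro-fusion hook: return TRUE if the back-to-back instructions
   PREV and CURR are candidates for fusing and should be kept together
   during scheduling.  */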
30441 static bool
30442 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30444 rtx prev_set = single_set (prev);
30445 rtx curr_set = single_set (curr);
30447 if (!prev_set
30448 || !curr_set)
30449 return false;
30451 if (any_condjump_p (curr))
30452 return false;
30454 if (!arm_macro_fusion_p ())
30455 return false;
30457 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30458 && aarch_crypto_can_dual_issue (prev, curr))
30459 return true;
30461 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30462 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30463 return true;
30465 return false;
30468 /* Return true iff the instruction fusion described by OP is enabled. */
30469 bool
30470 arm_fusion_enabled_p (tune_params::fuse_ops op)
30472 return current_tune->fusible_ops & op;
30475 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30476 scheduled for speculative execution. Reject the long-running division
30477 and square-root instructions. */
30479 static bool
30480 arm_sched_can_speculate_insn (rtx_insn *insn)
30482 switch (get_attr_type (insn))
30484 case TYPE_SDIV:
30485 case TYPE_UDIV:
30486 case TYPE_FDIVS:
30487 case TYPE_FDIVD:
30488 case TYPE_FSQRTS:
30489 case TYPE_FSQRTD:
30490 case TYPE_NEON_FP_SQRT_S:
30491 case TYPE_NEON_FP_SQRT_D:
30492 case TYPE_NEON_FP_SQRT_S_Q:
30493 case TYPE_NEON_FP_SQRT_D_Q:
30494 case TYPE_NEON_FP_DIV_S:
30495 case TYPE_NEON_FP_DIV_D:
30496 case TYPE_NEON_FP_DIV_S_Q:
30497 case TYPE_NEON_FP_DIV_D_Q:
30498 return false;
30499 default:
30500 return true;
30504 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
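/* With the standard ASan shadow mapping this places the shadow region at
   address 0x20000000 (1 << 29).  */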
30506 static unsigned HOST_WIDE_INT
30507 arm_asan_shadow_offset (void)
30509 return HOST_WIDE_INT_1U << 29;
30513 /* This is a temporary fix for PR60655. Ideally we should
30514 handle most of these cases in the generic part, but
30515 currently we reject minus (..) (sym_ref). We try to
30516 ameliorate the case of minus (sym_ref1) (sym_ref2)
30517 where both symbols are in the same section. */
30519 static bool
30520 arm_const_not_ok_for_debug_p (rtx p)
30522 tree decl_op0 = NULL;
30523 tree decl_op1 = NULL;
30525 if (GET_CODE (p) == UNSPEC)
30526 return true;
30527 if (GET_CODE (p) == MINUS)
30529 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30531 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30532 if (decl_op1
30533 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30534 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30536 if ((VAR_P (decl_op1)
30537 || TREE_CODE (decl_op1) == CONST_DECL)
30538 && (VAR_P (decl_op0)
30539 || TREE_CODE (decl_op0) == CONST_DECL))
30540 return (get_variable_section (decl_op1, false)
30541 != get_variable_section (decl_op0, false));
30543 if (TREE_CODE (decl_op1) == LABEL_DECL
30544 && TREE_CODE (decl_op0) == LABEL_DECL)
30545 return (DECL_CONTEXT (decl_op1)
30546 != DECL_CONTEXT (decl_op0));
30549 return true;
30553 return false;
30556 /* Return TRUE if X is a reference to a value in a constant pool.  */
30557 extern bool
30558 arm_is_constant_pool_ref (rtx x)
30560 return (MEM_P (x)
30561 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30562 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30565 /* Remember the last target of arm_set_current_function. */
30566 static GTY(()) tree arm_previous_fndecl;
30568 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30570 void
30571 save_restore_target_globals (tree new_tree)
30573 /* If we have a previous state, use it. */
30574 if (TREE_TARGET_GLOBALS (new_tree))
30575 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30576 else if (new_tree == target_option_default_node)
30577 restore_target_globals (&default_target_globals);
30578 else
30580 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30581 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30584 arm_option_params_internal ();
30587 /* Invalidate arm_previous_fndecl. */
30589 void
30590 arm_reset_previous_fndecl (void)
30592 arm_previous_fndecl = NULL_TREE;
30595 /* Establish appropriate back-end context for processing the function
30596 FNDECL. The argument might be NULL to indicate processing at top
30597 level, outside of any function scope. */
30599 static void
30600 arm_set_current_function (tree fndecl)
30602 if (!fndecl || fndecl == arm_previous_fndecl)
30603 return;
30605 tree old_tree = (arm_previous_fndecl
30606 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30607 : NULL_TREE);
30609 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30611 /* If current function has no attributes but previous one did,
30612 use the default node. */
30613 if (! new_tree && old_tree)
30614 new_tree = target_option_default_node;
30616 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30617 the default have been handled by save_restore_target_globals from
30618 arm_pragma_target_parse. */
30619 if (old_tree == new_tree)
30620 return;
30622 arm_previous_fndecl = fndecl;
30624 /* First set the target options. */
30625 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30627 save_restore_target_globals (new_tree);
30630 /* Implement TARGET_OPTION_PRINT. */
30632 static void
30633 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30635 int flags = ptr->x_target_flags;
30636 const char *fpu_name;
30638 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30639 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30641 fprintf (file, "%*sselected isa %s\n", indent, "",
30642 TARGET_THUMB2_P (flags) ? "thumb2" :
30643 TARGET_THUMB_P (flags) ? "thumb1" :
30644 "arm");
30646 if (ptr->x_arm_arch_string)
30647 fprintf (file, "%*sselected architecture %s\n", indent, "",
30648 ptr->x_arm_arch_string);
30650 if (ptr->x_arm_cpu_string)
30651 fprintf (file, "%*sselected CPU %s\n", indent, "",
30652 ptr->x_arm_cpu_string);
30654 if (ptr->x_arm_tune_string)
30655 fprintf (file, "%*sselected tune %s\n", indent, "",
30656 ptr->x_arm_tune_string);
30658 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30661 /* Hook to determine if one function can safely inline another. */
30663 static bool
30664 arm_can_inline_p (tree caller, tree callee)
30666 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30667 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30668 bool can_inline = true;
30670 struct cl_target_option *caller_opts
30671 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30672 : target_option_default_node);
30674 struct cl_target_option *callee_opts
30675 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30676 : target_option_default_node);
30678 if (callee_opts == caller_opts)
30679 return true;
30681 /* Callee's ISA features should be a subset of the caller's. */
30682 struct arm_build_target caller_target;
30683 struct arm_build_target callee_target;
30684 caller_target.isa = sbitmap_alloc (isa_num_bits);
30685 callee_target.isa = sbitmap_alloc (isa_num_bits);
30687 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30688 false);
30689 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30690 false);
30691 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30692 can_inline = false;
30694 sbitmap_free (caller_target.isa);
30695 sbitmap_free (callee_target.isa);
30697 /* OK to inline between different modes.
30698 Functions with mode-specific instructions, e.g. using asm,
30699 must be explicitly protected with noinline. */
30700 return can_inline;
30703 /* Hook to fix function's alignment affected by target attribute. */
30705 static void
30706 arm_relayout_function (tree fndecl)
30708 if (DECL_USER_ALIGN (fndecl))
30709 return;
30711 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30713 if (!callee_tree)
30714 callee_tree = target_option_default_node;
30716 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30717 SET_DECL_ALIGN
30718 (fndecl,
30719 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30722 /* Inner function to process the attribute ((target (...))): take an argument
30723 and set the current options from that argument. If we have a list,
30724 recursively process each entry. */
30726 static bool
30727 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30729 if (TREE_CODE (args) == TREE_LIST)
30731 bool ret = true;
30733 for (; args; args = TREE_CHAIN (args))
30734 if (TREE_VALUE (args)
30735 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30736 ret = false;
30737 return ret;
30740 else if (TREE_CODE (args) != STRING_CST)
30742 error ("attribute %<target%> argument not a string");
30743 return false;
30746 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30747 char *q;
30749 while ((q = strtok (argstr, ",")) != NULL)
30751 while (ISSPACE (*q)) ++q;
30753 argstr = NULL;
30754 if (!strncmp (q, "thumb", 5))
30755 opts->x_target_flags |= MASK_THUMB;
30757 else if (!strncmp (q, "arm", 3))
30758 opts->x_target_flags &= ~MASK_THUMB;
30760 else if (!strncmp (q, "fpu=", 4))
30762 int fpu_index;
30763 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30764 &fpu_index, CL_TARGET))
30766 error ("invalid fpu for target attribute or pragma %qs", q);
30767 return false;
30769 if (fpu_index == TARGET_FPU_auto)
30771 /* This doesn't really make sense until we support
30772 general dynamic selection of the architecture and all
30773 sub-features. */
30774 sorry ("auto fpu selection not currently permitted here");
30775 return false;
30777 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30779 else if (!strncmp (q, "arch=", 5))
30781 char* arch = q+5;
30782 const arch_option *arm_selected_arch
30783 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30785 if (!arm_selected_arch)
30787 error ("invalid architecture for target attribute or pragma %qs",
30789 return false;
30792 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30794 else if (q[0] == '+')
30796 opts->x_arm_arch_string
30797 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30799 else
30801 error ("unknown target attribute or pragma %qs", q);
30802 return false;
30806 return true;
30809 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30811 tree
30812 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30813 struct gcc_options *opts_set)
30815 struct cl_target_option cl_opts;
30817 if (!arm_valid_target_attribute_rec (args, opts))
30818 return NULL_TREE;
30820 cl_target_option_save (&cl_opts, opts);
30821 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30822 arm_option_check_internal (opts);
30823 /* Do any overrides, such as global options arch=xxx.
30824 We do this since arm_active_target was overridden. */
30825 arm_option_reconfigure_globals ();
30826 arm_options_perform_arch_sanity_checks ();
30827 arm_option_override_internal (opts, opts_set);
30829 return build_target_option_node (opts);
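/* Add a "target" attribute with value MODE to the attribute list pointed
   to by ATTRIBUTES.  */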
30832 static void
30833 add_attribute (const char * mode, tree *attributes)
30835 size_t len = strlen (mode);
30836 tree value = build_string (len, mode);
30838 TREE_TYPE (value) = build_array_type (char_type_node,
30839 build_index_type (size_int (len)));
30841 *attributes = tree_cons (get_identifier ("target"),
30842 build_tree_list (NULL_TREE, value),
30843 *attributes);
30846 /* For testing.  Alternately insert thumb or arm mode attributes on functions.  */
30848 static void
30849 arm_insert_attributes (tree fndecl, tree * attributes)
30851 const char *mode;
30853 if (! TARGET_FLIP_THUMB)
30854 return;
30856 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30857 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30858 return;
30860 /* Nested definitions must inherit mode. */
30861 if (current_function_decl)
30863 mode = TARGET_THUMB ? "thumb" : "arm";
30864 add_attribute (mode, attributes);
30865 return;
30868 /* If there is already a setting don't change it. */
30869 if (lookup_attribute ("target", *attributes) != NULL)
30870 return;
30872 mode = thumb_flipper ? "thumb" : "arm";
30873 add_attribute (mode, attributes);
30875 thumb_flipper = !thumb_flipper;
30878 /* Hook to validate attribute((target("string"))). */
30880 static bool
30881 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30882 tree args, int ARG_UNUSED (flags))
30884 bool ret = true;
30885 struct gcc_options func_options;
30886 tree cur_tree, new_optimize;
30887 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30889 /* Get the optimization options of the current function. */
30890 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30892 /* If the function changed the optimization levels as well as setting target
30893 options, start with the optimizations specified. */
30894 if (!func_optimize)
30895 func_optimize = optimization_default_node;
30897 /* Init func_options. */
30898 memset (&func_options, 0, sizeof (func_options));
30899 init_options_struct (&func_options, NULL);
30900 lang_hooks.init_options_struct (&func_options);
30902 /* Initialize func_options to the defaults. */
30903 cl_optimization_restore (&func_options,
30904 TREE_OPTIMIZATION (func_optimize));
30906 cl_target_option_restore (&func_options,
30907 TREE_TARGET_OPTION (target_option_default_node));
30909 /* Set func_options flags with new target mode. */
30910 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30911 &global_options_set);
30913 if (cur_tree == NULL_TREE)
30914 ret = false;
30916 new_optimize = build_optimization_node (&func_options);
30918 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30920 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30922 finalize_options_struct (&func_options);
30924 return ret;
30927 /* Match an ISA feature bitmap to a named FPU. We always use the
30928 first entry that exactly matches the feature set, so that we
30929 effectively canonicalize the FPU name for the assembler. */
30930 static const char*
30931 arm_identify_fpu_from_isa (sbitmap isa)
30933 auto_sbitmap fpubits (isa_num_bits);
30934 auto_sbitmap cand_fpubits (isa_num_bits);
30936 bitmap_and (fpubits, isa, isa_all_fpubits);
30938 /* If there are no ISA feature bits relating to the FPU, we must be
30939 doing soft-float. */
30940 if (bitmap_empty_p (fpubits))
30941 return "softvfp";
30943 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30945 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30946 if (bitmap_equal_p (fpubits, cand_fpubits))
30947 return all_fpus[i].name;
30949 /* We must find an entry, or things have gone wrong. */
30950 gcc_unreachable ();
30953 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30954 by the function fndecl. */
30955 void
30956 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30958 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30960 struct cl_target_option *targ_options;
30961 if (target_parts)
30962 targ_options = TREE_TARGET_OPTION (target_parts);
30963 else
30964 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30965 gcc_assert (targ_options);
30967 /* Only update the assembler .arch string if it is distinct from the last
30968 such string we printed. arch_to_print is set conditionally in case
30969 targ_options->x_arm_arch_string is NULL, which can be the case
30970 when cc1 is invoked directly without passing the -march option. */
30971 std::string arch_to_print;
30972 if (targ_options->x_arm_arch_string)
30973 arch_to_print = targ_options->x_arm_arch_string;
30975 if (arch_to_print != arm_last_printed_arch_string)
30977 std::string arch_name
30978 = arch_to_print.substr (0, arch_to_print.find ("+"));
30979 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30980 const arch_option *arch
30981 = arm_parse_arch_option_name (all_architectures, "-march",
30982 targ_options->x_arm_arch_string);
30983 auto_sbitmap opt_bits (isa_num_bits);
30985 gcc_assert (arch);
30986 if (arch->common.extensions)
30988 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30989 opt->name != NULL;
30990 opt++)
30992 if (!opt->remove)
30994 arm_initialize_isa (opt_bits, opt->isa_bits);
30995 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30996 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30997 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30998 opt->name);
31003 arm_last_printed_arch_string = arch_to_print;
31006 fprintf (stream, "\t.syntax unified\n");
31008 if (TARGET_THUMB)
31010 if (is_called_in_ARM_mode (decl)
31011 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31012 && cfun->is_thunk))
31013 fprintf (stream, "\t.code 32\n");
31014 else if (TARGET_THUMB1)
31015 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31016 else
31017 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31019 else
31020 fprintf (stream, "\t.arm\n");
31022 std::string fpu_to_print
31023 = TARGET_SOFT_FLOAT
31024 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31026 if (fpu_to_print != arm_last_printed_fpu_string)
31028 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31029 arm_last_printed_fpu_string = fpu_to_print;
31032 if (TARGET_POKE_FUNCTION_NAME)
31033 arm_poke_function_name (stream, (const char *) name);
31036 /* If MEM is in the form [base+offset], extract the two parts of the
31037 address and store them in BASE and OFFSET; otherwise return FALSE
31038 after clearing BASE and OFFSET. */
31040 static bool
31041 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31043 rtx addr;
31045 gcc_assert (MEM_P (mem));
31047 addr = XEXP (mem, 0);
31049 /* Strip off const from addresses like (const (addr)). */
31050 if (GET_CODE (addr) == CONST)
31051 addr = XEXP (addr, 0);
31053 if (GET_CODE (addr) == REG)
31055 *base = addr;
31056 *offset = const0_rtx;
31057 return true;
31060 if (GET_CODE (addr) == PLUS
31061 && GET_CODE (XEXP (addr, 0)) == REG
31062 && CONST_INT_P (XEXP (addr, 1)))
31064 *base = XEXP (addr, 0);
31065 *offset = XEXP (addr, 1);
31066 return true;
31069 *base = NULL_RTX;
31070 *offset = NULL_RTX;
31072 return false;
31075 /* If INSN is a load or store of an address in the form [base+offset],
31076 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
31077 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
31078 otherwise return FALSE. */
31080 static bool
31081 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31083 rtx x, dest, src;
31085 gcc_assert (INSN_P (insn));
31086 x = PATTERN (insn);
31087 if (GET_CODE (x) != SET)
31088 return false;
31090 src = SET_SRC (x);
31091 dest = SET_DEST (x);
31092 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31094 *is_load = false;
31095 extract_base_offset_in_addr (dest, base, offset);
31097 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31099 *is_load = true;
31100 extract_base_offset_in_addr (src, base, offset);
31102 else
31103 return false;
31105 return (*base != NULL_RTX && *offset != NULL_RTX);
31108 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31110 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31111 and PRI are only calculated for these instructions. For other instructions,
31112 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
31113 instruction fusion can be supported by returning different priorities.
31115 It's important that irrelevant instructions get the largest FUSION_PRI. */
31117 static void
31118 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31119 int *fusion_pri, int *pri)
31121 int tmp, off_val;
31122 bool is_load;
31123 rtx base, offset;
31125 gcc_assert (INSN_P (insn));
31127 tmp = max_pri - 1;
31128 if (!fusion_load_store (insn, &base, &offset, &is_load))
31130 *pri = tmp;
31131 *fusion_pri = tmp;
31132 return;
31135 /* Load goes first. */
31136 if (is_load)
31137 *fusion_pri = tmp - 1;
31138 else
31139 *fusion_pri = tmp - 2;
31141 tmp /= 2;
31143 /* INSN with smaller base register goes first. */
31144 tmp -= ((REGNO (base) & 0xff) << 20);
31146 /* INSN with smaller offset goes first. */
31147 off_val = (int)(INTVAL (offset));
31148 if (off_val >= 0)
31149 tmp -= (off_val & 0xfffff);
31150 else
31151 tmp += ((- off_val) & 0xfffff);
31153 *pri = tmp;
31154 return;
31158 /* Construct and return a PARALLEL RTX vector with elements numbering the
31159 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31160 the vector - from the perspective of the architecture. This does not
31161 line up with GCC's perspective on lane numbers, so we end up with
31162 different masks depending on our target endianness. The diagram
31163 below may help. We must draw the distinction when building masks
31164 which select one half of the vector. An instruction selecting
31165 architectural low-lanes for a big-endian target must be described using
31166 a mask selecting GCC high-lanes.
31168 Big-Endian Little-Endian
31170 GCC 0 1 2 3 3 2 1 0
31171 | x | x | x | x | | x | x | x | x |
31172 Architecture 3 2 1 0 3 2 1 0
31174 Low Mask: { 2, 3 } { 0, 1 }
31175 High Mask: { 0, 1 } { 2, 3 }
31179 rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31181 int nunits = GET_MODE_NUNITS (mode);
31182 rtvec v = rtvec_alloc (nunits / 2);
31183 int high_base = nunits / 2;
31184 int low_base = 0;
31185 int base;
31186 rtx t1;
31187 int i;
31189 if (BYTES_BIG_ENDIAN)
31190 base = high ? low_base : high_base;
31191 else
31192 base = high ? high_base : low_base;
31194 for (i = 0; i < nunits / 2; i++)
31195 RTVEC_ELT (v, i) = GEN_INT (base + i);
31197 t1 = gen_rtx_PARALLEL (mode, v);
31198 return t1;
31201 /* Check OP for validity as a PARALLEL RTX vector with elements
31202 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31203 from the perspective of the architecture. See the diagram above
31204 arm_simd_vect_par_cnst_half for more details. */
31206 bool
31207 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31208 bool high)
31210 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31211 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31212 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31213 int i = 0;
31215 if (!VECTOR_MODE_P (mode))
31216 return false;
31218 if (count_op != count_ideal)
31219 return false;
31221 for (i = 0; i < count_ideal; i++)
31223 rtx elt_op = XVECEXP (op, 0, i);
31224 rtx elt_ideal = XVECEXP (ideal, 0, i);
31226 if (!CONST_INT_P (elt_op)
31227 || INTVAL (elt_ideal) != INTVAL (elt_op))
31228 return false;
31230 return true;
31233 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31234 in Thumb1. */
31235 static bool
31236 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31237 const_tree)
31239 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31240 if (vcall_offset && TARGET_THUMB1)
31241 return false;
31243 /* Otherwise ok. */
31244 return true;
31247 /* Generate RTL for a conditional branch with rtx comparison CODE in
31248 mode CC_MODE. The destination of the unlikely conditional branch
31249 is LABEL_REF. */
31251 void
31252 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31253 rtx label_ref)
31255 rtx x;
31256 x = gen_rtx_fmt_ee (code, VOIDmode,
31257 gen_rtx_REG (cc_mode, CC_REGNUM),
31258 const0_rtx);
31260 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31261 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31262 pc_rtx);
31263 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31266 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31268 For pure-code sections there is no letter code for this attribute, so
31269 output all the section flags numerically when this is needed. */
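/* For example, a non-debug executable pure-code section is given the value
   0x20000000 | 0x2 | 0x4 == 0x20000006.  */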
31271 static bool
31272 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31275 if (flags & SECTION_ARM_PURECODE)
31277 *num = 0x20000000;
31279 if (!(flags & SECTION_DEBUG))
31280 *num |= 0x2;
31281 if (flags & SECTION_EXCLUDE)
31282 *num |= 0x80000000;
31283 if (flags & SECTION_WRITE)
31284 *num |= 0x1;
31285 if (flags & SECTION_CODE)
31286 *num |= 0x4;
31287 if (flags & SECTION_MERGE)
31288 *num |= 0x10;
31289 if (flags & SECTION_STRINGS)
31290 *num |= 0x20;
31291 if (flags & SECTION_TLS)
31292 *num |= 0x400;
31293 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31294 *num |= 0x200;
31296 return true;
31299 return false;
31302 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31304 If pure-code is passed as an option, make sure all functions are in
31305 sections that have the SHF_ARM_PURECODE attribute. */
31307 static section *
31308 arm_function_section (tree decl, enum node_frequency freq,
31309 bool startup, bool exit)
31311 const char * section_name;
31312 section * sec;
31314 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31315 return default_function_section (decl, freq, startup, exit);
31317 if (!target_pure_code)
31318 return default_function_section (decl, freq, startup, exit);
31321 section_name = DECL_SECTION_NAME (decl);
31323 /* If a function is not in a named section then it falls under the 'default'
31324 text section, also known as '.text'. We can preserve previous behavior as
31325 the default text section already has the SHF_ARM_PURECODE section
31326 attribute. */
31327 if (!section_name)
31329 section *default_sec = default_function_section (decl, freq, startup,
31330 exit);
31332 /* If default_sec is not null, then it must be a special section like for
31333 example .text.startup. We set the pure-code attribute and return the
31334 same section to preserve existing behavior. */
31335 if (default_sec)
31336 default_sec->common.flags |= SECTION_ARM_PURECODE;
31337 return default_sec;
31340 /* Otherwise look whether a section has already been created with
31341 'section_name'. */
31342 sec = get_named_section (decl, section_name, 0);
31343 if (!sec)
31344 /* If that is not the case passing NULL as the section's name to
31345 'get_named_section' will create a section with the declaration's
31346 section name. */
31347 sec = get_named_section (decl, NULL, 0);
31349 /* Set the SHF_ARM_PURECODE attribute. */
31350 sec->common.flags |= SECTION_ARM_PURECODE;
31352 return sec;
31355 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31357 If DECL is a function declaration and pure-code is passed as an option
31358 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31359 section's name and RELOC indicates whether the declaration's initializer may
31360 contain runtime relocations. */
31362 static unsigned int
31363 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31365 unsigned int flags = default_section_type_flags (decl, name, reloc);
31367 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31368 flags |= SECTION_ARM_PURECODE;
31370 return flags;
31373 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
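/* The libcall returns the quotient and the remainder packed into a single
   double-width integer value, which is then split into the two results
   with subregs below.  */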
31375 static void
31376 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31377 rtx op0, rtx op1,
31378 rtx *quot_p, rtx *rem_p)
31380 if (mode == SImode)
31381 gcc_assert (!TARGET_IDIV);
31383 scalar_int_mode libval_mode
31384 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31386 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31387 libval_mode,
31388 op0, GET_MODE (op0),
31389 op1, GET_MODE (op1));
31391 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31392 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31393 GET_MODE_SIZE (mode));
31395 gcc_assert (quotient);
31396 gcc_assert (remainder);
31398 *quot_p = quotient;
31399 *rem_p = remainder;
31402 /* This function checks for the availability of the coprocessor builtin passed
31403 in BUILTIN for the current target. Returns true if it is available and
31404 false otherwise. If a BUILTIN is passed for which this function has not
31405 been updated it will abort via gcc_unreachable (). */
31407 bool
31408 arm_coproc_builtin_available (enum unspecv builtin)
31410 /* None of these builtins are available in Thumb mode if the target only
31411 supports Thumb-1. */
31412 if (TARGET_THUMB1)
31413 return false;
31415 switch (builtin)
31417 case VUNSPEC_CDP:
31418 case VUNSPEC_LDC:
31419 case VUNSPEC_LDCL:
31420 case VUNSPEC_STC:
31421 case VUNSPEC_STCL:
31422 case VUNSPEC_MCR:
31423 case VUNSPEC_MRC:
31424 if (arm_arch4)
31425 return true;
31426 break;
31427 case VUNSPEC_CDP2:
31428 case VUNSPEC_LDC2:
31429 case VUNSPEC_LDC2L:
31430 case VUNSPEC_STC2:
31431 case VUNSPEC_STC2L:
31432 case VUNSPEC_MCR2:
31433 case VUNSPEC_MRC2:
31434 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31435 ARMv8-{A,M}. */
31436 if (arm_arch5)
31437 return true;
31438 break;
31439 case VUNSPEC_MCRR:
31440 case VUNSPEC_MRRC:
31441 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31442 ARMv8-{A,M}. */
31443 if (arm_arch6 || arm_arch5te)
31444 return true;
31445 break;
31446 case VUNSPEC_MCRR2:
31447 case VUNSPEC_MRRC2:
31448 if (arm_arch6)
31449 return true;
31450 break;
31451 default:
31452 gcc_unreachable ();
31454 return false;
31457 /* This function returns true if OP is a valid memory operand for the ldc and
31458 stc coprocessor instructions and false otherwise. */
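/* For example, an address of the form (plus (reg) (const_int 1020)) is
   accepted, while offsets outside [-1020, 1020] or ones that are not a
   multiple of 4 are rejected.  */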
31460 bool
31461 arm_coproc_ldc_stc_legitimate_address (rtx op)
31463 HOST_WIDE_INT range;
31464 /* Has to be a memory operand. */
31465 if (!MEM_P (op))
31466 return false;
31468 op = XEXP (op, 0);
31470 /* We accept registers. */
31471 if (REG_P (op))
31472 return true;
31474 switch (GET_CODE (op))
31476 case PLUS:
31478 /* Or registers with an offset. */
31479 if (!REG_P (XEXP (op, 0)))
31480 return false;
31482 op = XEXP (op, 1);
31484 /* The offset must be an immediate though. */
31485 if (!CONST_INT_P (op))
31486 return false;
31488 range = INTVAL (op);
31490 /* Within the range of [-1020,1020]. */
31491 if (!IN_RANGE (range, -1020, 1020))
31492 return false;
31494 /* And a multiple of 4. */
31495 return (range % 4) == 0;
31497 case PRE_INC:
31498 case POST_INC:
31499 case PRE_DEC:
31500 case POST_DEC:
31501 return REG_P (XEXP (op, 0));
31502 default:
31503 gcc_unreachable ();
31505 return false;
31508 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31510 In VFPv1, VFP registers could only be accessed in the mode they were
31511 set, so subregs would be invalid there. However, we don't support
31512 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31514 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31515 VFP registers in little-endian order. We can't describe that accurately to
31516 GCC, so avoid taking subregs of such values.
31518 The only exception is going from a 128-bit to a 64-bit type. In that
31519 case the data layout happens to be consistent for big-endian, so we
31520 explicitly allow that case. */
31522 static bool
31523 arm_can_change_mode_class (machine_mode from, machine_mode to,
31524 reg_class_t rclass)
31526 if (TARGET_BIG_END
31527 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31528 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31529 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31530 && reg_classes_intersect_p (VFP_REGS, rclass))
31531 return false;
31532 return true;
31535 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31536 strcpy from constants will be faster. */
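/* For example, when tuning for xscale in ARM mode the factor below is 2,
   so string constants get aligned to 2 * BITS_PER_WORD == 64 bits.  */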
31538 static HOST_WIDE_INT
31539 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31541 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31542 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31543 return MAX (align, BITS_PER_WORD * factor);
31544 return align;
31547 #if CHECKING_P
31548 namespace selftest {
31550 /* Scan the static data tables generated by parsecpu.awk looking for
31551 potential issues with the data. We primarily check for
31552 inconsistencies in the option extensions at present (extensions
31553 that duplicate others but aren't marked as aliases). Furthermore,
31554 for correct canonicalization, later options must never be a subset
31555 of an earlier option. Any extension should also only specify other
31556 feature bits and never an architecture bit. The architecture is inferred
31557 from the declaration of the extension. */
31558 static void
31559 arm_test_cpu_arch_data (void)
31561 const arch_option *arch;
31562 const cpu_option *cpu;
31563 auto_sbitmap target_isa (isa_num_bits);
31564 auto_sbitmap isa1 (isa_num_bits);
31565 auto_sbitmap isa2 (isa_num_bits);
31567 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31569 const cpu_arch_extension *ext1, *ext2;
31571 if (arch->common.extensions == NULL)
31572 continue;
31574 arm_initialize_isa (target_isa, arch->common.isa_bits);
31576 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31578 if (ext1->alias)
31579 continue;
31581 arm_initialize_isa (isa1, ext1->isa_bits);
31582 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31584 if (ext2->alias || ext1->remove != ext2->remove)
31585 continue;
31587 arm_initialize_isa (isa2, ext2->isa_bits);
31588 /* If the option is a subset of the parent option, it doesn't
31589 add anything and so isn't useful. */
31590 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31592 /* If the extension specifies any architectural bits then
31593 disallow it. Extensions should only specify feature bits. */
31594 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31599 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31601 const cpu_arch_extension *ext1, *ext2;
31603 if (cpu->common.extensions == NULL)
31604 continue;
31606 arm_initialize_isa (target_isa, arch->common.isa_bits);
31608 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31610 if (ext1->alias)
31611 continue;
31613 arm_initialize_isa (isa1, ext1->isa_bits);
31614 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31616 if (ext2->alias || ext1->remove != ext2->remove)
31617 continue;
31619 arm_initialize_isa (isa2, ext2->isa_bits);
31620 /* If the option is a subset of the parent option, it doesn't
31621 add anything and so isn't useful. */
31622 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31624 /* If the extension specifies any architectural bits then
31625 disallow it. Extensions should only specify feature bits. */
31626 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31632 /* Scan the static data tables generated by parsecpu.awk looking for
31633 potential issues with the data. Here we check for consistency between the
31634 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31635 a feature bit that is not defined by any FPU flag. */
31636 static void
31637 arm_test_fpu_data (void)
31639 auto_sbitmap isa_all_fpubits (isa_num_bits);
31640 auto_sbitmap fpubits (isa_num_bits);
31641 auto_sbitmap tmpset (isa_num_bits);
31643 static const enum isa_feature fpu_bitlist[]
31644 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31645 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31647 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31649 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31650 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31651 bitmap_clear (isa_all_fpubits);
31652 bitmap_copy (isa_all_fpubits, tmpset);
31655 if (!bitmap_empty_p (isa_all_fpubits))
31657 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31658 " group that are not defined by any FPU.\n"
31659 " Check your arm-cpus.in.\n");
31660 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31664 static void
31665 arm_run_selftests (void)
31667 arm_test_cpu_arch_data ();
31668 arm_test_fpu_data ();
31670 } /* Namespace selftest. */
31672 #undef TARGET_RUN_TARGET_SELFTESTS
31673 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31674 #endif /* CHECKING_P */
31676 struct gcc_target targetm = TARGET_INITIALIZER;
31678 #include "gt-arm.h"