Turn HARD_REGNO_NREGS into a target hook
[official-gcc.git] / gcc / config / arm / arm.c
blob 36c9e0b96b685d965bec7e90dcf18c61e99cdbdb
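The commit retires the HARD_REGNO_NREGS macro in favour of the TARGET_HARD_REGNO_NREGS hook. In this file that shows up as the arm_hard_regno_nregs declaration (line 317) and the #undef/#define pair at lines 789-790; the definition itself sits beyond this excerpt. As a rough guide, here is a minimal sketch, assuming the hook simply transcribes the old HARD_REGNO_NREGS macro from arm.h; TARGET_32BIT, PC_REGNUM, FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, IS_VFP_REGNUM and ARM_NUM_REGS are existing arm.h names, and the committed body may differ in detail.

/* Sketch only: likely shape of the new hook, not the committed text.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  /* Registers above the core set (e.g. the IWMMXT registers) hold a
     value of any mode in a single hard register.  */
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  /* Everything else needs the usual number of words for MODE.  */
  return ARM_NUM_REGS (mode);
}

The point of the hook form is that the middle end can query the port through targetm.hard_regno_nregs instead of expanding a target macro at compile time, which is why the declaration at line 317 and the TARGET_HARD_REGNO_NREGS define at line 790 are the visible additions in this part of the file.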
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82 int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
291 const unsigned char *sel);
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
318 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
319 static bool arm_modes_tieable_p (machine_mode, machine_mode);
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
622 #endif /* ARM_UNWIND_INFO */
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is -4088 + 1 + 4095 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
797 /* Obstack for minipool constant handling. */
798 static struct obstack minipool_obstack;
799 static char * minipool_startobj;
801 /* The maximum number of insns skipped which
802 will be conditionalised if possible. */
803 static int max_insns_skipped = 5;
805 extern FILE * asm_out_file;
807 /* True if we are currently building a constant table. */
808 int making_const_table;
810 /* The processor for which instructions should be scheduled. */
811 enum processor_type arm_tune = TARGET_CPU_arm_none;
813 /* The current tuning set. */
814 const struct tune_params *current_tune;
816 /* Which floating point hardware to schedule for. */
817 int arm_fpu_attr;
819 /* Used for Thumb call_via trampolines. */
820 rtx thumb_call_via_label[14];
821 static int thumb_call_reg_needed;
823 /* The bits in this mask specify which instruction scheduling options should
824 be used. */
825 unsigned int tune_flags = 0;
827 /* The highest ARM architecture version supported by the
828 target. */
829 enum base_architecture arm_base_arch = BASE_ARCH_0;
831 /* Active target architecture and tuning. */
833 struct arm_build_target arm_active_target;
835 /* The following are used in the arm.md file as equivalents to bits
836 in the above two flag variables. */
838 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
839 int arm_arch3m = 0;
841 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
842 int arm_arch4 = 0;
844 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
845 int arm_arch4t = 0;
847 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
848 int arm_arch5 = 0;
850 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
851 int arm_arch5e = 0;
853 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
854 int arm_arch5te = 0;
856 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
857 int arm_arch6 = 0;
859 /* Nonzero if this chip supports the ARM 6K extensions. */
860 int arm_arch6k = 0;
862 /* Nonzero if this chip supports the ARM 6KZ extensions. */
863 int arm_arch6kz = 0;
865 /* Nonzero if instructions present in ARMv6-M can be used. */
866 int arm_arch6m = 0;
868 /* Nonzero if this chip supports the ARM 7 extensions. */
869 int arm_arch7 = 0;
871 /* Nonzero if this chip supports the Large Physical Address Extension. */
872 int arm_arch_lpae = 0;
874 /* Nonzero if instructions not present in the 'M' profile can be used. */
875 int arm_arch_notm = 0;
877 /* Nonzero if instructions present in ARMv7E-M can be used. */
878 int arm_arch7em = 0;
880 /* Nonzero if instructions present in ARMv8 can be used. */
881 int arm_arch8 = 0;
883 /* Nonzero if this chip supports the ARMv8.1 extensions. */
884 int arm_arch8_1 = 0;
886 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
887 int arm_arch8_2 = 0;
889 /* Nonzero if this chip supports the FP16 instructions extension of ARM
890 Architecture 8.2. */
891 int arm_fp16_inst = 0;
893 /* Nonzero if this chip can benefit from load scheduling. */
894 int arm_ld_sched = 0;
896 /* Nonzero if this chip is a StrongARM. */
897 int arm_tune_strongarm = 0;
899 /* Nonzero if this chip supports Intel Wireless MMX technology. */
900 int arm_arch_iwmmxt = 0;
902 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
903 int arm_arch_iwmmxt2 = 0;
905 /* Nonzero if this chip is an XScale. */
906 int arm_arch_xscale = 0;
908 /* Nonzero if tuning for XScale */
909 int arm_tune_xscale = 0;
911 /* Nonzero if we want to tune for stores that access the write-buffer.
912 This typically means an ARM6 or ARM7 with MMU or MPU. */
913 int arm_tune_wbuf = 0;
915 /* Nonzero if tuning for Cortex-A9. */
916 int arm_tune_cortex_a9 = 0;
918 /* Nonzero if we should define __THUMB_INTERWORK__ in the
919 preprocessor.
920 XXX This is a bit of a hack, it's intended to help work around
921 problems in GLD which doesn't understand that armv5t code is
922 interworking clean. */
923 int arm_cpp_interwork = 0;
925 /* Nonzero if chip supports Thumb 1. */
926 int arm_arch_thumb1;
928 /* Nonzero if chip supports Thumb 2. */
929 int arm_arch_thumb2;
931 /* Nonzero if chip supports integer division instruction. */
932 int arm_arch_arm_hwdiv;
933 int arm_arch_thumb_hwdiv;
935 /* Nonzero if chip disallows volatile memory access in IT block. */
936 int arm_arch_no_volatile_ce;
938 /* Nonzero if we should use Neon to handle 64-bit operations rather
939 than core registers. */
940 int prefer_neon_for_64bits = 0;
942 /* Nonzero if we shouldn't use literal pools. */
943 bool arm_disable_literal_pool = false;
945 /* The register number to be used for the PIC offset register. */
946 unsigned arm_pic_register = INVALID_REGNUM;
948 enum arm_pcs arm_pcs_default;
950 /* For an explanation of these variables, see final_prescan_insn below. */
951 int arm_ccfsm_state;
952 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
953 enum arm_cond_code arm_current_cc;
955 rtx arm_target_insn;
956 int arm_target_label;
957 /* The number of conditionally executed insns, including the current insn. */
958 int arm_condexec_count = 0;
959 /* A bitmask specifying the patterns for the IT block.
960 Zero means do not output an IT block before this insn. */
961 int arm_condexec_mask = 0;
962 /* The number of bits used in arm_condexec_mask. */
963 int arm_condexec_masklen = 0;
965 /* Nonzero if chip supports the ARMv8 CRC instructions. */
966 int arm_arch_crc = 0;
968 /* Nonzero if chip supports the ARMv8-M security extensions. */
969 int arm_arch_cmse = 0;
971 /* Nonzero if the core has a very small, high-latency, multiply unit. */
972 int arm_m_profile_small_mul = 0;
974 /* The condition codes of the ARM, and the inverse function. */
975 static const char * const arm_condition_codes[] =
976 {
977 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
978 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
979 };
981 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
982 int arm_regs_in_sequence[] =
983 {
984 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
985 };
987 #define ARM_LSL_NAME "lsl"
988 #define streq(string1, string2) (strcmp (string1, string2) == 0)
990 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
991 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
992 | (1 << PIC_OFFSET_TABLE_REGNUM)))
994 /* Initialization code. */
996 struct cpu_tune
997 {
998 enum processor_type scheduler;
999 unsigned int tune_flags;
1000 const struct tune_params *tune;
1001 };
1003 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1004 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1005 { \
1006 num_slots, \
1007 l1_size, \
1008 l1_line_size \
1009 }
1011 /* arm generic vectorizer costs. */
1012 static const
1013 struct cpu_vec_costs arm_default_vec_cost = {
1014 1, /* scalar_stmt_cost. */
1015 1, /* scalar load_cost. */
1016 1, /* scalar_store_cost. */
1017 1, /* vec_stmt_cost. */
1018 1, /* vec_to_scalar_cost. */
1019 1, /* scalar_to_vec_cost. */
1020 1, /* vec_align_load_cost. */
1021 1, /* vec_unalign_load_cost. */
1022 1, /* vec_unalign_store_cost. */
1023 1, /* vec_store_cost. */
1024 3, /* cond_taken_branch_cost. */
1025 1, /* cond_not_taken_branch_cost. */
1026 };
1028 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1029 #include "aarch-cost-tables.h"
1033 const struct cpu_cost_table cortexa9_extra_costs =
1035 /* ALU */
1037 0, /* arith. */
1038 0, /* logical. */
1039 0, /* shift. */
1040 COSTS_N_INSNS (1), /* shift_reg. */
1041 COSTS_N_INSNS (1), /* arith_shift. */
1042 COSTS_N_INSNS (2), /* arith_shift_reg. */
1043 0, /* log_shift. */
1044 COSTS_N_INSNS (1), /* log_shift_reg. */
1045 COSTS_N_INSNS (1), /* extend. */
1046 COSTS_N_INSNS (2), /* extend_arith. */
1047 COSTS_N_INSNS (1), /* bfi. */
1048 COSTS_N_INSNS (1), /* bfx. */
1049 0, /* clz. */
1050 0, /* rev. */
1051 0, /* non_exec. */
1052 true /* non_exec_costs_exec. */
1055 /* MULT SImode */
1057 COSTS_N_INSNS (3), /* simple. */
1058 COSTS_N_INSNS (3), /* flag_setting. */
1059 COSTS_N_INSNS (2), /* extend. */
1060 COSTS_N_INSNS (3), /* add. */
1061 COSTS_N_INSNS (2), /* extend_add. */
1062 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1064 /* MULT DImode */
1066 0, /* simple (N/A). */
1067 0, /* flag_setting (N/A). */
1068 COSTS_N_INSNS (4), /* extend. */
1069 0, /* add (N/A). */
1070 COSTS_N_INSNS (4), /* extend_add. */
1071 0 /* idiv (N/A). */
1074 /* LD/ST */
1076 COSTS_N_INSNS (2), /* load. */
1077 COSTS_N_INSNS (2), /* load_sign_extend. */
1078 COSTS_N_INSNS (2), /* ldrd. */
1079 COSTS_N_INSNS (2), /* ldm_1st. */
1080 1, /* ldm_regs_per_insn_1st. */
1081 2, /* ldm_regs_per_insn_subsequent. */
1082 COSTS_N_INSNS (5), /* loadf. */
1083 COSTS_N_INSNS (5), /* loadd. */
1084 COSTS_N_INSNS (1), /* load_unaligned. */
1085 COSTS_N_INSNS (2), /* store. */
1086 COSTS_N_INSNS (2), /* strd. */
1087 COSTS_N_INSNS (2), /* stm_1st. */
1088 1, /* stm_regs_per_insn_1st. */
1089 2, /* stm_regs_per_insn_subsequent. */
1090 COSTS_N_INSNS (1), /* storef. */
1091 COSTS_N_INSNS (1), /* stored. */
1092 COSTS_N_INSNS (1), /* store_unaligned. */
1093 COSTS_N_INSNS (1), /* loadv. */
1094 COSTS_N_INSNS (1) /* storev. */
1097 /* FP SFmode */
1099 COSTS_N_INSNS (14), /* div. */
1100 COSTS_N_INSNS (4), /* mult. */
1101 COSTS_N_INSNS (7), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (3), /* addsub. */
1104 COSTS_N_INSNS (1), /* fpconst. */
1105 COSTS_N_INSNS (1), /* neg. */
1106 COSTS_N_INSNS (3), /* compare. */
1107 COSTS_N_INSNS (3), /* widen. */
1108 COSTS_N_INSNS (3), /* narrow. */
1109 COSTS_N_INSNS (3), /* toint. */
1110 COSTS_N_INSNS (3), /* fromint. */
1111 COSTS_N_INSNS (3) /* roundint. */
1113 /* FP DFmode */
1115 COSTS_N_INSNS (24), /* div. */
1116 COSTS_N_INSNS (5), /* mult. */
1117 COSTS_N_INSNS (8), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (3), /* addsub. */
1120 COSTS_N_INSNS (1), /* fpconst. */
1121 COSTS_N_INSNS (1), /* neg. */
1122 COSTS_N_INSNS (3), /* compare. */
1123 COSTS_N_INSNS (3), /* widen. */
1124 COSTS_N_INSNS (3), /* narrow. */
1125 COSTS_N_INSNS (3), /* toint. */
1126 COSTS_N_INSNS (3), /* fromint. */
1127 COSTS_N_INSNS (3) /* roundint. */
1130 /* Vector */
1132 COSTS_N_INSNS (1) /* alu. */
1136 const struct cpu_cost_table cortexa8_extra_costs =
1138 /* ALU */
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 0, /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 0, /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 0, /* log_shift_reg. */
1148 0, /* extend. */
1149 0, /* extend_arith. */
1150 0, /* bfi. */
1151 0, /* bfx. */
1152 0, /* clz. */
1153 0, /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1158 /* MULT SImode */
1160 COSTS_N_INSNS (1), /* simple. */
1161 COSTS_N_INSNS (1), /* flag_setting. */
1162 COSTS_N_INSNS (1), /* extend. */
1163 COSTS_N_INSNS (1), /* add. */
1164 COSTS_N_INSNS (1), /* extend_add. */
1165 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1167 /* MULT DImode */
1169 0, /* simple (N/A). */
1170 0, /* flag_setting (N/A). */
1171 COSTS_N_INSNS (2), /* extend. */
1172 0, /* add (N/A). */
1173 COSTS_N_INSNS (2), /* extend_add. */
1174 0 /* idiv (N/A). */
1177 /* LD/ST */
1179 COSTS_N_INSNS (1), /* load. */
1180 COSTS_N_INSNS (1), /* load_sign_extend. */
1181 COSTS_N_INSNS (1), /* ldrd. */
1182 COSTS_N_INSNS (1), /* ldm_1st. */
1183 1, /* ldm_regs_per_insn_1st. */
1184 2, /* ldm_regs_per_insn_subsequent. */
1185 COSTS_N_INSNS (1), /* loadf. */
1186 COSTS_N_INSNS (1), /* loadd. */
1187 COSTS_N_INSNS (1), /* load_unaligned. */
1188 COSTS_N_INSNS (1), /* store. */
1189 COSTS_N_INSNS (1), /* strd. */
1190 COSTS_N_INSNS (1), /* stm_1st. */
1191 1, /* stm_regs_per_insn_1st. */
1192 2, /* stm_regs_per_insn_subsequent. */
1193 COSTS_N_INSNS (1), /* storef. */
1194 COSTS_N_INSNS (1), /* stored. */
1195 COSTS_N_INSNS (1), /* store_unaligned. */
1196 COSTS_N_INSNS (1), /* loadv. */
1197 COSTS_N_INSNS (1) /* storev. */
1200 /* FP SFmode */
1202 COSTS_N_INSNS (36), /* div. */
1203 COSTS_N_INSNS (11), /* mult. */
1204 COSTS_N_INSNS (20), /* mult_addsub. */
1205 COSTS_N_INSNS (30), /* fma. */
1206 COSTS_N_INSNS (9), /* addsub. */
1207 COSTS_N_INSNS (3), /* fpconst. */
1208 COSTS_N_INSNS (3), /* neg. */
1209 COSTS_N_INSNS (6), /* compare. */
1210 COSTS_N_INSNS (4), /* widen. */
1211 COSTS_N_INSNS (4), /* narrow. */
1212 COSTS_N_INSNS (8), /* toint. */
1213 COSTS_N_INSNS (8), /* fromint. */
1214 COSTS_N_INSNS (8) /* roundint. */
1216 /* FP DFmode */
1218 COSTS_N_INSNS (64), /* div. */
1219 COSTS_N_INSNS (16), /* mult. */
1220 COSTS_N_INSNS (25), /* mult_addsub. */
1221 COSTS_N_INSNS (30), /* fma. */
1222 COSTS_N_INSNS (9), /* addsub. */
1223 COSTS_N_INSNS (3), /* fpconst. */
1224 COSTS_N_INSNS (3), /* neg. */
1225 COSTS_N_INSNS (6), /* compare. */
1226 COSTS_N_INSNS (6), /* widen. */
1227 COSTS_N_INSNS (6), /* narrow. */
1228 COSTS_N_INSNS (8), /* toint. */
1229 COSTS_N_INSNS (8), /* fromint. */
1230 COSTS_N_INSNS (8) /* roundint. */
1233 /* Vector */
1235 COSTS_N_INSNS (1) /* alu. */
1239 const struct cpu_cost_table cortexa5_extra_costs =
1241 /* ALU */
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1262 /* MULT SImode */
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1271 /* MULT DImode */
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1281 /* LD/ST */
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (6), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (4), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1), /* store_unaligned. */
1300 COSTS_N_INSNS (1), /* loadv. */
1301 COSTS_N_INSNS (1) /* storev. */
1304 /* FP SFmode */
1306 COSTS_N_INSNS (15), /* div. */
1307 COSTS_N_INSNS (3), /* mult. */
1308 COSTS_N_INSNS (7), /* mult_addsub. */
1309 COSTS_N_INSNS (7), /* fma. */
1310 COSTS_N_INSNS (3), /* addsub. */
1311 COSTS_N_INSNS (3), /* fpconst. */
1312 COSTS_N_INSNS (3), /* neg. */
1313 COSTS_N_INSNS (3), /* compare. */
1314 COSTS_N_INSNS (3), /* widen. */
1315 COSTS_N_INSNS (3), /* narrow. */
1316 COSTS_N_INSNS (3), /* toint. */
1317 COSTS_N_INSNS (3), /* fromint. */
1318 COSTS_N_INSNS (3) /* roundint. */
1320 /* FP DFmode */
1322 COSTS_N_INSNS (30), /* div. */
1323 COSTS_N_INSNS (6), /* mult. */
1324 COSTS_N_INSNS (10), /* mult_addsub. */
1325 COSTS_N_INSNS (7), /* fma. */
1326 COSTS_N_INSNS (3), /* addsub. */
1327 COSTS_N_INSNS (3), /* fpconst. */
1328 COSTS_N_INSNS (3), /* neg. */
1329 COSTS_N_INSNS (3), /* compare. */
1330 COSTS_N_INSNS (3), /* widen. */
1331 COSTS_N_INSNS (3), /* narrow. */
1332 COSTS_N_INSNS (3), /* toint. */
1333 COSTS_N_INSNS (3), /* fromint. */
1334 COSTS_N_INSNS (3) /* roundint. */
1337 /* Vector */
1339 COSTS_N_INSNS (1) /* alu. */
1344 const struct cpu_cost_table cortexa7_extra_costs =
1346 /* ALU */
1348 0, /* arith. */
1349 0, /* logical. */
1350 COSTS_N_INSNS (1), /* shift. */
1351 COSTS_N_INSNS (1), /* shift_reg. */
1352 COSTS_N_INSNS (1), /* arith_shift. */
1353 COSTS_N_INSNS (1), /* arith_shift_reg. */
1354 COSTS_N_INSNS (1), /* log_shift. */
1355 COSTS_N_INSNS (1), /* log_shift_reg. */
1356 COSTS_N_INSNS (1), /* extend. */
1357 COSTS_N_INSNS (1), /* extend_arith. */
1358 COSTS_N_INSNS (1), /* bfi. */
1359 COSTS_N_INSNS (1), /* bfx. */
1360 COSTS_N_INSNS (1), /* clz. */
1361 COSTS_N_INSNS (1), /* rev. */
1362 0, /* non_exec. */
1363 true /* non_exec_costs_exec. */
1367 /* MULT SImode */
1369 0, /* simple. */
1370 COSTS_N_INSNS (1), /* flag_setting. */
1371 COSTS_N_INSNS (1), /* extend. */
1372 COSTS_N_INSNS (1), /* add. */
1373 COSTS_N_INSNS (1), /* extend_add. */
1374 COSTS_N_INSNS (7) /* idiv. */
1376 /* MULT DImode */
1378 0, /* simple (N/A). */
1379 0, /* flag_setting (N/A). */
1380 COSTS_N_INSNS (1), /* extend. */
1381 0, /* add. */
1382 COSTS_N_INSNS (2), /* extend_add. */
1383 0 /* idiv (N/A). */
1386 /* LD/ST */
1388 COSTS_N_INSNS (1), /* load. */
1389 COSTS_N_INSNS (1), /* load_sign_extend. */
1390 COSTS_N_INSNS (3), /* ldrd. */
1391 COSTS_N_INSNS (1), /* ldm_1st. */
1392 1, /* ldm_regs_per_insn_1st. */
1393 2, /* ldm_regs_per_insn_subsequent. */
1394 COSTS_N_INSNS (2), /* loadf. */
1395 COSTS_N_INSNS (2), /* loadd. */
1396 COSTS_N_INSNS (1), /* load_unaligned. */
1397 COSTS_N_INSNS (1), /* store. */
1398 COSTS_N_INSNS (3), /* strd. */
1399 COSTS_N_INSNS (1), /* stm_1st. */
1400 1, /* stm_regs_per_insn_1st. */
1401 2, /* stm_regs_per_insn_subsequent. */
1402 COSTS_N_INSNS (2), /* storef. */
1403 COSTS_N_INSNS (2), /* stored. */
1404 COSTS_N_INSNS (1), /* store_unaligned. */
1405 COSTS_N_INSNS (1), /* loadv. */
1406 COSTS_N_INSNS (1) /* storev. */
1409 /* FP SFmode */
1411 COSTS_N_INSNS (15), /* div. */
1412 COSTS_N_INSNS (3), /* mult. */
1413 COSTS_N_INSNS (7), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1425 /* FP DFmode */
1427 COSTS_N_INSNS (30), /* div. */
1428 COSTS_N_INSNS (6), /* mult. */
1429 COSTS_N_INSNS (10), /* mult_addsub. */
1430 COSTS_N_INSNS (7), /* fma. */
1431 COSTS_N_INSNS (3), /* addsub. */
1432 COSTS_N_INSNS (3), /* fpconst. */
1433 COSTS_N_INSNS (3), /* neg. */
1434 COSTS_N_INSNS (3), /* compare. */
1435 COSTS_N_INSNS (3), /* widen. */
1436 COSTS_N_INSNS (3), /* narrow. */
1437 COSTS_N_INSNS (3), /* toint. */
1438 COSTS_N_INSNS (3), /* fromint. */
1439 COSTS_N_INSNS (3) /* roundint. */
1442 /* Vector */
1444 COSTS_N_INSNS (1) /* alu. */
1448 const struct cpu_cost_table cortexa12_extra_costs =
1450 /* ALU */
1452 0, /* arith. */
1453 0, /* logical. */
1454 0, /* shift. */
1455 COSTS_N_INSNS (1), /* shift_reg. */
1456 COSTS_N_INSNS (1), /* arith_shift. */
1457 COSTS_N_INSNS (1), /* arith_shift_reg. */
1458 COSTS_N_INSNS (1), /* log_shift. */
1459 COSTS_N_INSNS (1), /* log_shift_reg. */
1460 0, /* extend. */
1461 COSTS_N_INSNS (1), /* extend_arith. */
1462 0, /* bfi. */
1463 COSTS_N_INSNS (1), /* bfx. */
1464 COSTS_N_INSNS (1), /* clz. */
1465 COSTS_N_INSNS (1), /* rev. */
1466 0, /* non_exec. */
1467 true /* non_exec_costs_exec. */
1469 /* MULT SImode */
1472 COSTS_N_INSNS (2), /* simple. */
1473 COSTS_N_INSNS (3), /* flag_setting. */
1474 COSTS_N_INSNS (2), /* extend. */
1475 COSTS_N_INSNS (3), /* add. */
1476 COSTS_N_INSNS (2), /* extend_add. */
1477 COSTS_N_INSNS (18) /* idiv. */
1479 /* MULT DImode */
1481 0, /* simple (N/A). */
1482 0, /* flag_setting (N/A). */
1483 COSTS_N_INSNS (3), /* extend. */
1484 0, /* add (N/A). */
1485 COSTS_N_INSNS (3), /* extend_add. */
1486 0 /* idiv (N/A). */
1489 /* LD/ST */
1491 COSTS_N_INSNS (3), /* load. */
1492 COSTS_N_INSNS (3), /* load_sign_extend. */
1493 COSTS_N_INSNS (3), /* ldrd. */
1494 COSTS_N_INSNS (3), /* ldm_1st. */
1495 1, /* ldm_regs_per_insn_1st. */
1496 2, /* ldm_regs_per_insn_subsequent. */
1497 COSTS_N_INSNS (3), /* loadf. */
1498 COSTS_N_INSNS (3), /* loadd. */
1499 0, /* load_unaligned. */
1500 0, /* store. */
1501 0, /* strd. */
1502 0, /* stm_1st. */
1503 1, /* stm_regs_per_insn_1st. */
1504 2, /* stm_regs_per_insn_subsequent. */
1505 COSTS_N_INSNS (2), /* storef. */
1506 COSTS_N_INSNS (2), /* stored. */
1507 0, /* store_unaligned. */
1508 COSTS_N_INSNS (1), /* loadv. */
1509 COSTS_N_INSNS (1) /* storev. */
1512 /* FP SFmode */
1514 COSTS_N_INSNS (17), /* div. */
1515 COSTS_N_INSNS (4), /* mult. */
1516 COSTS_N_INSNS (8), /* mult_addsub. */
1517 COSTS_N_INSNS (8), /* fma. */
1518 COSTS_N_INSNS (4), /* addsub. */
1519 COSTS_N_INSNS (2), /* fpconst. */
1520 COSTS_N_INSNS (2), /* neg. */
1521 COSTS_N_INSNS (2), /* compare. */
1522 COSTS_N_INSNS (4), /* widen. */
1523 COSTS_N_INSNS (4), /* narrow. */
1524 COSTS_N_INSNS (4), /* toint. */
1525 COSTS_N_INSNS (4), /* fromint. */
1526 COSTS_N_INSNS (4) /* roundint. */
1528 /* FP DFmode */
1530 COSTS_N_INSNS (31), /* div. */
1531 COSTS_N_INSNS (4), /* mult. */
1532 COSTS_N_INSNS (8), /* mult_addsub. */
1533 COSTS_N_INSNS (8), /* fma. */
1534 COSTS_N_INSNS (4), /* addsub. */
1535 COSTS_N_INSNS (2), /* fpconst. */
1536 COSTS_N_INSNS (2), /* neg. */
1537 COSTS_N_INSNS (2), /* compare. */
1538 COSTS_N_INSNS (4), /* widen. */
1539 COSTS_N_INSNS (4), /* narrow. */
1540 COSTS_N_INSNS (4), /* toint. */
1541 COSTS_N_INSNS (4), /* fromint. */
1542 COSTS_N_INSNS (4) /* roundint. */
1545 /* Vector */
1547 COSTS_N_INSNS (1) /* alu. */
1551 const struct cpu_cost_table cortexa15_extra_costs =
1553 /* ALU */
1555 0, /* arith. */
1556 0, /* logical. */
1557 0, /* shift. */
1558 0, /* shift_reg. */
1559 COSTS_N_INSNS (1), /* arith_shift. */
1560 COSTS_N_INSNS (1), /* arith_shift_reg. */
1561 COSTS_N_INSNS (1), /* log_shift. */
1562 COSTS_N_INSNS (1), /* log_shift_reg. */
1563 0, /* extend. */
1564 COSTS_N_INSNS (1), /* extend_arith. */
1565 COSTS_N_INSNS (1), /* bfi. */
1566 0, /* bfx. */
1567 0, /* clz. */
1568 0, /* rev. */
1569 0, /* non_exec. */
1570 true /* non_exec_costs_exec. */
1572 /* MULT SImode */
1575 COSTS_N_INSNS (2), /* simple. */
1576 COSTS_N_INSNS (3), /* flag_setting. */
1577 COSTS_N_INSNS (2), /* extend. */
1578 COSTS_N_INSNS (2), /* add. */
1579 COSTS_N_INSNS (2), /* extend_add. */
1580 COSTS_N_INSNS (18) /* idiv. */
1582 /* MULT DImode */
1584 0, /* simple (N/A). */
1585 0, /* flag_setting (N/A). */
1586 COSTS_N_INSNS (3), /* extend. */
1587 0, /* add (N/A). */
1588 COSTS_N_INSNS (3), /* extend_add. */
1589 0 /* idiv (N/A). */
1592 /* LD/ST */
1594 COSTS_N_INSNS (3), /* load. */
1595 COSTS_N_INSNS (3), /* load_sign_extend. */
1596 COSTS_N_INSNS (3), /* ldrd. */
1597 COSTS_N_INSNS (4), /* ldm_1st. */
1598 1, /* ldm_regs_per_insn_1st. */
1599 2, /* ldm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (4), /* loadf. */
1601 COSTS_N_INSNS (4), /* loadd. */
1602 0, /* load_unaligned. */
1603 0, /* store. */
1604 0, /* strd. */
1605 COSTS_N_INSNS (1), /* stm_1st. */
1606 1, /* stm_regs_per_insn_1st. */
1607 2, /* stm_regs_per_insn_subsequent. */
1608 0, /* storef. */
1609 0, /* stored. */
1610 0, /* store_unaligned. */
1611 COSTS_N_INSNS (1), /* loadv. */
1612 COSTS_N_INSNS (1) /* storev. */
1615 /* FP SFmode */
1617 COSTS_N_INSNS (17), /* div. */
1618 COSTS_N_INSNS (4), /* mult. */
1619 COSTS_N_INSNS (8), /* mult_addsub. */
1620 COSTS_N_INSNS (8), /* fma. */
1621 COSTS_N_INSNS (4), /* addsub. */
1622 COSTS_N_INSNS (2), /* fpconst. */
1623 COSTS_N_INSNS (2), /* neg. */
1624 COSTS_N_INSNS (5), /* compare. */
1625 COSTS_N_INSNS (4), /* widen. */
1626 COSTS_N_INSNS (4), /* narrow. */
1627 COSTS_N_INSNS (4), /* toint. */
1628 COSTS_N_INSNS (4), /* fromint. */
1629 COSTS_N_INSNS (4) /* roundint. */
1631 /* FP DFmode */
1633 COSTS_N_INSNS (31), /* div. */
1634 COSTS_N_INSNS (4), /* mult. */
1635 COSTS_N_INSNS (8), /* mult_addsub. */
1636 COSTS_N_INSNS (8), /* fma. */
1637 COSTS_N_INSNS (4), /* addsub. */
1638 COSTS_N_INSNS (2), /* fpconst. */
1639 COSTS_N_INSNS (2), /* neg. */
1640 COSTS_N_INSNS (2), /* compare. */
1641 COSTS_N_INSNS (4), /* widen. */
1642 COSTS_N_INSNS (4), /* narrow. */
1643 COSTS_N_INSNS (4), /* toint. */
1644 COSTS_N_INSNS (4), /* fromint. */
1645 COSTS_N_INSNS (4) /* roundint. */
1648 /* Vector */
1650 COSTS_N_INSNS (1) /* alu. */
1654 const struct cpu_cost_table v7m_extra_costs =
1656 /* ALU */
1658 0, /* arith. */
1659 0, /* logical. */
1660 0, /* shift. */
1661 0, /* shift_reg. */
1662 0, /* arith_shift. */
1663 COSTS_N_INSNS (1), /* arith_shift_reg. */
1664 0, /* log_shift. */
1665 COSTS_N_INSNS (1), /* log_shift_reg. */
1666 0, /* extend. */
1667 COSTS_N_INSNS (1), /* extend_arith. */
1668 0, /* bfi. */
1669 0, /* bfx. */
1670 0, /* clz. */
1671 0, /* rev. */
1672 COSTS_N_INSNS (1), /* non_exec. */
1673 false /* non_exec_costs_exec. */
1676 /* MULT SImode */
1678 COSTS_N_INSNS (1), /* simple. */
1679 COSTS_N_INSNS (1), /* flag_setting. */
1680 COSTS_N_INSNS (2), /* extend. */
1681 COSTS_N_INSNS (1), /* add. */
1682 COSTS_N_INSNS (3), /* extend_add. */
1683 COSTS_N_INSNS (8) /* idiv. */
1685 /* MULT DImode */
1687 0, /* simple (N/A). */
1688 0, /* flag_setting (N/A). */
1689 COSTS_N_INSNS (2), /* extend. */
1690 0, /* add (N/A). */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 0 /* idiv (N/A). */
1695 /* LD/ST */
1697 COSTS_N_INSNS (2), /* load. */
1698 0, /* load_sign_extend. */
1699 COSTS_N_INSNS (3), /* ldrd. */
1700 COSTS_N_INSNS (2), /* ldm_1st. */
1701 1, /* ldm_regs_per_insn_1st. */
1702 1, /* ldm_regs_per_insn_subsequent. */
1703 COSTS_N_INSNS (2), /* loadf. */
1704 COSTS_N_INSNS (3), /* loadd. */
1705 COSTS_N_INSNS (1), /* load_unaligned. */
1706 COSTS_N_INSNS (2), /* store. */
1707 COSTS_N_INSNS (3), /* strd. */
1708 COSTS_N_INSNS (2), /* stm_1st. */
1709 1, /* stm_regs_per_insn_1st. */
1710 1, /* stm_regs_per_insn_subsequent. */
1711 COSTS_N_INSNS (2), /* storef. */
1712 COSTS_N_INSNS (3), /* stored. */
1713 COSTS_N_INSNS (1), /* store_unaligned. */
1714 COSTS_N_INSNS (1), /* loadv. */
1715 COSTS_N_INSNS (1) /* storev. */
1718 /* FP SFmode */
1720 COSTS_N_INSNS (7), /* div. */
1721 COSTS_N_INSNS (2), /* mult. */
1722 COSTS_N_INSNS (5), /* mult_addsub. */
1723 COSTS_N_INSNS (3), /* fma. */
1724 COSTS_N_INSNS (1), /* addsub. */
1725 0, /* fpconst. */
1726 0, /* neg. */
1727 0, /* compare. */
1728 0, /* widen. */
1729 0, /* narrow. */
1730 0, /* toint. */
1731 0, /* fromint. */
1732 0 /* roundint. */
1734 /* FP DFmode */
1736 COSTS_N_INSNS (15), /* div. */
1737 COSTS_N_INSNS (5), /* mult. */
1738 COSTS_N_INSNS (7), /* mult_addsub. */
1739 COSTS_N_INSNS (7), /* fma. */
1740 COSTS_N_INSNS (3), /* addsub. */
1741 0, /* fpconst. */
1742 0, /* neg. */
1743 0, /* compare. */
1744 0, /* widen. */
1745 0, /* narrow. */
1746 0, /* toint. */
1747 0, /* fromint. */
1748 0 /* roundint. */
1751 /* Vector */
1753 COSTS_N_INSNS (1) /* alu. */
1757 const struct tune_params arm_slowmul_tune =
1759 &generic_extra_costs, /* Insn extra costs. */
1760 NULL, /* Sched adj cost. */
1761 arm_default_branch_cost,
1762 &arm_default_vec_cost,
1763 3, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL,
1768 tune_params::PREF_CONST_POOL_TRUE,
1769 tune_params::PREF_LDRD_FALSE,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER,
1773 tune_params::PREF_NEON_64_FALSE,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE,
1775 tune_params::FUSE_NOTHING,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 const struct tune_params arm_fastmul_tune =
1781 &generic_extra_costs, /* Insn extra costs. */
1782 NULL, /* Sched adj cost. */
1783 arm_default_branch_cost,
1784 &arm_default_vec_cost,
1785 1, /* Constant limit. */
1786 5, /* Max cond insns. */
1787 8, /* Memset max inline. */
1788 1, /* Issue rate. */
1789 ARM_PREFETCH_NOT_BENEFICIAL,
1790 tune_params::PREF_CONST_POOL_TRUE,
1791 tune_params::PREF_LDRD_FALSE,
1792 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1793 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1794 tune_params::DISPARAGE_FLAGS_NEITHER,
1795 tune_params::PREF_NEON_64_FALSE,
1796 tune_params::PREF_NEON_STRINGOPS_FALSE,
1797 tune_params::FUSE_NOTHING,
1798 tune_params::SCHED_AUTOPREF_OFF
1801 /* StrongARM has early execution of branches, so a sequence that is worth
1802 skipping is shorter. Set max_insns_skipped to a lower value. */
1804 const struct tune_params arm_strongarm_tune =
1806 &generic_extra_costs, /* Insn extra costs. */
1807 NULL, /* Sched adj cost. */
1808 arm_default_branch_cost,
1809 &arm_default_vec_cost,
1810 1, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL,
1815 tune_params::PREF_CONST_POOL_TRUE,
1816 tune_params::PREF_LDRD_FALSE,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER,
1820 tune_params::PREF_NEON_64_FALSE,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE,
1822 tune_params::FUSE_NOTHING,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_xscale_tune =
1828 &generic_extra_costs, /* Insn extra costs. */
1829 xscale_sched_adjust_cost,
1830 arm_default_branch_cost,
1831 &arm_default_vec_cost,
1832 2, /* Constant limit. */
1833 3, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL,
1837 tune_params::PREF_CONST_POOL_TRUE,
1838 tune_params::PREF_LDRD_FALSE,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER,
1842 tune_params::PREF_NEON_64_FALSE,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE,
1844 tune_params::FUSE_NOTHING,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_9e_tune =
1850 &generic_extra_costs, /* Insn extra costs. */
1851 NULL, /* Sched adj cost. */
1852 arm_default_branch_cost,
1853 &arm_default_vec_cost,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 1, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL,
1859 tune_params::PREF_CONST_POOL_TRUE,
1860 tune_params::PREF_LDRD_FALSE,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER,
1864 tune_params::PREF_NEON_64_FALSE,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE,
1866 tune_params::FUSE_NOTHING,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_marvell_pj4_tune =
1872 &generic_extra_costs, /* Insn extra costs. */
1873 NULL, /* Sched adj cost. */
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 2, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_TRUE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1892 const struct tune_params arm_v6t2_tune =
1894 &generic_extra_costs, /* Insn extra costs. */
1895 NULL, /* Sched adj cost. */
1896 arm_default_branch_cost,
1897 &arm_default_vec_cost,
1898 1, /* Constant limit. */
1899 5, /* Max cond insns. */
1900 8, /* Memset max inline. */
1901 1, /* Issue rate. */
1902 ARM_PREFETCH_NOT_BENEFICIAL,
1903 tune_params::PREF_CONST_POOL_FALSE,
1904 tune_params::PREF_LDRD_FALSE,
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1907 tune_params::DISPARAGE_FLAGS_NEITHER,
1908 tune_params::PREF_NEON_64_FALSE,
1909 tune_params::PREF_NEON_STRINGOPS_FALSE,
1910 tune_params::FUSE_NOTHING,
1911 tune_params::SCHED_AUTOPREF_OFF
1915 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1916 const struct tune_params arm_cortex_tune =
1918 &generic_extra_costs,
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_FALSE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a8_tune =
1940 &cortexa8_extra_costs,
1941 NULL, /* Sched adj cost. */
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_FALSE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_64_FALSE,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE,
1956 tune_params::FUSE_NOTHING,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a7_tune =
1962 &cortexa7_extra_costs,
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 2, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_OFF
1982 const struct tune_params arm_cortex_a15_tune =
1984 &cortexa15_extra_costs,
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 2, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 3, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_FALSE,
1994 tune_params::PREF_LDRD_TRUE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_ALL,
1998 tune_params::PREF_NEON_64_FALSE,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE,
2000 tune_params::FUSE_NOTHING,
2001 tune_params::SCHED_AUTOPREF_FULL
2004 const struct tune_params arm_cortex_a35_tune =
2006 &cortexa53_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 1, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a53_tune =
2028 &cortexa53_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_default_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 5, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 2, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_FALSE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_NEITHER,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2045 tune_params::SCHED_AUTOPREF_OFF
2048 const struct tune_params arm_cortex_a57_tune =
2050 &cortexa57_extra_costs,
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_TRUE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL,
2064 tune_params::PREF_NEON_64_FALSE,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE,
2066 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2067 tune_params::SCHED_AUTOPREF_FULL
2070 const struct tune_params arm_exynosm1_tune =
2072 &exynosm1_extra_costs,
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 3, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_TRUE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL,
2086 tune_params::PREF_NEON_64_FALSE,
2087 tune_params::PREF_NEON_STRINGOPS_TRUE,
2088 tune_params::FUSE_NOTHING,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_xgene1_tune =
2094 &xgene1_extra_costs,
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 32, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_TRUE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL,
2108 tune_params::PREF_NEON_64_FALSE,
2109 tune_params::PREF_NEON_STRINGOPS_FALSE,
2110 tune_params::FUSE_NOTHING,
2111 tune_params::SCHED_AUTOPREF_OFF
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune =
2119 &cortexa5_extra_costs,
2120 NULL, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost,
2122 &arm_default_vec_cost,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL,
2128 tune_params::PREF_CONST_POOL_FALSE,
2129 tune_params::PREF_LDRD_FALSE,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER,
2133 tune_params::PREF_NEON_64_FALSE,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE,
2135 tune_params::FUSE_NOTHING,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune =
2141 &cortexa9_extra_costs,
2142 cortex_a9_sched_adjust_cost,
2143 arm_default_branch_cost,
2144 &arm_default_vec_cost,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE,
2151 tune_params::PREF_LDRD_FALSE,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER,
2155 tune_params::PREF_NEON_64_FALSE,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE,
2157 tune_params::FUSE_NOTHING,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune =
2163 &cortexa12_extra_costs,
2164 NULL, /* Sched adj cost. */
2165 arm_default_branch_cost,
2166 &arm_default_vec_cost, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL,
2172 tune_params::PREF_CONST_POOL_FALSE,
2173 tune_params::PREF_LDRD_TRUE,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL,
2177 tune_params::PREF_NEON_64_FALSE,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune =
2185 &cortexa57_extra_costs,
2186 NULL, /* Sched adj cost. */
2187 arm_default_branch_cost,
2188 &arm_default_vec_cost, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL,
2194 tune_params::PREF_CONST_POOL_FALSE,
2195 tune_params::PREF_LDRD_TRUE,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL,
2199 tune_params::PREF_NEON_64_FALSE,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2206 single cycle to execute.  An LDR from the constant pool likewise takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2210 processors. */
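/* For instance (an illustrative sketch, with rN and the pool label chosen
   arbitrarily), loading the address of a symbol SYM can be emitted either as
       movw  rN, #:lower16:SYM
       movt  rN, #:upper16:SYM
   (two single-cycle instructions) or as a single two-cycle literal-pool load
       ldr   rN, .Lpool_entry   @ illustrative label
   and PREF_CONST_POOL_TRUE below expresses the preference for the latter.  */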
2212 const struct tune_params arm_v7m_tune =
2214 &v7m_extra_costs,
2215 NULL, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost,
2217 &arm_default_vec_cost,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL,
2223 tune_params::PREF_CONST_POOL_TRUE,
2224 tune_params::PREF_LDRD_FALSE,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER,
2228 tune_params::PREF_NEON_64_FALSE,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE,
2230 tune_params::FUSE_NOTHING,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune =
2238 &v7m_extra_costs,
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost,
2241 &arm_default_vec_cost,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_TRUE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_64_FALSE,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE,
2254 tune_params::FUSE_NOTHING,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2260 cortex-m23. */
2261 const struct tune_params arm_v6m_tune =
2263 &generic_extra_costs, /* Insn extra costs. */
2264 NULL, /* Sched adj cost. */
2265 arm_default_branch_cost,
2266 &arm_default_vec_cost, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL,
2272 tune_params::PREF_CONST_POOL_FALSE,
2273 tune_params::PREF_LDRD_FALSE,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER,
2277 tune_params::PREF_NEON_64_FALSE,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE,
2279 tune_params::FUSE_NOTHING,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune =
2285 &generic_extra_costs, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost,
2287 arm_default_branch_cost,
2288 &arm_default_vec_cost,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL,
2294 tune_params::PREF_CONST_POOL_TRUE,
2295 tune_params::PREF_LDRD_FALSE,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER,
2299 tune_params::PREF_NEON_64_FALSE,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE,
2301 tune_params::FUSE_NOTHING,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
2308 /* The name of the preprocessor macro to define for this architecture. PROFILE
2309 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2310 is thus chosen to be big enough to hold the longest architecture name. */
2312 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
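/* For example, when the selected architecture is armv7-a, the sprintf in
   arm_option_override turns this into "__ARM_ARCH_7A__".  */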
2314 /* Supported TLS relocations. */
2316 enum tls_reloc {
2317 TLS_GD32,
2318 TLS_LDM32,
2319 TLS_LDO32,
2320 TLS_IE32,
2321 TLS_LE32,
2322 TLS_DESCSEQ /* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2326 inline static int
2327 arm_constant_limit (bool size_p)
2329 return size_p ? 1 : current_tune->constant_limit;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2333 to be valid. */
2334 inline static rtx_insn *
2335 emit_set_insn (rtx x, rtx y)
2337 return emit_insn (gen_rtx_SET (x, y));
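/* For example, emit_set_insn (reg, const0_rtx) emits the single insn
   (set (reg) (const_int 0)).  */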
2340 /* Return the number of bits set in VALUE. */
2341 static unsigned
2342 bit_count (unsigned long value)
2344 unsigned long count = 0;
2346 while (value)
2348 count++;
2349 value &= value - 1; /* Clear the least-significant set bit. */
2352 return count;
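/* For example, bit_count (0x28) iterates twice: 0x28 & 0x27 == 0x20, then
   0x20 & 0x1f == 0, so the result is 2.  */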
2355 /* Return the number of bits set in BMAP. */
2356 static unsigned
2357 bitmap_popcount (const sbitmap bmap)
2359 unsigned int count = 0;
2360 unsigned int n = 0;
2361 sbitmap_iterator sbi;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2364 count++;
2365 return count;
2368 typedef struct
2370 machine_mode mode;
2371 const char *name;
2372 } arm_fixed_mode_set;
2374 /* A small helper for setting the libfuncs used for fixed-point operations. */
2376 static void
2377 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2378 const char *funcname, const char *modename,
2379 int num_suffix)
2381 char buffer[50];
2383 if (num_suffix == 0)
2384 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2385 else
2386 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2388 set_optab_libfunc (optable, mode, buffer);
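/* For example, a call of the form
     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
   registers the libcall name "__gnu_addqq3" for QQmode addition, matching the
   __gnu_-prefixed fixed-point helpers expected from libgcc.  */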
2391 static void
2392 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2393 machine_mode from, const char *funcname,
2394 const char *toname, const char *fromname)
2396 char buffer[50];
2397 const char *maybe_suffix_2 = "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2402 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2403 maybe_suffix_2 = "2";
2405 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2406 maybe_suffix_2);
2408 set_conv_libfunc (optable, to, from, buffer);
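/* For example, a signed fract-to-fract conversion such as SQmode -> DQmode
   gets the "2" suffix ("__gnu_fractsqdq2"), while a conversion involving an
   integer mode such as SImode -> SAmode does not ("__gnu_fractsisa").  */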
2411 /* Set up library functions unique to ARM. */
2413 static void
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2420 /* There are no special library functions unless we are using the
2421 ARM BPABI. */
2422 if (!TARGET_BPABI)
2423 return;
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
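/* For example, with software floating point a DFmode addition such as
   "a + b" is emitted as a call to __aeabi_dadd rather than as inline code.  */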
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab, DFmode, NULL);
2438 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab, SFmode, NULL);
2454 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 call-clobbered registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2506 routines. */
2507 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab, DImode, NULL);
2518 set_optab_libfunc (umod_optab, DImode, NULL);
2519 set_optab_libfunc (smod_optab, SImode, NULL);
2520 set_optab_libfunc (umod_optab, SImode, NULL);
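/* For example (illustrative, assuming a target without hardware divide):
     int q = a / b;   =>  bl __aeabi_idiv      (quotient returned in r0)
     int r = a % b;   =>  bl __aeabi_idivmod   (quotient in r0, remainder in r1)
   so a single divmod call can satisfy both a division and a modulus.  */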
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting to SFmode. */
2524 switch (arm_fp16_format)
2526 case ARM_FP16_FORMAT_IEEE:
2527 case ARM_FP16_FORMAT_ALTERNATIVE:
2529 /* Conversions. */
2530 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2531 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2532 ? "__gnu_f2h_ieee"
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab, SFmode, HFmode,
2535 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2536 ? "__gnu_h2f_ieee"
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_d2h_ieee"
2542 : "__gnu_d2h_alternative"));
2544 /* Arithmetic. */
2545 set_optab_libfunc (add_optab, HFmode, NULL);
2546 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2547 set_optab_libfunc (smul_optab, HFmode, NULL);
2548 set_optab_libfunc (neg_optab, HFmode, NULL);
2549 set_optab_libfunc (sub_optab, HFmode, NULL);
2551 /* Comparisons. */
2552 set_optab_libfunc (eq_optab, HFmode, NULL);
2553 set_optab_libfunc (ne_optab, HFmode, NULL);
2554 set_optab_libfunc (lt_optab, HFmode, NULL);
2555 set_optab_libfunc (le_optab, HFmode, NULL);
2556 set_optab_libfunc (ge_optab, HFmode, NULL);
2557 set_optab_libfunc (gt_optab, HFmode, NULL);
2558 set_optab_libfunc (unord_optab, HFmode, NULL);
2559 break;
2561 default:
2562 break;
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes[] =
2569 { E_QQmode, "qq" },
2570 { E_UQQmode, "uqq" },
2571 { E_HQmode, "hq" },
2572 { E_UHQmode, "uhq" },
2573 { E_SQmode, "sq" },
2574 { E_USQmode, "usq" },
2575 { E_DQmode, "dq" },
2576 { E_UDQmode, "udq" },
2577 { E_TQmode, "tq" },
2578 { E_UTQmode, "utq" },
2579 { E_HAmode, "ha" },
2580 { E_UHAmode, "uha" },
2581 { E_SAmode, "sa" },
2582 { E_USAmode, "usa" },
2583 { E_DAmode, "da" },
2584 { E_UDAmode, "uda" },
2585 { E_TAmode, "ta" },
2586 { E_UTAmode, "uta" }
2588 const arm_fixed_mode_set fixed_conv_modes[] =
2590 { E_QQmode, "qq" },
2591 { E_UQQmode, "uqq" },
2592 { E_HQmode, "hq" },
2593 { E_UHQmode, "uhq" },
2594 { E_SQmode, "sq" },
2595 { E_USQmode, "usq" },
2596 { E_DQmode, "dq" },
2597 { E_UDQmode, "udq" },
2598 { E_TQmode, "tq" },
2599 { E_UTQmode, "utq" },
2600 { E_HAmode, "ha" },
2601 { E_UHAmode, "uha" },
2602 { E_SAmode, "sa" },
2603 { E_USAmode, "usa" },
2604 { E_DAmode, "da" },
2605 { E_UDAmode, "uda" },
2606 { E_TAmode, "ta" },
2607 { E_UTAmode, "uta" },
2608 { E_QImode, "qi" },
2609 { E_HImode, "hi" },
2610 { E_SImode, "si" },
2611 { E_DImode, "di" },
2612 { E_TImode, "ti" },
2613 { E_SFmode, "sf" },
2614 { E_DFmode, "df" }
2616 unsigned int i, j;
2618 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2620 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2621 "add", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2623 "ssadd", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2625 "usadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2627 "sub", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2629 "sssub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2631 "ussub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2633 "mul", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2635 "ssmul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2637 "usmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2639 "div", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2641 "udiv", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2643 "ssdiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2645 "usdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2647 "neg", fixed_arith_modes[i].name, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2649 "ssneg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2651 "usneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2653 "ashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2655 "ashr", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2657 "lshr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2659 "ssashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2661 "usashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2663 "cmp", fixed_arith_modes[i].name, 2);
2666 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2667 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2669 if (i == j
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2672 continue;
2674 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fract",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfract_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfract",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2683 arm_set_fixed_conv_libfunc (fractuns_optab,
2684 fixed_conv_modes[i].mode,
2685 fixed_conv_modes[j].mode, "fractuns",
2686 fixed_conv_modes[i].name,
2687 fixed_conv_modes[j].name);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab,
2689 fixed_conv_modes[i].mode,
2690 fixed_conv_modes[j].mode, "satfractuns",
2691 fixed_conv_modes[i].name,
2692 fixed_conv_modes[j].name);
2696 if (TARGET_AAPCS_BASED)
2697 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type;
2703 /* Return the type to use as __builtin_va_list. */
2704 static tree
2705 arm_build_builtin_va_list (void)
2707 tree va_list_name;
2708 tree ap_field;
2710 if (!TARGET_AAPCS_BASED)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2714 defined as:
2716 struct __va_list
2718 void *__ap;
2721 The C Library ABI further reinforces this definition in \S
2722 4.1.
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2730 /* Give it the required name. */
2731 va_list_name = build_decl (BUILTINS_LOCATION,
2732 TYPE_DECL,
2733 get_identifier ("__va_list"),
2734 va_list_type);
2735 DECL_ARTIFICIAL (va_list_name) = 1;
2736 TYPE_NAME (va_list_type) = va_list_name;
2737 TYPE_STUB_DECL (va_list_type) = va_list_name;
2738 /* Create the __ap field. */
2739 ap_field = build_decl (BUILTINS_LOCATION,
2740 FIELD_DECL,
2741 get_identifier ("__ap"),
2742 ptr_type_node);
2743 DECL_ARTIFICIAL (ap_field) = 1;
2744 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2745 TYPE_FIELDS (va_list_type) = ap_field;
2746 /* Compute its layout. */
2747 layout_type (va_list_type);
2749 return va_list_type;
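/* Note, for example, that with this layout a va_list parameter mangles as
   "St9__va_list" in C++ on AAPCS targets, as if __va_list were declared in
   namespace std.  */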
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2755 static tree
2756 arm_extract_valist_ptr (tree valist)
2758 if (TREE_TYPE (valist) == error_mark_node)
2759 return error_mark_node;
2761 /* On an AAPCS target, the pointer is stored within "struct
2762 __va_list". */
2763 if (TARGET_AAPCS_BASED)
2765 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2766 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2767 valist, ap_field, NULL_TREE);
2770 return valist;
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2774 static void
2775 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2777 valist = arm_extract_valist_ptr (valist);
2778 std_expand_builtin_va_start (valist, nextarg);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2782 static tree
2783 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2784 gimple_seq *post_p)
2786 valist = arm_extract_valist_ptr (valist);
2787 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2790 /* Check any incompatible options that the user has specified. */
2791 static void
2792 arm_option_check_internal (struct gcc_options *opts)
2794 int flags = opts->x_target_flags;
2796 /* iWMMXt and NEON are incompatible. */
2797 if (TARGET_IWMMXT
2798 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags)
2804 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags)
2817 && write_symbols != NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2831 error ("RTP PIC is incompatible with Thumb");
2832 flag_pic = 0;
2835 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2836 with MOVT. */
2837 if ((target_pure_code || target_slow_flash_data)
2838 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2840 const char *flag = (target_pure_code ? "-mpure-code" :
2841 "-mslow-flash-data");
2842 error ("%s only supports non-pic code on M-profile targets with the "
2843 "MOVT instruction", flag);
2848 /* Recompute the global settings depending on target attribute options. */
2850 static void
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2855 if (TARGET_THUMB1)
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm.min_anchor_offset = 0;
2863 targetm.max_anchor_offset = 127;
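/* For example (a sketch of the encodings involved): a Thumb-1 word load
   "ldr rX, [rY, #imm]" encodes a 5-bit immediate scaled by 4, i.e. byte
   offsets 0..124, which is the 7-bit range referred to above.  */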
2865 else if (TARGET_THUMB2)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
2870 targetm.min_anchor_offset = -248;
2871 targetm.max_anchor_offset = 4095;
2873 else
2875 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2876 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2879 /* Increase the number of conditional instructions with -Os. */
2880 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2882 /* For THUMB2, we limit the conditional sequence to one IT block. */
2883 if (TARGET_THUMB2)
2884 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2887 /* True if -mflip-thumb should next add an attribute for the default
2888 mode, false if it should next add an attribute for the opposite mode. */
2889 static GTY(()) bool thumb_flipper;
2891 /* Options after initial target override. */
2892 static GTY(()) tree init_optimize;
2894 static void
2895 arm_override_options_after_change_1 (struct gcc_options *opts)
2897 if (opts->x_align_functions <= 0)
2898 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2899 && opts->x_optimize_size ? 2 : 4;
2902 /* Implement targetm.override_options_after_change. */
2904 static void
2905 arm_override_options_after_change (void)
2907 arm_configure_build_target (&arm_active_target,
2908 TREE_TARGET_OPTION (target_option_default_node),
2909 &global_options_set, false);
2911 arm_override_options_after_change_1 (&global_options);
2914 /* Implement TARGET_OPTION_SAVE. */
2915 static void
2916 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2918 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2919 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2920 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2923 /* Implement TARGET_OPTION_RESTORE. */
2924 static void
2925 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2927 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2928 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2929 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2930 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2931 false);
2934 /* Reset options between modes that the user has specified. */
2935 static void
2936 arm_option_override_internal (struct gcc_options *opts,
2937 struct gcc_options *opts_set)
2939 arm_override_options_after_change_1 (opts);
2941 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2943 /* The default is to enable interworking, so this warning message would
2944 be confusing to users who have just compiled with, e.g., -march=armv3. */
2945 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2946 opts->x_target_flags &= ~MASK_INTERWORK;
2949 if (TARGET_THUMB_P (opts->x_target_flags)
2950 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2952 warning (0, "target CPU does not support THUMB instructions");
2953 opts->x_target_flags &= ~MASK_THUMB;
2956 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2958 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2959 opts->x_target_flags &= ~MASK_APCS_FRAME;
2962 /* Callee super interworking implies thumb interworking. Adding
2963 this to the flags here simplifies the logic elsewhere. */
2964 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2965 opts->x_target_flags |= MASK_INTERWORK;
2967 /* Need to remember the initial values so that combinations of options like
2968 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2969 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2971 if (! opts_set->x_arm_restrict_it)
2972 opts->x_arm_restrict_it = arm_arch8;
2974 /* ARM execution state and M profile don't have [restrict] IT. */
2975 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2976 opts->x_arm_restrict_it = 0;
2978 /* Enable -munaligned-access by default for
2979 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2980 i.e. Thumb2 and ARM state only.
2981 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2982 - ARMv8 architecture-based processors.
2984 Disable -munaligned-access by default for
2985 - all pre-ARMv6 architecture-based processors
2986 - ARMv6-M architecture-based processors
2987 - ARMv8-M Baseline processors. */
2989 if (! opts_set->x_unaligned_access)
2991 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2992 && arm_arch6 && (arm_arch_notm || arm_arch7));
2994 else if (opts->x_unaligned_access == 1
2995 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2997 warning (0, "target CPU does not support unaligned accesses");
2998 opts->x_unaligned_access = 0;
3001 /* Don't warn since it's on by default in -O2. */
3002 if (TARGET_THUMB1_P (opts->x_target_flags))
3003 opts->x_flag_schedule_insns = 0;
3004 else
3005 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3007 /* Disable shrink-wrap when optimizing function for size, since it tends to
3008 generate additional returns. */
3009 if (optimize_function_for_size_p (cfun)
3010 && TARGET_THUMB2_P (opts->x_target_flags))
3011 opts->x_flag_shrink_wrap = false;
3012 else
3013 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3015 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3016 - epilogue_insns - does not accurately model the corresponding insns
3017 emitted in the asm file. In particular, see the comment in thumb_exit
3018 'Find out how many of the (return) argument registers we can corrupt'.
3019 As a consequence, the epilogue may clobber registers without fipa-ra
3020 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3021 TODO: Accurately model clobbers for epilogue_insns and reenable
3022 fipa-ra. */
3023 if (TARGET_THUMB1_P (opts->x_target_flags))
3024 opts->x_flag_ipa_ra = 0;
3025 else
3026 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3028 /* Thumb2 inline assembly code should always use unified syntax.
3029 This will apply to ARM and Thumb1 eventually. */
3030 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3032 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3033 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3034 #endif
3037 static sbitmap isa_all_fpubits;
3038 static sbitmap isa_quirkbits;
3040 /* Configure a build target TARGET from the user-specified options OPTS and
3041 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3042 architecture have been specified, but the two are not identical. */
3043 void
3044 arm_configure_build_target (struct arm_build_target *target,
3045 struct cl_target_option *opts,
3046 struct gcc_options *opts_set,
3047 bool warn_compatible)
3049 const cpu_option *arm_selected_tune = NULL;
3050 const arch_option *arm_selected_arch = NULL;
3051 const cpu_option *arm_selected_cpu = NULL;
3052 const arm_fpu_desc *arm_selected_fpu = NULL;
3053 const char *tune_opts = NULL;
3054 const char *arch_opts = NULL;
3055 const char *cpu_opts = NULL;
3057 bitmap_clear (target->isa);
3058 target->core_name = NULL;
3059 target->arch_name = NULL;
3061 if (opts_set->x_arm_arch_string)
3063 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3064 "-march",
3065 opts->x_arm_arch_string);
3066 arch_opts = strchr (opts->x_arm_arch_string, '+');
3069 if (opts_set->x_arm_cpu_string)
3071 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3072 opts->x_arm_cpu_string);
3073 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3074 arm_selected_tune = arm_selected_cpu;
3075 /* If taking the tuning from -mcpu, we don't need to rescan the
3076 options for tuning. */
3079 if (opts_set->x_arm_tune_string)
3081 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3082 opts->x_arm_tune_string);
3083 tune_opts = strchr (opts->x_arm_tune_string, '+');
3086 if (arm_selected_arch)
3088 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3089 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3090 arch_opts);
3092 if (arm_selected_cpu)
3094 auto_sbitmap cpu_isa (isa_num_bits);
3095 auto_sbitmap isa_delta (isa_num_bits);
3097 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3098 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3099 cpu_opts);
3100 bitmap_xor (isa_delta, cpu_isa, target->isa);
3101 /* Ignore any bits that are quirk bits. */
3102 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3103 /* Ignore (for now) any bits that might be set by -mfpu. */
3104 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3106 if (!bitmap_empty_p (isa_delta))
3108 if (warn_compatible)
3109 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3110 arm_selected_cpu->common.name,
3111 arm_selected_arch->common.name);
3112 /* -march wins for code generation.
3113 -mcpu wins for default tuning. */
3114 if (!arm_selected_tune)
3115 arm_selected_tune = arm_selected_cpu;
3117 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3118 target->arch_name = arm_selected_arch->common.name;
3120 else
3122 /* Architecture and CPU are essentially the same.
3123 Prefer the CPU setting. */
3124 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3125 target->core_name = arm_selected_cpu->common.name;
3126 /* Copy the CPU's capabilities, so that we inherit the
3127 appropriate extensions and quirks. */
3128 bitmap_copy (target->isa, cpu_isa);
3131 else
3133 /* Pick a CPU based on the architecture. */
3134 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3135 target->arch_name = arm_selected_arch->common.name;
3136 /* Note: target->core_name is left unset in this path. */
3139 else if (arm_selected_cpu)
3141 target->core_name = arm_selected_cpu->common.name;
3142 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3143 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3144 cpu_opts);
3145 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3147 /* If the user did not specify a processor or architecture, choose
3148 one for them. */
3149 else
3151 const cpu_option *sel;
3152 auto_sbitmap sought_isa (isa_num_bits);
3153 bitmap_clear (sought_isa);
3154 auto_sbitmap default_isa (isa_num_bits);
3156 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3157 TARGET_CPU_DEFAULT);
3158 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3159 gcc_assert (arm_selected_cpu->common.name);
3161 /* RWE: All of the selection logic below (to the end of this
3162 'if' clause) looks somewhat suspect. It appears to be mostly
3163 there to support forcing thumb support when the default CPU
3164 does not have thumb (somewhat dubious in terms of what the
3165 user might be expecting). I think it should be removed once
3166 support for the pre-thumb era cores is removed. */
3167 sel = arm_selected_cpu;
3168 arm_initialize_isa (default_isa, sel->common.isa_bits);
3169 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3170 cpu_opts);
3172 /* Now check to see if the user has specified any command line
3173 switches that require certain abilities from the cpu. */
3175 if (TARGET_INTERWORK || TARGET_THUMB)
3177 bitmap_set_bit (sought_isa, isa_bit_thumb);
3178 bitmap_set_bit (sought_isa, isa_bit_mode32);
3180 /* There are no ARM processors that support both APCS-26 and
3181 interworking. Therefore we forcibly remove MODE26 from
3182 the isa features here (if it was set), so that the
3183 search below will always be able to find a compatible
3184 processor. */
3185 bitmap_clear_bit (default_isa, isa_bit_mode26);
3188 /* If there are such requirements and the default CPU does not
3189 satisfy them, we need to run over the complete list of
3190 cores looking for one that is satisfactory. */
3191 if (!bitmap_empty_p (sought_isa)
3192 && !bitmap_subset_p (sought_isa, default_isa))
3194 auto_sbitmap candidate_isa (isa_num_bits);
3195 /* We're only interested in a CPU with at least the
3196 capabilities of the default CPU and the required
3197 additional features. */
3198 bitmap_ior (default_isa, default_isa, sought_isa);
3200 /* Try to locate a CPU type that supports all of the abilities
3201 of the default CPU, plus the extra abilities requested by
3202 the user. */
3203 for (sel = all_cores; sel->common.name != NULL; sel++)
3205 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3206 /* An exact match? */
3207 if (bitmap_equal_p (default_isa, candidate_isa))
3208 break;
3211 if (sel->common.name == NULL)
3213 unsigned current_bit_count = isa_num_bits;
3214 const cpu_option *best_fit = NULL;
3216 /* Ideally we would like to issue an error message here
3217 saying that it was not possible to find a CPU compatible
3218 with the default CPU, but which also supports the command
3219 line options specified by the programmer, and so they
3220 ought to use the -mcpu=<name> command line option to
3221 override the default CPU type.
3223 If we cannot find a CPU that has exactly the
3224 characteristics of the default CPU and the given
3225 command line options we scan the array again looking
3226 for a best match. The best match must have at least
3227 the capabilities of the perfect match. */
3228 for (sel = all_cores; sel->common.name != NULL; sel++)
3230 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3232 if (bitmap_subset_p (default_isa, candidate_isa))
3234 unsigned count;
3236 bitmap_and_compl (candidate_isa, candidate_isa,
3237 default_isa);
3238 count = bitmap_popcount (candidate_isa);
3240 if (count < current_bit_count)
3242 best_fit = sel;
3243 current_bit_count = count;
3247 gcc_assert (best_fit);
3248 sel = best_fit;
3251 arm_selected_cpu = sel;
3254 /* Now we know the CPU, we can finally initialize the target
3255 structure. */
3256 target->core_name = arm_selected_cpu->common.name;
3257 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3258 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3259 cpu_opts);
3260 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3263 gcc_assert (arm_selected_cpu);
3264 gcc_assert (arm_selected_arch);
3266 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3268 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3269 auto_sbitmap fpu_bits (isa_num_bits);
3271 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3272 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3273 bitmap_ior (target->isa, target->isa, fpu_bits);
3276 if (!arm_selected_tune)
3277 arm_selected_tune = arm_selected_cpu;
3278 else /* Validate the features passed to -mtune. */
3279 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3281 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3283 /* Finish initializing the target structure. */
3284 target->arch_pp_name = arm_selected_arch->arch;
3285 target->base_arch = arm_selected_arch->base_arch;
3286 target->profile = arm_selected_arch->profile;
3288 target->tune_flags = tune_data->tune_flags;
3289 target->tune = tune_data->tune;
3290 target->tune_core = tune_data->scheduler;
3293 /* Fix up any incompatible options that the user has specified. */
3294 static void
3295 arm_option_override (void)
3297 static const enum isa_feature fpu_bitlist[]
3298 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3299 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3300 cl_target_option opts;
3302 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3303 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3305 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3306 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3308 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3310 if (!global_options_set.x_arm_fpu_index)
3312 bool ok;
3313 int fpu_index;
3315 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3316 CL_TARGET);
3317 gcc_assert (ok);
3318 arm_fpu_index = (enum fpu_type) fpu_index;
3321 cl_target_option_save (&opts, &global_options);
3322 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3323 true);
3325 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3326 SUBTARGET_OVERRIDE_OPTIONS;
3327 #endif
3329 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3330 arm_base_arch = arm_active_target.base_arch;
3332 arm_tune = arm_active_target.tune_core;
3333 tune_flags = arm_active_target.tune_flags;
3334 current_tune = arm_active_target.tune;
3336 /* TBD: Dwarf info for apcs frame is not handled yet. */
3337 if (TARGET_APCS_FRAME)
3338 flag_shrink_wrap = false;
3340 /* BPABI targets use linker tricks to allow interworking on cores
3341 without thumb support. */
3342 if (TARGET_INTERWORK
3343 && !TARGET_BPABI
3344 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3346 warning (0, "target CPU does not support interworking");
3347 target_flags &= ~MASK_INTERWORK;
3350 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3352 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3353 target_flags |= MASK_APCS_FRAME;
3356 if (TARGET_POKE_FUNCTION_NAME)
3357 target_flags |= MASK_APCS_FRAME;
3359 if (TARGET_APCS_REENT && flag_pic)
3360 error ("-fpic and -mapcs-reent are incompatible");
3362 if (TARGET_APCS_REENT)
3363 warning (0, "APCS reentrant code not supported. Ignored");
3365 /* Initialize boolean versions of the architectural flags, for use
3366 in the arm.md file. */
3367 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3368 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3369 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3370 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3371 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3372 arm_arch5te = arm_arch5e
3373 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3374 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3375 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3376 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3377 arm_arch6m = arm_arch6 && !arm_arch_notm;
3378 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3379 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3380 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3381 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3382 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3383 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3384 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3385 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3386 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3387 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3388 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3389 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3390 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3391 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3392 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3393 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3394 if (arm_fp16_inst)
3396 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3397 error ("selected fp16 options are incompatible");
3398 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3402 /* Set up some tuning parameters. */
3403 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3404 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3405 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3406 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3407 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3408 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3410 /* And finally, set up some quirks. */
3411 arm_arch_no_volatile_ce
3412 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3413 arm_arch6kz
3414 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3416 /* V5 code we generate is completely interworking capable, so we turn off
3417 TARGET_INTERWORK here to avoid many tests later on. */
3419 /* XXX However, we must pass the right pre-processor defines to CPP
3420 or GLD can get confused. This is a hack. */
3421 if (TARGET_INTERWORK)
3422 arm_cpp_interwork = 1;
3424 if (arm_arch5)
3425 target_flags &= ~MASK_INTERWORK;
3427 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3428 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3430 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3431 error ("iwmmxt abi requires an iwmmxt capable cpu");
3433 /* If soft-float is specified then don't use FPU. */
3434 if (TARGET_SOFT_FLOAT)
3435 arm_fpu_attr = FPU_NONE;
3436 else
3437 arm_fpu_attr = FPU_VFP;
3439 if (TARGET_AAPCS_BASED)
3441 if (TARGET_CALLER_INTERWORKING)
3442 error ("AAPCS does not support -mcaller-super-interworking");
3443 else
3444 if (TARGET_CALLEE_INTERWORKING)
3445 error ("AAPCS does not support -mcallee-super-interworking");
3448 /* __fp16 support currently assumes the core has ldrh. */
3449 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3450 sorry ("__fp16 and no ldrh");
3452 if (TARGET_AAPCS_BASED)
3454 if (arm_abi == ARM_ABI_IWMMXT)
3455 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3456 else if (TARGET_HARD_FLOAT_ABI)
3458 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3459 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3460 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3462 else
3463 arm_pcs_default = ARM_PCS_AAPCS;
3465 else
3467 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3468 sorry ("-mfloat-abi=hard and VFP");
3470 if (arm_abi == ARM_ABI_APCS)
3471 arm_pcs_default = ARM_PCS_APCS;
3472 else
3473 arm_pcs_default = ARM_PCS_ATPCS;
3476 /* For arm2/3 there is no need to do any scheduling if we are doing
3477 software floating-point. */
3478 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3479 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3481 /* Use the cp15 method if it is available. */
3482 if (target_thread_pointer == TP_AUTO)
3484 if (arm_arch6k && !TARGET_THUMB1)
3485 target_thread_pointer = TP_CP15;
3486 else
3487 target_thread_pointer = TP_SOFT;
3490 /* Override the default structure alignment for AAPCS ABI. */
3491 if (!global_options_set.x_arm_structure_size_boundary)
3493 if (TARGET_AAPCS_BASED)
3494 arm_structure_size_boundary = 8;
3496 else
3498 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3500 if (arm_structure_size_boundary != 8
3501 && arm_structure_size_boundary != 32
3502 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3504 if (ARM_DOUBLEWORD_ALIGN)
3505 warning (0,
3506 "structure size boundary can only be set to 8, 32 or 64");
3507 else
3508 warning (0, "structure size boundary can only be set to 8 or 32");
3509 arm_structure_size_boundary
3510 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3514 if (TARGET_VXWORKS_RTP)
3516 if (!global_options_set.x_arm_pic_data_is_text_relative)
3517 arm_pic_data_is_text_relative = 0;
3519 else if (flag_pic
3520 && !arm_pic_data_is_text_relative
3521 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3522 /* When text & data segments don't have a fixed displacement, the
3523 intended use is with a single, read-only, PIC base register.
3524 Unless the user explicitly requested not to do that, set
3525 it. */
3526 target_flags |= MASK_SINGLE_PIC_BASE;
3528 /* If stack checking is disabled, we can use r10 as the PIC register,
3529 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3530 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3532 if (TARGET_VXWORKS_RTP)
3533 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3534 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3537 if (flag_pic && TARGET_VXWORKS_RTP)
3538 arm_pic_register = 9;
3540 if (arm_pic_register_string != NULL)
3542 int pic_register = decode_reg_name (arm_pic_register_string);
3544 if (!flag_pic)
3545 warning (0, "-mpic-register= is useless without -fpic");
3547 /* Prevent the user from choosing an obviously stupid PIC register. */
3548 else if (pic_register < 0 || call_used_regs[pic_register]
3549 || pic_register == HARD_FRAME_POINTER_REGNUM
3550 || pic_register == STACK_POINTER_REGNUM
3551 || pic_register >= PC_REGNUM
3552 || (TARGET_VXWORKS_RTP
3553 && (unsigned int) pic_register != arm_pic_register))
3554 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3555 else
3556 arm_pic_register = pic_register;
3559 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3560 if (fix_cm3_ldrd == 2)
3562 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3563 fix_cm3_ldrd = 1;
3564 else
3565 fix_cm3_ldrd = 0;
3568 /* Hot/Cold partitioning is not currently supported, since we can't
3569 handle literal pool placement in that case. */
3570 if (flag_reorder_blocks_and_partition)
3572 inform (input_location,
3573 "-freorder-blocks-and-partition not supported on this architecture");
3574 flag_reorder_blocks_and_partition = 0;
3575 flag_reorder_blocks = 1;
3578 if (flag_pic)
3579 /* Hoisting PIC address calculations more aggressively provides a small,
3580 but measurable, size reduction for PIC code. Therefore, we decrease
3581 the bar for unrestricted expression hoisting to the cost of PIC address
3582 calculation, which is 2 instructions. */
3583 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3584 global_options.x_param_values,
3585 global_options_set.x_param_values);
3587 /* ARM EABI defaults to strict volatile bitfields. */
3588 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3589 && abi_version_at_least(2))
3590 flag_strict_volatile_bitfields = 1;
3592 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and where we
3593 have deemed it beneficial (signified by setting
3594 prefetch.num_slots to 1 or more). */
3595 if (flag_prefetch_loop_arrays < 0
3596 && HAVE_prefetch
3597 && optimize >= 3
3598 && current_tune->prefetch.num_slots > 0)
3599 flag_prefetch_loop_arrays = 1;
3601 /* Set up parameters to be used in prefetching algorithm. Do not
3602 override the defaults unless we are tuning for a core we have
3603 researched values for. */
3604 if (current_tune->prefetch.num_slots > 0)
3605 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3606 current_tune->prefetch.num_slots,
3607 global_options.x_param_values,
3608 global_options_set.x_param_values);
3609 if (current_tune->prefetch.l1_cache_line_size >= 0)
3610 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3611 current_tune->prefetch.l1_cache_line_size,
3612 global_options.x_param_values,
3613 global_options_set.x_param_values);
3614 if (current_tune->prefetch.l1_cache_size >= 0)
3615 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3616 current_tune->prefetch.l1_cache_size,
3617 global_options.x_param_values,
3618 global_options_set.x_param_values);
3620 /* Use Neon rather than core registers to perform 64-bit
3621 operations. */
3622 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3623 if (use_neon_for_64bits == 1)
3624 prefer_neon_for_64bits = true;
3626 /* Use the alternative scheduling-pressure algorithm by default. */
3627 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3628 global_options.x_param_values,
3629 global_options_set.x_param_values);
3631 /* Look through ready list and all of queue for instructions
3632 relevant for L2 auto-prefetcher. */
3633 int param_sched_autopref_queue_depth;
3635 switch (current_tune->sched_autopref)
3637 case tune_params::SCHED_AUTOPREF_OFF:
3638 param_sched_autopref_queue_depth = -1;
3639 break;
3641 case tune_params::SCHED_AUTOPREF_RANK:
3642 param_sched_autopref_queue_depth = 0;
3643 break;
3645 case tune_params::SCHED_AUTOPREF_FULL:
3646 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3647 break;
3649 default:
3650 gcc_unreachable ();
3653 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3654 param_sched_autopref_queue_depth,
3655 global_options.x_param_values,
3656 global_options_set.x_param_values);
3658 /* Currently, for slow flash data, we just disable literal pools. We also
3659 disable them for pure-code. */
3660 if (target_slow_flash_data || target_pure_code)
3661 arm_disable_literal_pool = true;
3663 if (use_cmse && !arm_arch_cmse)
3664 error ("target CPU does not support ARMv8-M Security Extensions");
3666 /* Disable scheduling fusion by default if the target is not an ARMv7
3667 processor or does not prefer ldrd/strd. */
3668 if (flag_schedule_fusion == 2
3669 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3670 flag_schedule_fusion = 0;
3672 /* Need to remember initial options before they are overridden. */
3673 init_optimize = build_optimization_node (&global_options);
3675 arm_option_override_internal (&global_options, &global_options_set);
3676 arm_option_check_internal (&global_options);
3677 arm_option_params_internal ();
3679 /* Create the default target_options structure. */
3680 target_option_default_node = target_option_current_node
3681 = build_target_option_node (&global_options);
3683 /* Register global variables with the garbage collector. */
3684 arm_add_gc_roots ();
3686 /* Init initial mode for testing. */
3687 thumb_flipper = TARGET_THUMB;
3690 static void
3691 arm_add_gc_roots (void)
3693 gcc_obstack_init(&minipool_obstack);
3694 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3697 /* A table of known ARM exception types.
3698 For use with the interrupt function attribute. */
3700 typedef struct
3702 const char *const arg;
3703 const unsigned long return_value;
3705 isr_attribute_arg;
3707 static const isr_attribute_arg isr_attribute_args [] =
3709 { "IRQ", ARM_FT_ISR },
3710 { "irq", ARM_FT_ISR },
3711 { "FIQ", ARM_FT_FIQ },
3712 { "fiq", ARM_FT_FIQ },
3713 { "ABORT", ARM_FT_ISR },
3714 { "abort", ARM_FT_ISR },
3715 { "ABORT", ARM_FT_ISR },
3716 { "abort", ARM_FT_ISR },
3717 { "UNDEF", ARM_FT_EXCEPTION },
3718 { "undef", ARM_FT_EXCEPTION },
3719 { "SWI", ARM_FT_EXCEPTION },
3720 { "swi", ARM_FT_EXCEPTION },
3721 { NULL, ARM_FT_NORMAL }
3724 /* Returns the (interrupt) function type of the current
3725 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3727 static unsigned long
3728 arm_isr_value (tree argument)
3730 const isr_attribute_arg * ptr;
3731 const char * arg;
3733 if (!arm_arch_notm)
3734 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3736 /* No argument - default to IRQ. */
3737 if (argument == NULL_TREE)
3738 return ARM_FT_ISR;
3740 /* Get the value of the argument. */
3741 if (TREE_VALUE (argument) == NULL_TREE
3742 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3743 return ARM_FT_UNKNOWN;
3745 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3747 /* Check it against the list of known arguments. */
3748 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3749 if (streq (arg, ptr->arg))
3750 return ptr->return_value;
3752 /* An unrecognized interrupt type. */
3753 return ARM_FT_UNKNOWN;
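/* For illustration: the table above is matched against the string argument
   of the "isr"/"interrupt" attribute, so a declaration along the lines of

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   (the function name is only an example) maps to ARM_FT_ISR, while an
   unrecognized string falls through to ARM_FT_UNKNOWN.  */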
3756 /* Computes the type of the current function. */
3758 static unsigned long
3759 arm_compute_func_type (void)
3761 unsigned long type = ARM_FT_UNKNOWN;
3762 tree a;
3763 tree attr;
3765 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3767 /* Decide if the current function is volatile. Such functions
3768 never return, and many memory cycles can be saved by not storing
3769 register values that will never be needed again. This optimization
3770 was added to speed up context switching in a kernel application. */
3771 if (optimize > 0
3772 && (TREE_NOTHROW (current_function_decl)
3773 || !(flag_unwind_tables
3774 || (flag_exceptions
3775 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3776 && TREE_THIS_VOLATILE (current_function_decl))
3777 type |= ARM_FT_VOLATILE;
3779 if (cfun->static_chain_decl != NULL)
3780 type |= ARM_FT_NESTED;
3782 attr = DECL_ATTRIBUTES (current_function_decl);
3784 a = lookup_attribute ("naked", attr);
3785 if (a != NULL_TREE)
3786 type |= ARM_FT_NAKED;
3788 a = lookup_attribute ("isr", attr);
3789 if (a == NULL_TREE)
3790 a = lookup_attribute ("interrupt", attr);
3792 if (a == NULL_TREE)
3793 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3794 else
3795 type |= arm_isr_value (TREE_VALUE (a));
3797 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3798 type |= ARM_FT_CMSE_ENTRY;
3800 return type;
3803 /* Returns the type of the current function. */
3805 unsigned long
3806 arm_current_func_type (void)
3808 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3809 cfun->machine->func_type = arm_compute_func_type ();
3811 return cfun->machine->func_type;
3814 bool
3815 arm_allocate_stack_slots_for_args (void)
3817 /* Naked functions should not allocate stack slots for arguments. */
3818 return !IS_NAKED (arm_current_func_type ());
3821 static bool
3822 arm_warn_func_return (tree decl)
3824 /* Naked functions are implemented entirely in assembly, including the
3825 return sequence, so suppress warnings about this. */
3826 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
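/* Both predicates above key off the "naked" attribute, i.e. declarations
   roughly of the form

     void reset_entry (void) __attribute__ ((naked));

   (the name is illustrative).  Such functions get no compiler-generated
   prologue or epilogue, so argument stack slots and missing-return
   warnings would both be inappropriate for them.  */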
3830 /* Output assembler code for a block containing the constant parts
3831 of a trampoline, leaving space for the variable parts.
3833 On the ARM, (if r8 is the static chain regnum, and remembering that
3834 referencing pc adds an offset of 8) the trampoline looks like:
3835 ldr r8, [pc, #0]
3836 ldr pc, [pc]
3837 .word static chain value
3838 .word function's address
3839 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3841 static void
3842 arm_asm_trampoline_template (FILE *f)
3844 fprintf (f, "\t.syntax unified\n");
3846 if (TARGET_ARM)
3848 fprintf (f, "\t.arm\n");
3849 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3850 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3852 else if (TARGET_THUMB2)
3854 fprintf (f, "\t.thumb\n");
3855 /* The Thumb-2 trampoline is similar to the ARM implementation.
3856 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3857 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3858 STATIC_CHAIN_REGNUM, PC_REGNUM);
3859 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3861 else
3863 ASM_OUTPUT_ALIGN (f, 2);
3864 fprintf (f, "\t.code\t16\n");
3865 fprintf (f, ".Ltrampoline_start:\n");
3866 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3867 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3868 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3869 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3870 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3871 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3873 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3874 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3877 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3879 static void
3880 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3882 rtx fnaddr, mem, a_tramp;
3884 emit_block_move (m_tramp, assemble_trampoline_template (),
3885 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3887 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3888 emit_move_insn (mem, chain_value);
3890 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3891 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3892 emit_move_insn (mem, fnaddr);
3894 a_tramp = XEXP (m_tramp, 0);
3895 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3896 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3897 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3900 /* Thumb trampolines should be entered in thumb mode, so set
3901 the bottom bit of the address. */
3903 static rtx
3904 arm_trampoline_adjust_address (rtx addr)
3906 if (TARGET_THUMB)
3907 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3908 NULL, 0, OPTAB_LIB_WIDEN);
3909 return addr;
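/* As a sketch of the effect: if the trampoline block were placed at
   0x20000 and the target is Thumb, the address handed back is 0x20001,
   so that an indirect bx/blx through it enters Thumb state.  */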
3912 /* Return 1 if it is possible to return using a single instruction.
3913 If SIBLING is non-null, this is a test for a return before a sibling
3914 call. SIBLING is the call insn, so we can examine its register usage. */
3917 use_return_insn (int iscond, rtx sibling)
3919 int regno;
3920 unsigned int func_type;
3921 unsigned long saved_int_regs;
3922 unsigned HOST_WIDE_INT stack_adjust;
3923 arm_stack_offsets *offsets;
3925 /* Never use a return instruction before reload has run. */
3926 if (!reload_completed)
3927 return 0;
3929 func_type = arm_current_func_type ();
3931 /* Naked, volatile and stack alignment functions need special
3932 consideration. */
3933 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3934 return 0;
3936 /* So do interrupt functions that use the frame pointer and Thumb
3937 interrupt functions. */
3938 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3939 return 0;
3941 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3942 && !optimize_function_for_size_p (cfun))
3943 return 0;
3945 offsets = arm_get_frame_offsets ();
3946 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3948 /* As do variadic functions. */
3949 if (crtl->args.pretend_args_size
3950 || cfun->machine->uses_anonymous_args
3951 /* Or if the function calls __builtin_eh_return () */
3952 || crtl->calls_eh_return
3953 /* Or if the function calls alloca */
3954 || cfun->calls_alloca
3955 /* Or if there is a stack adjustment. However, if the stack pointer
3956 is saved on the stack, we can use a pre-incrementing stack load. */
3957 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3958 && stack_adjust == 4))
3959 /* Or if the static chain register was saved above the frame, under the
3960 assumption that the stack pointer isn't saved on the stack. */
3961 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3962 && arm_compute_static_chain_stack_bytes() != 0))
3963 return 0;
3965 saved_int_regs = offsets->saved_regs_mask;
3967 /* Unfortunately, the insn
3969 ldmib sp, {..., sp, ...}
3971 triggers a bug on most SA-110 based devices, such that the stack
3972 pointer won't be correctly restored if the instruction takes a
3973 page fault. We work around this problem by popping r3 along with
3974 the other registers, since that is never slower than executing
3975 another instruction.
3977 We test for !arm_arch5 here, because code for any architecture
3978 less than this could potentially be run on one of the buggy
3979 chips. */
3980 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3982 /* Validate that r3 is a call-clobbered register (always true in
3983 the default abi) ... */
3984 if (!call_used_regs[3])
3985 return 0;
3987 /* ... that it isn't being used for a return value ... */
3988 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3989 return 0;
3991 /* ... or for a tail-call argument ... */
3992 if (sibling)
3994 gcc_assert (CALL_P (sibling));
3996 if (find_regno_fusage (sibling, USE, 3))
3997 return 0;
4000 /* ... and that there are no call-saved registers in r0-r2
4001 (always true in the default ABI). */
4002 if (saved_int_regs & 0x7)
4003 return 0;
4006 /* Can't be done if interworking with Thumb, and any registers have been
4007 stacked. */
4008 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4009 return 0;
4011 /* On StrongARM, conditional returns are expensive if they aren't
4012 taken and multiple registers have been stacked. */
4013 if (iscond && arm_tune_strongarm)
4015 /* Conditional return when just the LR is stored is a simple
4016 conditional-load instruction, that's not expensive. */
4017 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4018 return 0;
4020 if (flag_pic
4021 && arm_pic_register != INVALID_REGNUM
4022 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4023 return 0;
4026 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4027 several instructions if anything needs to be popped. */
4028 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4029 return 0;
4031 /* If there are saved registers but the LR isn't saved, then we need
4032 two instructions for the return. */
4033 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4034 return 0;
4036 /* Can't be done if any of the VFP regs are pushed,
4037 since this also requires an insn. */
4038 if (TARGET_HARD_FLOAT)
4039 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4040 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4041 return 0;
4043 if (TARGET_REALLY_IWMMXT)
4044 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4045 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4046 return 0;
4048 return 1;
4051 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4052 shrink-wrapping if possible. This is the case if we need to emit a
4053 prologue, which we can test by looking at the offsets. */
4054 bool
4055 use_simple_return_p (void)
4057 arm_stack_offsets *offsets;
4059 /* Note this function can be called before or after reload. */
4060 if (!reload_completed)
4061 arm_compute_frame_layout ();
4063 offsets = arm_get_frame_offsets ();
4064 return offsets->outgoing_args != 0;
4067 /* Return TRUE if int I is a valid immediate ARM constant. */
4070 const_ok_for_arm (HOST_WIDE_INT i)
4072 int lowbit;
4074 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4075 be all zero, or all one. */
4076 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4077 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4078 != ((~(unsigned HOST_WIDE_INT) 0)
4079 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4080 return FALSE;
4082 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4084 /* Fast return for 0 and small values. We must do this for zero, since
4085 the code below can't handle that one case. */
4086 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4087 return TRUE;
4089 /* Get the number of trailing zeros. */
4090 lowbit = ffs((int) i) - 1;
4092 /* Only even shifts are allowed in ARM mode so round down to the
4093 nearest even number. */
4094 if (TARGET_ARM)
4095 lowbit &= ~1;
4097 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4098 return TRUE;
4100 if (TARGET_ARM)
4102 /* Allow rotated constants in ARM mode. */
4103 if (lowbit <= 4
4104 && ((i & ~0xc000003f) == 0
4105 || (i & ~0xf000000f) == 0
4106 || (i & ~0xfc000003) == 0))
4107 return TRUE;
4109 else if (TARGET_THUMB2)
4111 HOST_WIDE_INT v;
4113 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4114 v = i & 0xff;
4115 v |= v << 16;
4116 if (i == v || i == (v | (v << 8)))
4117 return TRUE;
4119 /* Allow repeated pattern 0xXY00XY00. */
4120 v = i & 0xff00;
4121 v |= v << 16;
4122 if (i == v)
4123 return TRUE;
4125 else if (TARGET_HAVE_MOVT)
4127 /* Thumb-1 Targets with MOVT. */
4128 if (i > 0xffff)
4129 return FALSE;
4130 else
4131 return TRUE;
4134 return FALSE;
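/* A few illustrative inputs for the checks above (values already reduced
   to their low 32 bits):

     ARM mode:   0x000003fc -> TRUE   (0xff shifted left by 2, even)
                 0x000001fe -> FALSE  (0xff shifted left by 1, odd)
                 0xff000000 -> TRUE   (0xff shifted left by 24)
     Thumb-2:    0x00ff00ff -> TRUE   (repeated 0x00XY00XY pattern)
                 0xffffffff -> TRUE   (repeated 0xXYXYXYXY pattern)  */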
4137 /* Return true if I is a valid constant for the operation CODE. */
4139 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4141 if (const_ok_for_arm (i))
4142 return 1;
4144 switch (code)
4146 case SET:
4147 /* See if we can use movw. */
4148 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4149 return 1;
4150 else
4151 /* Otherwise, try mvn. */
4152 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4154 case PLUS:
4155 /* See if we can use addw or subw. */
4156 if (TARGET_THUMB2
4157 && ((i & 0xfffff000) == 0
4158 || ((-i) & 0xfffff000) == 0))
4159 return 1;
4160 /* Fall through. */
4161 case COMPARE:
4162 case EQ:
4163 case NE:
4164 case GT:
4165 case LE:
4166 case LT:
4167 case GE:
4168 case GEU:
4169 case LTU:
4170 case GTU:
4171 case LEU:
4172 case UNORDERED:
4173 case ORDERED:
4174 case UNEQ:
4175 case UNGE:
4176 case UNLT:
4177 case UNGT:
4178 case UNLE:
4179 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4181 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4182 case XOR:
4183 return 0;
4185 case IOR:
4186 if (TARGET_THUMB2)
4187 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4188 return 0;
4190 case AND:
4191 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4193 default:
4194 gcc_unreachable ();
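/* Two concrete cases of the fall-backs above, assuming ARM mode where the
   raw constants themselves are not encodable:

     (SET  x 0xffffff00)  -> 1, since ~0xffffff00 is 0xff within 32 bits,
                             so the value can be loaded with a single mvn;
     (PLUS x -1)          -> 1, since -(-1) is 1, so the addition can be
                             emitted with the negated constant (sub #1).  */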
4198 /* Return true if I is a valid DImode constant for the operation CODE. */
4200 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4202 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4203 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4204 rtx hi = GEN_INT (hi_val);
4205 rtx lo = GEN_INT (lo_val);
4207 if (TARGET_THUMB1)
4208 return 0;
4210 switch (code)
4212 case AND:
4213 case IOR:
4214 case XOR:
4215 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4216 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4217 case PLUS:
4218 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4220 default:
4221 return 0;
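/* For example, a DImode AND with 0xffffffff000000ff passes the test above:
   the high word is 0xffffffff (left untouched) and the low word 0xff is a
   valid AND immediate, so the operation can be split into two SImode
   halves.  */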
4225 /* Emit a sequence of insns to handle a large constant.
4226 CODE is the code of the operation required, it can be any of SET, PLUS,
4227 IOR, AND, XOR, MINUS;
4228 MODE is the mode in which the operation is being performed;
4229 VAL is the integer to operate on;
4230 SOURCE is the other operand (a register, or a null-pointer for SET);
4231 SUBTARGETS means it is safe to create scratch registers if that will
4232 either produce a simpler sequence, or we will want to cse the values.
4233 Return value is the number of insns emitted. */
4235 /* ??? Tweak this for thumb2. */
4237 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4238 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4240 rtx cond;
4242 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4243 cond = COND_EXEC_TEST (PATTERN (insn));
4244 else
4245 cond = NULL_RTX;
4247 if (subtargets || code == SET
4248 || (REG_P (target) && REG_P (source)
4249 && REGNO (target) != REGNO (source)))
4251 /* After arm_reorg has been called, we can't fix up expensive
4252 constants by pushing them into memory so we must synthesize
4253 them in-line, regardless of the cost. This is only likely to
4254 be more costly on chips that have load delay slots and we are
4255 compiling without running the scheduler (so no splitting
4256 occurred before the final instruction emission).
4258 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4260 if (!cfun->machine->after_arm_reorg
4261 && !cond
4262 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4263 1, 0)
4264 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4265 + (code != SET))))
4267 if (code == SET)
4269 /* Currently SET is the only monadic value for CODE; all
4270 the rest are dyadic. */
4271 if (TARGET_USE_MOVT)
4272 arm_emit_movpair (target, GEN_INT (val));
4273 else
4274 emit_set_insn (target, GEN_INT (val));
4276 return 1;
4278 else
4280 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4282 if (TARGET_USE_MOVT)
4283 arm_emit_movpair (temp, GEN_INT (val));
4284 else
4285 emit_set_insn (temp, GEN_INT (val));
4287 /* For MINUS, the value is subtracted from, since we never
4288 have subtraction of a constant. */
4289 if (code == MINUS)
4290 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4291 else
4292 emit_set_insn (target,
4293 gen_rtx_fmt_ee (code, mode, source, temp));
4294 return 2;
4299 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1);
4303 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4304 ARM/THUMB2 immediates, and add up to VAL.
4305 The function's return value gives the number of insns required. */
4306 static int
4307 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4308 struct four_ints *return_sequence)
4310 int best_consecutive_zeros = 0;
4311 int i;
4312 int best_start = 0;
4313 int insns1, insns2;
4314 struct four_ints tmp_sequence;
4316 /* If we aren't targeting ARM, the best place to start is always at
4317 the bottom, otherwise look more closely. */
4318 if (TARGET_ARM)
4320 for (i = 0; i < 32; i += 2)
4322 int consecutive_zeros = 0;
4324 if (!(val & (3 << i)))
4326 while ((i < 32) && !(val & (3 << i)))
4328 consecutive_zeros += 2;
4329 i += 2;
4331 if (consecutive_zeros > best_consecutive_zeros)
4333 best_consecutive_zeros = consecutive_zeros;
4334 best_start = i - consecutive_zeros;
4336 i -= 2;
4341 /* So long as it won't require any more insns to do so, it's
4342 desirable to emit a small constant (in bits 0...9) in the last
4343 insn. This way there is more chance that it can be combined with
4344 a later addressing insn to form a pre-indexed load or store
4345 operation. Consider:
4347 *((volatile int *)0xe0000100) = 1;
4348 *((volatile int *)0xe0000110) = 2;
4350 We want this to wind up as:
4352 mov rA, #0xe0000000
4353 mov rB, #1
4354 str rB, [rA, #0x100]
4355 mov rB, #2
4356 str rB, [rA, #0x110]
4358 rather than having to synthesize both large constants from scratch.
4360 Therefore, we calculate how many insns would be required to emit
4361 the constant starting from `best_start', and also starting from
4362 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4363 yield a shorter sequence, we may as well use zero. */
4364 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4365 if (best_start != 0
4366 && ((HOST_WIDE_INT_1U << best_start) < val))
4368 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4369 if (insns2 <= insns1)
4371 *return_sequence = tmp_sequence;
4372 insns1 = insns2;
4376 return insns1;
4379 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4380 static int
4381 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4382 struct four_ints *return_sequence, int i)
4384 int remainder = val & 0xffffffff;
4385 int insns = 0;
4387 /* Try and find a way of doing the job in either two or three
4388 instructions.
4390 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4391 location. We start at position I. This may be the MSB, or
4392 optimal_immediate_sequence may have positioned it at the largest block
4393 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4394 wrapping around to the top of the word when we drop off the bottom.
4395 In the worst case this code should produce no more than four insns.
4397 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4398 constants, shifted to any arbitrary location. We should always start
4399 at the MSB. */
4402 int end;
4403 unsigned int b1, b2, b3, b4;
4404 unsigned HOST_WIDE_INT result;
4405 int loc;
4407 gcc_assert (insns < 4);
4409 if (i <= 0)
4410 i += 32;
4412 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4413 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4415 loc = i;
4416 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4417 /* We can use addw/subw for the last 12 bits. */
4418 result = remainder;
4419 else
4421 /* Use an 8-bit shifted/rotated immediate. */
4422 end = i - 8;
4423 if (end < 0)
4424 end += 32;
4425 result = remainder & ((0x0ff << end)
4426 | ((i < end) ? (0xff >> (32 - end))
4427 : 0));
4428 i -= 8;
4431 else
4433 /* ARM allows rotates by a multiple of two. Thumb-2 allows
4434 arbitrary shifts. */
4435 i -= TARGET_ARM ? 2 : 1;
4436 continue;
4439 /* Next, see if we can do a better job with a thumb2 replicated
4440 constant.
4442 We do it this way around to catch the cases like 0x01F001E0 where
4443 two 8-bit immediates would work, but a replicated constant would
4444 make it worse.
4446 TODO: 16-bit constants that don't clear all the bits, but still win.
4447 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4448 if (TARGET_THUMB2)
4450 b1 = (remainder & 0xff000000) >> 24;
4451 b2 = (remainder & 0x00ff0000) >> 16;
4452 b3 = (remainder & 0x0000ff00) >> 8;
4453 b4 = remainder & 0xff;
4455 if (loc > 24)
4457 /* The 8-bit immediate already found clears b1 (and maybe b2),
4458 but must leave b3 and b4 alone. */
4460 /* First try to find a 32-bit replicated constant that clears
4461 almost everything. We can assume that we can't do it in one,
4462 or else we wouldn't be here. */
4463 unsigned int tmp = b1 & b2 & b3 & b4;
4464 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4465 + (tmp << 24);
4466 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4467 + (tmp == b3) + (tmp == b4);
4468 if (tmp
4469 && (matching_bytes >= 3
4470 || (matching_bytes == 2
4471 && const_ok_for_op (remainder & ~tmp2, code))))
4473 /* At least 3 of the bytes match, and the fourth has at
4474 least as many bits set, or two of the bytes match
4475 and it will only require one more insn to finish. */
4476 result = tmp2;
4477 i = tmp != b1 ? 32
4478 : tmp != b2 ? 24
4479 : tmp != b3 ? 16
4480 : 8;
4483 /* Second, try to find a 16-bit replicated constant that can
4484 leave three of the bytes clear. If b2 or b4 is already
4485 zero, then we can. If the 8-bit from above would not
4486 clear b2 anyway, then we still win. */
4487 else if (b1 == b3 && (!b2 || !b4
4488 || (remainder & 0x00ff0000 & ~result)))
4490 result = remainder & 0xff00ff00;
4491 i = 24;
4494 else if (loc > 16)
4496 /* The 8-bit immediate already found clears b2 (and maybe b3)
4497 and we don't get here unless b1 is already clear, but it will
4498 leave b4 unchanged. */
4500 /* If we can clear b2 and b4 at once, then we win, since the
4501 8-bits couldn't possibly reach that far. */
4502 if (b2 == b4)
4504 result = remainder & 0x00ff00ff;
4505 i = 16;
4510 return_sequence->i[insns++] = result;
4511 remainder &= ~result;
4513 if (code == SET || code == MINUS)
4514 code = PLUS;
4516 while (remainder);
4518 return insns;
4521 /* Emit an instruction with the indicated PATTERN. If COND is
4522 non-NULL, conditionalize the execution of the instruction on COND
4523 being true. */
4525 static void
4526 emit_constant_insn (rtx cond, rtx pattern)
4528 if (cond)
4529 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4530 emit_insn (pattern);
4533 /* As above, but extra parameter GENERATE which, if clear, suppresses
4534 RTL generation. */
4536 static int
4537 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4538 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4539 int subtargets, int generate)
4541 int can_invert = 0;
4542 int can_negate = 0;
4543 int final_invert = 0;
4544 int i;
4545 int set_sign_bit_copies = 0;
4546 int clear_sign_bit_copies = 0;
4547 int clear_zero_bit_copies = 0;
4548 int set_zero_bit_copies = 0;
4549 int insns = 0, neg_insns, inv_insns;
4550 unsigned HOST_WIDE_INT temp1, temp2;
4551 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4552 struct four_ints *immediates;
4553 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4555 /* Find out which operations are safe for a given CODE. Also do a quick
4556 check for degenerate cases; these can occur when DImode operations
4557 are split. */
4558 switch (code)
4560 case SET:
4561 can_invert = 1;
4562 break;
4564 case PLUS:
4565 can_negate = 1;
4566 break;
4568 case IOR:
4569 if (remainder == 0xffffffff)
4571 if (generate)
4572 emit_constant_insn (cond,
4573 gen_rtx_SET (target,
4574 GEN_INT (ARM_SIGN_EXTEND (val))));
4575 return 1;
4578 if (remainder == 0)
4580 if (reload_completed && rtx_equal_p (target, source))
4581 return 0;
4583 if (generate)
4584 emit_constant_insn (cond, gen_rtx_SET (target, source));
4585 return 1;
4587 break;
4589 case AND:
4590 if (remainder == 0)
4592 if (generate)
4593 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4594 return 1;
4596 if (remainder == 0xffffffff)
4598 if (reload_completed && rtx_equal_p (target, source))
4599 return 0;
4600 if (generate)
4601 emit_constant_insn (cond, gen_rtx_SET (target, source));
4602 return 1;
4604 can_invert = 1;
4605 break;
4607 case XOR:
4608 if (remainder == 0)
4610 if (reload_completed && rtx_equal_p (target, source))
4611 return 0;
4612 if (generate)
4613 emit_constant_insn (cond, gen_rtx_SET (target, source));
4614 return 1;
4617 if (remainder == 0xffffffff)
4619 if (generate)
4620 emit_constant_insn (cond,
4621 gen_rtx_SET (target,
4622 gen_rtx_NOT (mode, source)));
4623 return 1;
4625 final_invert = 1;
4626 break;
4628 case MINUS:
4629 /* We treat MINUS as (val - source), since (source - val) is always
4630 passed as (source + (-val)). */
4631 if (remainder == 0)
4633 if (generate)
4634 emit_constant_insn (cond,
4635 gen_rtx_SET (target,
4636 gen_rtx_NEG (mode, source)));
4637 return 1;
4639 if (const_ok_for_arm (val))
4641 if (generate)
4642 emit_constant_insn (cond,
4643 gen_rtx_SET (target,
4644 gen_rtx_MINUS (mode, GEN_INT (val),
4645 source)));
4646 return 1;
4649 break;
4651 default:
4652 gcc_unreachable ();
4655 /* If we can do it in one insn get out quickly. */
4656 if (const_ok_for_op (val, code))
4658 if (generate)
4659 emit_constant_insn (cond,
4660 gen_rtx_SET (target,
4661 (source
4662 ? gen_rtx_fmt_ee (code, mode, source,
4663 GEN_INT (val))
4664 : GEN_INT (val))));
4665 return 1;
4668 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4669 insn. */
4670 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4671 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4673 if (generate)
4675 if (mode == SImode && i == 16)
4676 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4677 smaller insn. */
4678 emit_constant_insn (cond,
4679 gen_zero_extendhisi2
4680 (target, gen_lowpart (HImode, source)));
4681 else
4682 /* Extz only supports SImode, but we can coerce the operands
4683 into that mode. */
4684 emit_constant_insn (cond,
4685 gen_extzv_t2 (gen_lowpart (SImode, target),
4686 gen_lowpart (SImode, source),
4687 GEN_INT (i), const0_rtx));
4690 return 1;
4693 /* Calculate a few attributes that may be useful for specific
4694 optimizations. */
4695 /* Count number of leading zeros. */
4696 for (i = 31; i >= 0; i--)
4698 if ((remainder & (1 << i)) == 0)
4699 clear_sign_bit_copies++;
4700 else
4701 break;
4704 /* Count number of leading 1's. */
4705 for (i = 31; i >= 0; i--)
4707 if ((remainder & (1 << i)) != 0)
4708 set_sign_bit_copies++;
4709 else
4710 break;
4713 /* Count number of trailing zero's. */
4714 for (i = 0; i <= 31; i++)
4716 if ((remainder & (1 << i)) == 0)
4717 clear_zero_bit_copies++;
4718 else
4719 break;
4722 /* Count number of trailing 1's. */
4723 for (i = 0; i <= 31; i++)
4725 if ((remainder & (1 << i)) != 0)
4726 set_zero_bit_copies++;
4727 else
4728 break;
4731 switch (code)
4733 case SET:
4734 /* See if we can do this by sign_extending a constant that is known
4735 to be negative. This is a good way of doing it, since the shift
4736 may well merge into a subsequent insn. */
4737 if (set_sign_bit_copies > 1)
4739 if (const_ok_for_arm
4740 (temp1 = ARM_SIGN_EXTEND (remainder
4741 << (set_sign_bit_copies - 1))))
4743 if (generate)
4745 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4746 emit_constant_insn (cond,
4747 gen_rtx_SET (new_src, GEN_INT (temp1)));
4748 emit_constant_insn (cond,
4749 gen_ashrsi3 (target, new_src,
4750 GEN_INT (set_sign_bit_copies - 1)));
4752 return 2;
4754 /* For an inverted constant, we will need to set the low bits,
4755 these will be shifted out of harm's way. */
4756 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4757 if (const_ok_for_arm (~temp1))
4759 if (generate)
4761 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4762 emit_constant_insn (cond,
4763 gen_rtx_SET (new_src, GEN_INT (temp1)));
4764 emit_constant_insn (cond,
4765 gen_ashrsi3 (target, new_src,
4766 GEN_INT (set_sign_bit_copies - 1)));
4768 return 2;
4772 /* See if we can calculate the value as the difference between two
4773 valid immediates. */
4774 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4776 int topshift = clear_sign_bit_copies & ~1;
4778 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4779 & (0xff000000 >> topshift));
4781 /* If temp1 is zero, then that means the 9 most significant
4782 bits of remainder were 1 and we've caused it to overflow.
4783 When topshift is 0 we don't need to do anything since we
4784 can borrow from 'bit 32'. */
4785 if (temp1 == 0 && topshift != 0)
4786 temp1 = 0x80000000 >> (topshift - 1);
4788 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4790 if (const_ok_for_arm (temp2))
4792 if (generate)
4794 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4795 emit_constant_insn (cond,
4796 gen_rtx_SET (new_src, GEN_INT (temp1)));
4797 emit_constant_insn (cond,
4798 gen_addsi3 (target, new_src,
4799 GEN_INT (-temp2)));
4802 return 2;
4806 /* See if we can generate this by setting the bottom (or the top)
4807 16 bits, and then shifting these into the other half of the
4808 word. We only look for the simplest cases, to do more would cost
4809 too much. Be careful, however, not to generate this when the
4810 alternative would take fewer insns. */
4811 if (val & 0xffff0000)
4813 temp1 = remainder & 0xffff0000;
4814 temp2 = remainder & 0x0000ffff;
4816 /* Overlaps outside this range are best done using other methods. */
4817 for (i = 9; i < 24; i++)
4819 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4820 && !const_ok_for_arm (temp2))
4822 rtx new_src = (subtargets
4823 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4824 : target);
4825 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4826 source, subtargets, generate);
4827 source = new_src;
4828 if (generate)
4829 emit_constant_insn
4830 (cond,
4831 gen_rtx_SET
4832 (target,
4833 gen_rtx_IOR (mode,
4834 gen_rtx_ASHIFT (mode, source,
4835 GEN_INT (i)),
4836 source)));
4837 return insns + 1;
4841 /* Don't duplicate cases already considered. */
4842 for (i = 17; i < 24; i++)
4844 if (((temp1 | (temp1 >> i)) == remainder)
4845 && !const_ok_for_arm (temp1))
4847 rtx new_src = (subtargets
4848 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4849 : target);
4850 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4851 source, subtargets, generate);
4852 source = new_src;
4853 if (generate)
4854 emit_constant_insn
4855 (cond,
4856 gen_rtx_SET (target,
4857 gen_rtx_IOR
4858 (mode,
4859 gen_rtx_LSHIFTRT (mode, source,
4860 GEN_INT (i)),
4861 source)));
4862 return insns + 1;
4866 break;
4868 case IOR:
4869 case XOR:
4870 /* If we have IOR or XOR, and the constant can be loaded in a
4871 single instruction, and we can find a temporary to put it in,
4872 then this can be done in two instructions instead of 3-4. */
4873 if (subtargets
4874 /* TARGET can't be NULL if SUBTARGETS is 0 */
4875 || (reload_completed && !reg_mentioned_p (target, source)))
4877 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4879 if (generate)
4881 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4883 emit_constant_insn (cond,
4884 gen_rtx_SET (sub, GEN_INT (val)));
4885 emit_constant_insn (cond,
4886 gen_rtx_SET (target,
4887 gen_rtx_fmt_ee (code, mode,
4888 source, sub)));
4890 return 2;
4894 if (code == XOR)
4895 break;
4897 /* Convert.
4898 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4899 and the remaining bits 0, e.g. 0xfff00000)
4900 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4902 This can be done in 2 instructions by using shifts with mov or mvn.
4903 e.g. for
4904 x = x | 0xfff00000;
4905 we generate.
4906 mvn r0, r0, asl #12
4907 mvn r0, r0, lsr #12 */
4908 if (set_sign_bit_copies > 8
4909 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4911 if (generate)
4913 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4914 rtx shift = GEN_INT (set_sign_bit_copies);
4916 emit_constant_insn
4917 (cond,
4918 gen_rtx_SET (sub,
4919 gen_rtx_NOT (mode,
4920 gen_rtx_ASHIFT (mode,
4921 source,
4922 shift))));
4923 emit_constant_insn
4924 (cond,
4925 gen_rtx_SET (target,
4926 gen_rtx_NOT (mode,
4927 gen_rtx_LSHIFTRT (mode, sub,
4928 shift))));
4930 return 2;
4933 /* Convert
4934 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4936 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4938 For eg. r0 = r0 | 0xfff
4939 mvn r0, r0, lsr #12
4940 mvn r0, r0, asl #12 */
4943 if (set_zero_bit_copies > 8
4944 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4946 if (generate)
4948 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4949 rtx shift = GEN_INT (set_zero_bit_copies);
4951 emit_constant_insn
4952 (cond,
4953 gen_rtx_SET (sub,
4954 gen_rtx_NOT (mode,
4955 gen_rtx_LSHIFTRT (mode,
4956 source,
4957 shift))));
4958 emit_constant_insn
4959 (cond,
4960 gen_rtx_SET (target,
4961 gen_rtx_NOT (mode,
4962 gen_rtx_ASHIFT (mode, sub,
4963 shift))));
4965 return 2;
4968 /* This will never be reached for Thumb2 because orn is a valid
4969 instruction. This is for Thumb1 and the ARM 32 bit cases.
4971 x = y | constant (such that ~constant is a valid constant)
4972 Transform this to
4973 x = ~(~y & ~constant). */
4975 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4977 if (generate)
4979 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4980 emit_constant_insn (cond,
4981 gen_rtx_SET (sub,
4982 gen_rtx_NOT (mode, source)));
4983 source = sub;
4984 if (subtargets)
4985 sub = gen_reg_rtx (mode);
4986 emit_constant_insn (cond,
4987 gen_rtx_SET (sub,
4988 gen_rtx_AND (mode, source,
4989 GEN_INT (temp1))));
4990 emit_constant_insn (cond,
4991 gen_rtx_SET (target,
4992 gen_rtx_NOT (mode, sub)));
4994 return 3;
4996 break;
4998 case AND:
4999 /* See if two shifts will do 2 or more insns' worth of work. */
5000 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5002 HOST_WIDE_INT shift_mask = ((0xffffffff
5003 << (32 - clear_sign_bit_copies))
5004 & 0xffffffff);
5006 if ((remainder | shift_mask) != 0xffffffff)
5008 HOST_WIDE_INT new_val
5009 = ARM_SIGN_EXTEND (remainder | shift_mask);
5011 if (generate)
5013 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5014 insns = arm_gen_constant (AND, SImode, cond, new_val,
5015 new_src, source, subtargets, 1);
5016 source = new_src;
5018 else
5020 rtx targ = subtargets ? NULL_RTX : target;
5021 insns = arm_gen_constant (AND, mode, cond, new_val,
5022 targ, source, subtargets, 0);
5026 if (generate)
5028 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5029 rtx shift = GEN_INT (clear_sign_bit_copies);
5031 emit_insn (gen_ashlsi3 (new_src, source, shift));
5032 emit_insn (gen_lshrsi3 (target, new_src, shift));
5035 return insns + 2;
5038 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5040 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5042 if ((remainder | shift_mask) != 0xffffffff)
5044 HOST_WIDE_INT new_val
5045 = ARM_SIGN_EXTEND (remainder | shift_mask);
5046 if (generate)
5048 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5050 insns = arm_gen_constant (AND, mode, cond, new_val,
5051 new_src, source, subtargets, 1);
5052 source = new_src;
5054 else
5056 rtx targ = subtargets ? NULL_RTX : target;
5058 insns = arm_gen_constant (AND, mode, cond, new_val,
5059 targ, source, subtargets, 0);
5063 if (generate)
5065 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5066 rtx shift = GEN_INT (clear_zero_bit_copies);
5068 emit_insn (gen_lshrsi3 (new_src, source, shift));
5069 emit_insn (gen_ashlsi3 (target, new_src, shift));
5072 return insns + 2;
5075 break;
5077 default:
5078 break;
5081 /* Calculate what the instruction sequences would be if we generated it
5082 normally, negated, or inverted. */
5083 if (code == AND)
5084 /* AND cannot be split into multiple insns, so invert and use BIC. */
5085 insns = 99;
5086 else
5087 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5089 if (can_negate)
5090 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5091 &neg_immediates);
5092 else
5093 neg_insns = 99;
5095 if (can_invert || final_invert)
5096 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5097 &inv_immediates);
5098 else
5099 inv_insns = 99;
5101 immediates = &pos_immediates;
5103 /* Is the negated immediate sequence more efficient? */
5104 if (neg_insns < insns && neg_insns <= inv_insns)
5106 insns = neg_insns;
5107 immediates = &neg_immediates;
5109 else
5110 can_negate = 0;
5112 /* Is the inverted immediate sequence more efficient?
5113 We must allow for an extra NOT instruction for XOR operations, although
5114 there is some chance that the final 'mvn' will get optimized later. */
5115 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5117 insns = inv_insns;
5118 immediates = &inv_immediates;
5120 else
5122 can_invert = 0;
5123 final_invert = 0;
5126 /* Now output the chosen sequence as instructions. */
5127 if (generate)
5129 for (i = 0; i < insns; i++)
5131 rtx new_src, temp1_rtx;
5133 temp1 = immediates->i[i];
5135 if (code == SET || code == MINUS)
5136 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5137 else if ((final_invert || i < (insns - 1)) && subtargets)
5138 new_src = gen_reg_rtx (mode);
5139 else
5140 new_src = target;
5142 if (can_invert)
5143 temp1 = ~temp1;
5144 else if (can_negate)
5145 temp1 = -temp1;
5147 temp1 = trunc_int_for_mode (temp1, mode);
5148 temp1_rtx = GEN_INT (temp1);
5150 if (code == SET)
5152 else if (code == MINUS)
5153 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5154 else
5155 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5157 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5158 source = new_src;
5160 if (code == SET)
5162 can_negate = can_invert;
5163 can_invert = 0;
5164 code = PLUS;
5166 else if (code == MINUS)
5167 code = PLUS;
5171 if (final_invert)
5173 if (generate)
5174 emit_constant_insn (cond, gen_rtx_SET (target,
5175 gen_rtx_NOT (mode, source)));
5176 insns++;
5179 return insns;
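/* To make the machinery above concrete, consider synthesizing a SET of
   0x000fff00 in ARM mode.  The value is not a valid immediate, but it does
   split into two 8-bit chunks at even positions, so one plausible sequence
   (the exact split depends on the best_start heuristic) is roughly

     mov     rD, #0x000ff000
     add     rD, rD, #0x00000f00

   where the second chunk is emitted as PLUS because CODE is rewritten from
   SET to PLUS after the first instruction.  */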
5182 /* Canonicalize a comparison so that we are more likely to recognize it.
5183 This can be done for a few constant compares, where we can make the
5184 immediate value easier to load. */
5186 static void
5187 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5188 bool op0_preserve_value)
5190 machine_mode mode;
5191 unsigned HOST_WIDE_INT i, maxval;
5193 mode = GET_MODE (*op0);
5194 if (mode == VOIDmode)
5195 mode = GET_MODE (*op1);
5197 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5199 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5200 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5201 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5202 for GTU/LEU in Thumb mode. */
5203 if (mode == DImode)
5206 if (*code == GT || *code == LE
5207 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5209 /* Missing comparison. First try to use an available
5210 comparison. */
5211 if (CONST_INT_P (*op1))
5213 i = INTVAL (*op1);
5214 switch (*code)
5216 case GT:
5217 case LE:
5218 if (i != maxval
5219 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5221 *op1 = GEN_INT (i + 1);
5222 *code = *code == GT ? GE : LT;
5223 return;
5225 break;
5226 case GTU:
5227 case LEU:
5228 if (i != ~((unsigned HOST_WIDE_INT) 0)
5229 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5231 *op1 = GEN_INT (i + 1);
5232 *code = *code == GTU ? GEU : LTU;
5233 return;
5235 break;
5236 default:
5237 gcc_unreachable ();
5241 /* If that did not work, reverse the condition. */
5242 if (!op0_preserve_value)
5244 std::swap (*op0, *op1);
5245 *code = (int)swap_condition ((enum rtx_code)*code);
5248 return;
5251 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5252 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5253 to facilitate possible combining with a cmp into 'ands'. */
5254 if (mode == SImode
5255 && GET_CODE (*op0) == ZERO_EXTEND
5256 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5257 && GET_MODE (XEXP (*op0, 0)) == QImode
5258 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5259 && subreg_lowpart_p (XEXP (*op0, 0))
5260 && *op1 == const0_rtx)
5261 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5262 GEN_INT (255));
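/* At the source level this corresponds to tests roughly of the form

     if ((unsigned char) x == 0) ...

   where the narrowing would otherwise need a separate uxtb (or and) before
   the compare; rewriting the operand as (and:SI x 255) gives combine a
   chance to fold the test into a single ands.  */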
5264 /* Comparisons smaller than DImode. Only adjust comparisons against
5265 an out-of-range constant. */
5266 if (!CONST_INT_P (*op1)
5267 || const_ok_for_arm (INTVAL (*op1))
5268 || const_ok_for_arm (- INTVAL (*op1)))
5269 return;
5271 i = INTVAL (*op1);
5273 switch (*code)
5275 case EQ:
5276 case NE:
5277 return;
5279 case GT:
5280 case LE:
5281 if (i != maxval
5282 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5284 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5285 *code = *code == GT ? GE : LT;
5286 return;
5288 break;
5290 case GE:
5291 case LT:
5292 if (i != ~maxval
5293 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5295 *op1 = GEN_INT (i - 1);
5296 *code = *code == GE ? GT : LE;
5297 return;
5299 break;
5301 case GTU:
5302 case LEU:
5303 if (i != ~((unsigned HOST_WIDE_INT) 0)
5304 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5306 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5307 *code = *code == GTU ? GEU : LTU;
5308 return;
5310 break;
5312 case GEU:
5313 case LTU:
5314 if (i != 0
5315 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5317 *op1 = GEN_INT (i - 1);
5318 *code = *code == GEU ? GTU : LEU;
5319 return;
5321 break;
5323 default:
5324 gcc_unreachable ();
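/* A minimal standalone sketch (illustration only, not used by the
   compiler) of the rule const_ok_for_arm implements for ARM-state
   data-processing immediates: a value is encodable if it is an 8-bit
   constant rotated right by an even amount within a 32-bit word.  The
   adjustments above rely on this; e.g. GT against 0x1fff (not
   encodable) becomes GE against 0x2000 (0x20 rotated), so no extra
   instruction is needed to load the comparison constant.  */
#if 0
static int
example_arm_immediate_ok (unsigned int i)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating I left by ROT undoes a right rotation by ROT; if the
	 result fits in 8 bits, I is a valid immediate.  */
      unsigned int undone = (i << rot) | (i >> ((32 - rot) & 31));
      if ((undone & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif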
5329 /* Define how to find the value returned by a function. */
5331 static rtx
5332 arm_function_value(const_tree type, const_tree func,
5333 bool outgoing ATTRIBUTE_UNUSED)
5335 machine_mode mode;
5336 int unsignedp ATTRIBUTE_UNUSED;
5337 rtx r ATTRIBUTE_UNUSED;
5339 mode = TYPE_MODE (type);
5341 if (TARGET_AAPCS_BASED)
5342 return aapcs_allocate_return_reg (mode, type, func);
5344 /* Promote integer types. */
5345 if (INTEGRAL_TYPE_P (type))
5346 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5348 /* Promotes small structs returned in a register to full-word size
5349 for big-endian AAPCS. */
5350 if (arm_return_in_msb (type))
5352 HOST_WIDE_INT size = int_size_in_bytes (type);
5353 if (size % UNITS_PER_WORD != 0)
5355 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5356 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5360 return arm_libcall_value_1 (mode);
5363 /* libcall hashtable helpers. */
5365 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5367 static inline hashval_t hash (const rtx_def *);
5368 static inline bool equal (const rtx_def *, const rtx_def *);
5369 static inline void remove (rtx_def *);
5372 inline bool
5373 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5375 return rtx_equal_p (p1, p2);
5378 inline hashval_t
5379 libcall_hasher::hash (const rtx_def *p1)
5381 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5384 typedef hash_table<libcall_hasher> libcall_table_type;
5386 static void
5387 add_libcall (libcall_table_type *htab, rtx libcall)
5389 *htab->find_slot (libcall, INSERT) = libcall;
5392 static bool
5393 arm_libcall_uses_aapcs_base (const_rtx libcall)
5395 static bool init_done = false;
5396 static libcall_table_type *libcall_htab = NULL;
5398 if (!init_done)
5400 init_done = true;
5402 libcall_htab = new libcall_table_type (31);
5403 add_libcall (libcall_htab,
5404 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5405 add_libcall (libcall_htab,
5406 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5407 add_libcall (libcall_htab,
5408 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5412 add_libcall (libcall_htab,
5413 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5431 add_libcall (libcall_htab,
5432 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5433 add_libcall (libcall_htab,
5434 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5438 /* Values from double-precision helper functions are returned in core
5439 registers if the selected core only supports single-precision
5440 arithmetic, even if we are using the hard-float ABI. The same is
5441 true for single-precision helpers, but we will never be using the
5442 hard-float ABI on a CPU which doesn't support single-precision
5443 operations in hardware. */
5444 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5452 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5453 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5454 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5455 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5456 SFmode));
5457 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5458 DFmode));
5459 add_libcall (libcall_htab,
5460 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5463 return libcall && libcall_htab->find (libcall) != NULL;
5466 static rtx
5467 arm_libcall_value_1 (machine_mode mode)
5469 if (TARGET_AAPCS_BASED)
5470 return aapcs_libcall_value (mode);
5471 else if (TARGET_IWMMXT_ABI
5472 && arm_vector_mode_supported_p (mode))
5473 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5474 else
5475 return gen_rtx_REG (mode, ARG_REGISTER (1));
5478 /* Define how to find the value returned by a library function
5479 assuming the value has mode MODE. */
5481 static rtx
5482 arm_libcall_value (machine_mode mode, const_rtx libcall)
5484 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5485 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5487 /* The following libcalls return their result in integer registers,
5488 even though they return a floating point value. */
5489 if (arm_libcall_uses_aapcs_base (libcall))
5490 return gen_rtx_REG (mode, ARG_REGISTER(1));
5494 return arm_libcall_value_1 (mode);
5497 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5499 static bool
5500 arm_function_value_regno_p (const unsigned int regno)
5502 if (regno == ARG_REGISTER (1)
5503 || (TARGET_32BIT
5504 && TARGET_AAPCS_BASED
5505 && TARGET_HARD_FLOAT
5506 && regno == FIRST_VFP_REGNUM)
5507 || (TARGET_IWMMXT_ABI
5508 && regno == FIRST_IWMMXT_REGNUM))
5509 return true;
5511 return false;
5514 /* Determine the amount of memory needed to store the possible return
5515 registers of an untyped call. */
5516 int
5517 arm_apply_result_size (void)
5519 int size = 16;
5521 if (TARGET_32BIT)
5523 if (TARGET_HARD_FLOAT_ABI)
5524 size += 32;
5525 if (TARGET_IWMMXT_ABI)
5526 size += 8;
5529 return size;
5532 /* Decide whether TYPE should be returned in memory (true)
5533 or in a register (false). FNTYPE is the type of the function making
5534 the call. */
5535 static bool
5536 arm_return_in_memory (const_tree type, const_tree fntype)
5538 HOST_WIDE_INT size;
5540 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5542 if (TARGET_AAPCS_BASED)
5544 /* Simple, non-aggregate types (i.e. not including vectors and
5545 complex) are always returned in a register (or registers).
5546 We don't care about which register here, so we can short-cut
5547 some of the detail. */
5548 if (!AGGREGATE_TYPE_P (type)
5549 && TREE_CODE (type) != VECTOR_TYPE
5550 && TREE_CODE (type) != COMPLEX_TYPE)
5551 return false;
5553 /* Any return value that is no larger than one word can be
5554 returned in r0. */
5555 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5556 return false;
5558 /* Check any available co-processors to see if they accept the
5559 type as a register candidate (VFP, for example, can return
5560 some aggregates in consecutive registers). These aren't
5561 available if the call is variadic. */
5562 if (aapcs_select_return_coproc (type, fntype) >= 0)
5563 return false;
5565 /* Vector values should be returned using ARM registers, not
5566 memory (unless they're over 16 bytes, which will break since
5567 we only have four call-clobbered registers to play with). */
5568 if (TREE_CODE (type) == VECTOR_TYPE)
5569 return (size < 0 || size > (4 * UNITS_PER_WORD));
5571 /* The rest go in memory. */
5572 return true;
5575 if (TREE_CODE (type) == VECTOR_TYPE)
5576 return (size < 0 || size > (4 * UNITS_PER_WORD));
5578 if (!AGGREGATE_TYPE_P (type) &&
5579 (TREE_CODE (type) != VECTOR_TYPE))
5580 /* All simple types are returned in registers. */
5581 return false;
5583 if (arm_abi != ARM_ABI_APCS)
5585 /* ATPCS and later return aggregate types in memory only if they are
5586 larger than a word (or are variable size). */
5587 return (size < 0 || size > UNITS_PER_WORD);
5590 /* For the arm-wince targets we choose to be compatible with Microsoft's
5591 ARM and Thumb compilers, which always return aggregates in memory. */
5592 #ifndef ARM_WINCE
5593 /* All structures/unions bigger than one word are returned in memory.
5594 Also catch the case where int_size_in_bytes returns -1. In this case
5595 the aggregate is either huge or of variable size, and in either case
5596 we will want to return it via memory and not in a register. */
5597 if (size < 0 || size > UNITS_PER_WORD)
5598 return true;
5600 if (TREE_CODE (type) == RECORD_TYPE)
5602 tree field;
5604 /* For a struct the APCS says that we only return in a register
5605 if the type is 'integer like' and every addressable element
5606 has an offset of zero. For practical purposes this means
5607 that the structure can have at most one non bit-field element
5608 and that this element must be the first one in the structure. */
5610 /* Find the first field, ignoring non FIELD_DECL things which will
5611 have been created by C++. */
5612 for (field = TYPE_FIELDS (type);
5613 field && TREE_CODE (field) != FIELD_DECL;
5614 field = DECL_CHAIN (field))
5615 continue;
5617 if (field == NULL)
5618 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5620 /* Check that the first field is valid for returning in a register. */
5622 /* ... Floats are not allowed */
5623 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5624 return true;
5626 /* ... Aggregates that are not themselves valid for returning in
5627 a register are not allowed. */
5628 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5629 return true;
5631 /* Now check the remaining fields, if any. Only bitfields are allowed,
5632 since they are not addressable. */
5633 for (field = DECL_CHAIN (field);
5634 field;
5635 field = DECL_CHAIN (field))
5637 if (TREE_CODE (field) != FIELD_DECL)
5638 continue;
5640 if (!DECL_BIT_FIELD_TYPE (field))
5641 return true;
5644 return false;
5647 if (TREE_CODE (type) == UNION_TYPE)
5649 tree field;
5651 /* Unions can be returned in registers if every element is
5652 integral, or can be returned in an integer register. */
5653 for (field = TYPE_FIELDS (type);
5654 field;
5655 field = DECL_CHAIN (field))
5657 if (TREE_CODE (field) != FIELD_DECL)
5658 continue;
5660 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5661 return true;
5663 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5664 return true;
5667 return false;
5669 #endif /* not ARM_WINCE */
5671 /* Return all other types in memory. */
5672 return true;
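/* A minimal sketch of how the rules above classify a few hypothetical
   user-level types, assuming an AAPCS-based target; the homogeneous
   float case additionally assumes the VFP (hard-float) variant found
   by aapcs_select_return_coproc.  */
#if 0
struct one_word   { int x; };       /* <= UNITS_PER_WORD: returned in r0.  */
struct three_words { int a[3]; };   /* 12 bytes, not a co-processor
				       candidate: returned in memory.  */
struct hfa2       { float x, y; };  /* Homogeneous float aggregate: returned
				       in s0/s1 under the VFP ABI, otherwise
				       in memory.  */
#endif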
5675 const struct pcs_attribute_arg
5677 const char *arg;
5678 enum arm_pcs value;
5679 } pcs_attribute_args[] =
5681 {"aapcs", ARM_PCS_AAPCS},
5682 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5683 #if 0
5684 /* We could recognize these, but changes would be needed elsewhere
5685 * to implement them. */
5686 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5687 {"atpcs", ARM_PCS_ATPCS},
5688 {"apcs", ARM_PCS_APCS},
5689 #endif
5690 {NULL, ARM_PCS_UNKNOWN}
5693 static enum arm_pcs
5694 arm_pcs_from_attribute (tree attr)
5696 const struct pcs_attribute_arg *ptr;
5697 const char *arg;
5699 /* Get the value of the argument. */
5700 if (TREE_VALUE (attr) == NULL_TREE
5701 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5702 return ARM_PCS_UNKNOWN;
5704 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5706 /* Check it against the list of known arguments. */
5707 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5708 if (streq (arg, ptr->arg))
5709 return ptr->value;
5711 /* An unrecognized PCS name. */
5712 return ARM_PCS_UNKNOWN;
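/* For reference, user declarations carrying the attribute parsed above
   look roughly like the following (hypothetical prototypes; "aapcs" and
   "aapcs-vfp" are the only names this table accepts, and the VFP variant
   assumes a target where hard-float calls are available).  */
#if 0
double hypot_vfp (double, double) __attribute__ ((pcs ("aapcs-vfp")));
double hypot_base (double, double) __attribute__ ((pcs ("aapcs")));
#endif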
5715 /* Get the PCS variant to use for this call. TYPE is the function's type
5716 specification, DECL is the specific declaration. DECL may be null if
5717 the call could be indirect or if this is a library call. */
5718 static enum arm_pcs
5719 arm_get_pcs_model (const_tree type, const_tree decl)
5721 bool user_convention = false;
5722 enum arm_pcs user_pcs = arm_pcs_default;
5723 tree attr;
5725 gcc_assert (type);
5727 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5728 if (attr)
5730 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5731 user_convention = true;
5734 if (TARGET_AAPCS_BASED)
5736 /* Detect varargs functions. These always use the base rules
5737 (no argument is ever a candidate for a co-processor
5738 register). */
5739 bool base_rules = stdarg_p (type);
5741 if (user_convention)
5743 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5744 sorry ("non-AAPCS derived PCS variant");
5745 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5746 error ("variadic functions must use the base AAPCS variant");
5749 if (base_rules)
5750 return ARM_PCS_AAPCS;
5751 else if (user_convention)
5752 return user_pcs;
5753 else if (decl && flag_unit_at_a_time)
5755 /* Local functions never leak outside this compilation unit,
5756 so we are free to use whatever conventions are
5757 appropriate. */
5758 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5759 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5760 if (i && i->local)
5761 return ARM_PCS_AAPCS_LOCAL;
5764 else if (user_convention && user_pcs != arm_pcs_default)
5765 sorry ("PCS variant");
5767 /* For everything else we use the target's default. */
5768 return arm_pcs_default;
5772 static void
5773 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5774 const_tree fntype ATTRIBUTE_UNUSED,
5775 rtx libcall ATTRIBUTE_UNUSED,
5776 const_tree fndecl ATTRIBUTE_UNUSED)
5778 /* Record the unallocated VFP registers. */
5779 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5780 pcum->aapcs_vfp_reg_alloc = 0;
5783 /* Walk down the type tree of TYPE counting consecutive base elements.
5784 If *MODEP is VOIDmode, then set it to the first valid floating point
5785 type. If a non-floating point type is found, or if a floating point
5786 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5787 otherwise return the count in the sub-tree. */
5788 static int
5789 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5791 machine_mode mode;
5792 HOST_WIDE_INT size;
5794 switch (TREE_CODE (type))
5796 case REAL_TYPE:
5797 mode = TYPE_MODE (type);
5798 if (mode != DFmode && mode != SFmode && mode != HFmode)
5799 return -1;
5801 if (*modep == VOIDmode)
5802 *modep = mode;
5804 if (*modep == mode)
5805 return 1;
5807 break;
5809 case COMPLEX_TYPE:
5810 mode = TYPE_MODE (TREE_TYPE (type));
5811 if (mode != DFmode && mode != SFmode)
5812 return -1;
5814 if (*modep == VOIDmode)
5815 *modep = mode;
5817 if (*modep == mode)
5818 return 2;
5820 break;
5822 case VECTOR_TYPE:
5823 /* Use V2SImode and V4SImode as representatives of all 64-bit
5824 and 128-bit vector types, whether or not those modes are
5825 supported with the present options. */
5826 size = int_size_in_bytes (type);
5827 switch (size)
5829 case 8:
5830 mode = V2SImode;
5831 break;
5832 case 16:
5833 mode = V4SImode;
5834 break;
5835 default:
5836 return -1;
5839 if (*modep == VOIDmode)
5840 *modep = mode;
5842 /* Vector modes are considered to be opaque: two vectors are
5843 equivalent for the purposes of being homogeneous aggregates
5844 if they are the same size. */
5845 if (*modep == mode)
5846 return 1;
5848 break;
5850 case ARRAY_TYPE:
5852 int count;
5853 tree index = TYPE_DOMAIN (type);
5855 /* Can't handle incomplete types nor sizes that are not
5856 fixed. */
5857 if (!COMPLETE_TYPE_P (type)
5858 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5859 return -1;
5861 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5862 if (count == -1
5863 || !index
5864 || !TYPE_MAX_VALUE (index)
5865 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5866 || !TYPE_MIN_VALUE (index)
5867 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5868 || count < 0)
5869 return -1;
5871 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5872 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5874 /* There must be no padding. */
5875 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5876 return -1;
5878 return count;
5881 case RECORD_TYPE:
5883 int count = 0;
5884 int sub_count;
5885 tree field;
5887 /* Can't handle incomplete types nor sizes that are not
5888 fixed. */
5889 if (!COMPLETE_TYPE_P (type)
5890 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5891 return -1;
5893 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5895 if (TREE_CODE (field) != FIELD_DECL)
5896 continue;
5898 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5899 if (sub_count < 0)
5900 return -1;
5901 count += sub_count;
5904 /* There must be no padding. */
5905 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5906 return -1;
5908 return count;
5911 case UNION_TYPE:
5912 case QUAL_UNION_TYPE:
5914 /* These aren't very interesting except in a degenerate case. */
5915 int count = 0;
5916 int sub_count;
5917 tree field;
5919 /* Can't handle incomplete types nor sizes that are not
5920 fixed. */
5921 if (!COMPLETE_TYPE_P (type)
5922 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5923 return -1;
5925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5927 if (TREE_CODE (field) != FIELD_DECL)
5928 continue;
5930 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5931 if (sub_count < 0)
5932 return -1;
5933 count = count > sub_count ? count : sub_count;
5936 /* There must be no padding. */
5937 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5938 return -1;
5940 return count;
5943 default:
5944 break;
5947 return -1;
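/* A sketch of what the walk above computes for a few hypothetical user
   types (the result is the element count, with the common element mode
   left in *MODEP).  */
#if 0
struct v3  { float x, y, z; };      /* -> 3, SFmode: a candidate.  */
typedef double mat2[2][2];          /* -> 4, DFmode: a candidate.  */
_Complex double cplx;               /* -> 2, DFmode: a candidate.  */
struct bad { float f; double d; };  /* -> -1: mixed base types.  */
struct big { float f[5]; };         /* -> 5: rejected by the caller,
				       which only accepts 1..4.  */
#endif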
5950 /* Return true if PCS_VARIANT should use VFP registers. */
5951 static bool
5952 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5954 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5956 static bool seen_thumb1_vfp = false;
5958 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5960 sorry ("Thumb-1 hard-float VFP ABI");
5961 /* sorry() is not immediately fatal, so only display this once. */
5962 seen_thumb1_vfp = true;
5965 return true;
5968 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5969 return false;
5971 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5972 (TARGET_VFP_DOUBLE || !is_double));
5975 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5976 suitable for passing or returning in VFP registers for the PCS
5977 variant selected. If it is, then *BASE_MODE is updated to contain
5978 a machine mode describing each element of the argument's type and
5979 *COUNT to hold the number of such elements. */
5980 static bool
5981 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5982 machine_mode mode, const_tree type,
5983 machine_mode *base_mode, int *count)
5985 machine_mode new_mode = VOIDmode;
5987 /* If we have the type information, prefer that to working things
5988 out from the mode. */
5989 if (type)
5991 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5993 if (ag_count > 0 && ag_count <= 4)
5994 *count = ag_count;
5995 else
5996 return false;
5998 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5999 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6000 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6002 *count = 1;
6003 new_mode = mode;
6005 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6007 *count = 2;
6008 new_mode = (mode == DCmode ? DFmode : SFmode);
6010 else
6011 return false;
6014 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6015 return false;
6017 *base_mode = new_mode;
6018 return true;
6021 static bool
6022 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6023 machine_mode mode, const_tree type)
6025 int count ATTRIBUTE_UNUSED;
6026 machine_mode ag_mode ATTRIBUTE_UNUSED;
6028 if (!use_vfp_abi (pcs_variant, false))
6029 return false;
6030 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6031 &ag_mode, &count);
6034 static bool
6035 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6036 const_tree type)
6038 if (!use_vfp_abi (pcum->pcs_variant, false))
6039 return false;
6041 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6042 &pcum->aapcs_vfp_rmode,
6043 &pcum->aapcs_vfp_rcount);
6046 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6047 for the behaviour of this function. */
6049 static bool
6050 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6051 const_tree type ATTRIBUTE_UNUSED)
6053 int rmode_size
6054 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6055 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6056 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6057 int regno;
6059 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6060 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6062 pcum->aapcs_vfp_reg_alloc = mask << regno;
6063 if (mode == BLKmode
6064 || (mode == TImode && ! TARGET_NEON)
6065 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6067 int i;
6068 int rcount = pcum->aapcs_vfp_rcount;
6069 int rshift = shift;
6070 machine_mode rmode = pcum->aapcs_vfp_rmode;
6071 rtx par;
6072 if (!TARGET_NEON)
6074 /* Avoid using unsupported vector modes. */
6075 if (rmode == V2SImode)
6076 rmode = DImode;
6077 else if (rmode == V4SImode)
6079 rmode = DImode;
6080 rcount *= 2;
6081 rshift /= 2;
6084 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6085 for (i = 0; i < rcount; i++)
6087 rtx tmp = gen_rtx_REG (rmode,
6088 FIRST_VFP_REGNUM + regno + i * rshift);
6089 tmp = gen_rtx_EXPR_LIST
6090 (VOIDmode, tmp,
6091 GEN_INT (i * GET_MODE_SIZE (rmode)));
6092 XVECEXP (par, 0, i) = tmp;
6095 pcum->aapcs_reg = par;
6097 else
6098 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6099 return true;
6101 return false;
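/* A sketch of the bit arithmetic above for a candidate of two DFmode
   elements (e.g. struct { double a, b; }): rmode_size is 8, so SHIFT is
   two S-register slots per element and MASK is 0xf, i.e. four consecutive
   S registers aligned to a D register.  Scanning REGNO in steps of SHIFT
   implements the AAPCS back-filling rule: if s0-s3 are already allocated
   but s4-s7 are free, the argument lands in d2/d3.  */
#if 0
static unsigned int
example_vfp_mask (int elem_bytes, int count)
{
  int shift = (elem_bytes > 4 ? elem_bytes : 4) / 4; /* S slots per element.  */
  return (1u << (shift * count)) - 1;		     /* e.g. (8, 2) -> 0xf.  */
}
#endif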
6104 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6105 comment there for the behaviour of this function. */
6107 static rtx
6108 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6109 machine_mode mode,
6110 const_tree type ATTRIBUTE_UNUSED)
6112 if (!use_vfp_abi (pcs_variant, false))
6113 return NULL;
6115 if (mode == BLKmode
6116 || (GET_MODE_CLASS (mode) == MODE_INT
6117 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6118 && !TARGET_NEON))
6120 int count;
6121 machine_mode ag_mode;
6122 int i;
6123 rtx par;
6124 int shift;
6126 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6127 &ag_mode, &count);
6129 if (!TARGET_NEON)
6131 if (ag_mode == V2SImode)
6132 ag_mode = DImode;
6133 else if (ag_mode == V4SImode)
6135 ag_mode = DImode;
6136 count *= 2;
6139 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6140 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6141 for (i = 0; i < count; i++)
6143 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6144 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6145 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6146 XVECEXP (par, 0, i) = tmp;
6149 return par;
6152 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6155 static void
6156 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6157 machine_mode mode ATTRIBUTE_UNUSED,
6158 const_tree type ATTRIBUTE_UNUSED)
6160 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6161 pcum->aapcs_vfp_reg_alloc = 0;
6162 return;
6165 #define AAPCS_CP(X) \
6167 aapcs_ ## X ## _cum_init, \
6168 aapcs_ ## X ## _is_call_candidate, \
6169 aapcs_ ## X ## _allocate, \
6170 aapcs_ ## X ## _is_return_candidate, \
6171 aapcs_ ## X ## _allocate_return_reg, \
6172 aapcs_ ## X ## _advance \
6175 /* Table of co-processors that can be used to pass arguments in
6176 registers. Ideally no argument should be a candidate for more than
6177 one co-processor table entry, but the table is processed in order
6178 and stops after the first match. If that entry then fails to put
6179 the argument into a co-processor register, the argument will go on
6180 the stack. */
6181 static struct
6183 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6184 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6186 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6187 BLKmode) is a candidate for this co-processor's registers; this
6188 function should ignore any position-dependent state in
6189 CUMULATIVE_ARGS and only use call-type dependent information. */
6190 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6192 /* Return true if the argument does get a co-processor register; it
6193 should set aapcs_reg to an RTX of the register allocated as is
6194 required for a return from FUNCTION_ARG. */
6195 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6197 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6198 be returned in this co-processor's registers. */
6199 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6201 /* Allocate and return an RTX element to hold the return type of a call. This
6202 routine must not fail and will only be called if is_return_candidate
6203 returned true with the same parameters. */
6204 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6206 /* Finish processing this argument and prepare to start processing
6207 the next one. */
6208 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6209 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6211 AAPCS_CP(vfp)
6214 #undef AAPCS_CP
6216 static int
6217 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6218 const_tree type)
6220 int i;
6222 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6223 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6224 return i;
6226 return -1;
6229 static int
6230 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6232 /* We aren't passed a decl, so we can't check that a call is local.
6233 However, it isn't clear that that would be a win anyway, since it
6234 might limit some tail-calling opportunities. */
6235 enum arm_pcs pcs_variant;
6237 if (fntype)
6239 const_tree fndecl = NULL_TREE;
6241 if (TREE_CODE (fntype) == FUNCTION_DECL)
6243 fndecl = fntype;
6244 fntype = TREE_TYPE (fntype);
6247 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6249 else
6250 pcs_variant = arm_pcs_default;
6252 if (pcs_variant != ARM_PCS_AAPCS)
6254 int i;
6256 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6257 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6258 TYPE_MODE (type),
6259 type))
6260 return i;
6262 return -1;
6265 static rtx
6266 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6267 const_tree fntype)
6269 /* We aren't passed a decl, so we can't check that a call is local.
6270 However, it isn't clear that that would be a win anyway, since it
6271 might limit some tail-calling opportunities. */
6272 enum arm_pcs pcs_variant;
6273 int unsignedp ATTRIBUTE_UNUSED;
6275 if (fntype)
6277 const_tree fndecl = NULL_TREE;
6279 if (TREE_CODE (fntype) == FUNCTION_DECL)
6281 fndecl = fntype;
6282 fntype = TREE_TYPE (fntype);
6285 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6287 else
6288 pcs_variant = arm_pcs_default;
6290 /* Promote integer types. */
6291 if (type && INTEGRAL_TYPE_P (type))
6292 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6294 if (pcs_variant != ARM_PCS_AAPCS)
6296 int i;
6298 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6299 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6300 type))
6301 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6302 mode, type);
6305 /* Promotes small structs returned in a register to full-word size
6306 for big-endian AAPCS. */
6307 if (type && arm_return_in_msb (type))
6309 HOST_WIDE_INT size = int_size_in_bytes (type);
6310 if (size % UNITS_PER_WORD != 0)
6312 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6313 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6317 return gen_rtx_REG (mode, R0_REGNUM);
6320 static rtx
6321 aapcs_libcall_value (machine_mode mode)
6323 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6324 && GET_MODE_SIZE (mode) <= 4)
6325 mode = SImode;
6327 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6330 /* Lay out a function argument using the AAPCS rules. The rule
6331 numbers referred to here are those in the AAPCS. */
6332 static void
6333 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6334 const_tree type, bool named)
6336 int nregs, nregs2;
6337 int ncrn;
6339 /* We only need to do this once per argument. */
6340 if (pcum->aapcs_arg_processed)
6341 return;
6343 pcum->aapcs_arg_processed = true;
6345 /* Special case: if named is false then we are handling an incoming
6346 anonymous argument which is on the stack. */
6347 if (!named)
6348 return;
6350 /* Is this a potential co-processor register candidate? */
6351 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6353 int slot = aapcs_select_call_coproc (pcum, mode, type);
6354 pcum->aapcs_cprc_slot = slot;
6356 /* We don't have to apply any of the rules from part B of the
6357 preparation phase, these are handled elsewhere in the
6358 compiler. */
6360 if (slot >= 0)
6362 /* A Co-processor register candidate goes either in its own
6363 class of registers or on the stack. */
6364 if (!pcum->aapcs_cprc_failed[slot])
6366 /* C1.cp - Try to allocate the argument to co-processor
6367 registers. */
6368 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6369 return;
6371 /* C2.cp - Put the argument on the stack and note that we
6372 can't assign any more candidates in this slot. We also
6373 need to note that we have allocated stack space, so that
6374 we won't later try to split a non-cprc candidate between
6375 core registers and the stack. */
6376 pcum->aapcs_cprc_failed[slot] = true;
6377 pcum->can_split = false;
6380 /* We didn't get a register, so this argument goes on the
6381 stack. */
6382 gcc_assert (pcum->can_split == false);
6383 return;
6387 /* C3 - For double-word aligned arguments, round the NCRN up to the
6388 next even number. */
6389 ncrn = pcum->aapcs_ncrn;
6390 if (ncrn & 1)
6392 int res = arm_needs_doubleword_align (mode, type);
6393 /* Only warn during RTL expansion of call stmts, otherwise we would
6394 warn e.g. during gimplification even on functions that will be
6395 always inlined, and we'd warn multiple times. Don't warn when
6396 called in expand_function_start either, as we warn instead in
6397 arm_function_arg_boundary in that case. */
6398 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6399 inform (input_location, "parameter passing for argument of type "
6400 "%qT changed in GCC 7.1", type);
6401 else if (res > 0)
6402 ncrn++;
6405 nregs = ARM_NUM_REGS2(mode, type);
6407 /* Sigh, this test should really assert that nregs > 0, but a GCC
6408 extension allows empty structs and then gives them empty size; it
6409 then allows such a structure to be passed by value. For some of
6410 the code below we have to pretend that such an argument has
6411 non-zero size so that we 'locate' it correctly either in
6412 registers or on the stack. */
6413 gcc_assert (nregs >= 0);
6415 nregs2 = nregs ? nregs : 1;
6417 /* C4 - Argument fits entirely in core registers. */
6418 if (ncrn + nregs2 <= NUM_ARG_REGS)
6420 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6421 pcum->aapcs_next_ncrn = ncrn + nregs;
6422 return;
6425 /* C5 - Some core registers left and there are no arguments already
6426 on the stack: split this argument between the remaining core
6427 registers and the stack. */
6428 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6430 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6431 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6432 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6433 return;
6436 /* C6 - NCRN is set to 4. */
6437 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6439 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6440 return;
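/* A worked example of rules C3-C5 above for hypothetical prototypes on
   an AAPCS target (only the core registers r0-r3 are considered here).  */
#if 0
void f (int a, long long b);
/* a -> r0.  B needs doubleword alignment, so C3 rounds NCRN up from 1
   to 2 and C4 puts b in r2+r3.  */

struct three { int w[3]; };
void g (int a, int b, struct three s);
/* a -> r0, b -> r1.  S needs three words and 2 + 3 > 4, so C5 splits it:
   the first 8 bytes go in r2+r3 (aapcs_partial == 8) and the final word
   goes on the stack.  */
#endif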
6443 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6444 for a call to a function whose data type is FNTYPE.
6445 For a library call, FNTYPE is NULL. */
6446 void
6447 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6448 rtx libname,
6449 tree fndecl ATTRIBUTE_UNUSED)
6451 /* Long call handling. */
6452 if (fntype)
6453 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6454 else
6455 pcum->pcs_variant = arm_pcs_default;
6457 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6459 if (arm_libcall_uses_aapcs_base (libname))
6460 pcum->pcs_variant = ARM_PCS_AAPCS;
6462 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6463 pcum->aapcs_reg = NULL_RTX;
6464 pcum->aapcs_partial = 0;
6465 pcum->aapcs_arg_processed = false;
6466 pcum->aapcs_cprc_slot = -1;
6467 pcum->can_split = true;
6469 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6471 int i;
6473 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6475 pcum->aapcs_cprc_failed[i] = false;
6476 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6479 return;
6482 /* Legacy ABIs */
6484 /* On the ARM, the offset starts at 0. */
6485 pcum->nregs = 0;
6486 pcum->iwmmxt_nregs = 0;
6487 pcum->can_split = true;
6489 /* Varargs vectors are treated the same as long long.
6490 named_count avoids having to change the way arm handles 'named' */
6491 pcum->named_count = 0;
6492 pcum->nargs = 0;
6494 if (TARGET_REALLY_IWMMXT && fntype)
6496 tree fn_arg;
6498 for (fn_arg = TYPE_ARG_TYPES (fntype);
6499 fn_arg;
6500 fn_arg = TREE_CHAIN (fn_arg))
6501 pcum->named_count += 1;
6503 if (! pcum->named_count)
6504 pcum->named_count = INT_MAX;
6508 /* Return 1 if double word alignment is required for argument passing.
6509 Return -1 if double word alignment used to be required for argument
6510 passing before PR77728 ABI fix, but is not required anymore.
6511 Return 0 if double word alignment is not required and wasn't required
6512 before either. */
6513 static int
6514 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6516 if (!type)
6517 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6519 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6520 if (!AGGREGATE_TYPE_P (type))
6521 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6523 /* Array types: Use member alignment of element type. */
6524 if (TREE_CODE (type) == ARRAY_TYPE)
6525 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6527 int ret = 0;
6528 /* Record/aggregate types: Use greatest member alignment of any member. */
6529 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6530 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6532 if (TREE_CODE (field) == FIELD_DECL)
6533 return 1;
6534 else
6535 /* Before PR77728 fix, we were incorrectly considering also
6536 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6537 Make sure we can warn about that with -Wpsabi. */
6538 ret = -1;
6541 return ret;
6545 /* Determine where to put an argument to a function.
6546 Value is zero to push the argument on the stack,
6547 or a hard register in which to store the argument.
6549 MODE is the argument's machine mode.
6550 TYPE is the data type of the argument (as a tree).
6551 This is null for libcalls where that information may
6552 not be available.
6553 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6554 the preceding args and about the function being called.
6555 NAMED is nonzero if this argument is a named parameter
6556 (otherwise it is an extra parameter matching an ellipsis).
6558 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6559 other arguments are passed on the stack. If (NAMED == 0) (which happens
6560 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6561 defined), say it is passed on the stack (function_prologue will
6562 indeed make it pass on the stack if necessary). */
6564 static rtx
6565 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6566 const_tree type, bool named)
6568 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6569 int nregs;
6571 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6572 a call insn (op3 of a call_value insn). */
6573 if (mode == VOIDmode)
6574 return const0_rtx;
6576 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6578 aapcs_layout_arg (pcum, mode, type, named);
6579 return pcum->aapcs_reg;
6582 /* Varargs vectors are treated the same as long long.
6583 named_count avoids having to change the way arm handles 'named' */
6584 if (TARGET_IWMMXT_ABI
6585 && arm_vector_mode_supported_p (mode)
6586 && pcum->named_count > pcum->nargs + 1)
6588 if (pcum->iwmmxt_nregs <= 9)
6589 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6590 else
6592 pcum->can_split = false;
6593 return NULL_RTX;
6597 /* Put doubleword aligned quantities in even register pairs. */
6598 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6600 int res = arm_needs_doubleword_align (mode, type);
6601 if (res < 0 && warn_psabi)
6602 inform (input_location, "parameter passing for argument of type "
6603 "%qT changed in GCC 7.1", type);
6604 else if (res > 0)
6605 pcum->nregs++;
6608 /* Only allow splitting an arg between regs and memory if all preceding
6609 args were allocated to regs. For args passed by reference we only count
6610 the reference pointer. */
6611 if (pcum->can_split)
6612 nregs = 1;
6613 else
6614 nregs = ARM_NUM_REGS2 (mode, type);
6616 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6617 return NULL_RTX;
6619 return gen_rtx_REG (mode, pcum->nregs);
6622 static unsigned int
6623 arm_function_arg_boundary (machine_mode mode, const_tree type)
6625 if (!ARM_DOUBLEWORD_ALIGN)
6626 return PARM_BOUNDARY;
6628 int res = arm_needs_doubleword_align (mode, type);
6629 if (res < 0 && warn_psabi)
6630 inform (input_location, "parameter passing for argument of type %qT "
6631 "changed in GCC 7.1", type);
6633 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6636 static int
6637 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6638 tree type, bool named)
6640 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6641 int nregs = pcum->nregs;
6643 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6645 aapcs_layout_arg (pcum, mode, type, named);
6646 return pcum->aapcs_partial;
6649 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6650 return 0;
6652 if (NUM_ARG_REGS > nregs
6653 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6654 && pcum->can_split)
6655 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6657 return 0;
6660 /* Update the data in PCUM to advance over an argument
6661 of mode MODE and data type TYPE.
6662 (TYPE is null for libcalls where that information may not be available.) */
6664 static void
6665 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6666 const_tree type, bool named)
6668 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6670 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6672 aapcs_layout_arg (pcum, mode, type, named);
6674 if (pcum->aapcs_cprc_slot >= 0)
6676 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6677 type);
6678 pcum->aapcs_cprc_slot = -1;
6681 /* Generic stuff. */
6682 pcum->aapcs_arg_processed = false;
6683 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6684 pcum->aapcs_reg = NULL_RTX;
6685 pcum->aapcs_partial = 0;
6687 else
6689 pcum->nargs += 1;
6690 if (arm_vector_mode_supported_p (mode)
6691 && pcum->named_count > pcum->nargs
6692 && TARGET_IWMMXT_ABI)
6693 pcum->iwmmxt_nregs += 1;
6694 else
6695 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6699 /* Variable sized types are passed by reference. This is a GCC
6700 extension to the ARM ABI. */
6702 static bool
6703 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6704 machine_mode mode ATTRIBUTE_UNUSED,
6705 const_tree type, bool named ATTRIBUTE_UNUSED)
6707 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6710 /* Encode the current state of the #pragma [no_]long_calls. */
6711 typedef enum
6713 OFF, /* No #pragma [no_]long_calls is in effect. */
6714 LONG, /* #pragma long_calls is in effect. */
6715 SHORT /* #pragma no_long_calls is in effect. */
6716 } arm_pragma_enum;
6718 static arm_pragma_enum arm_pragma_long_calls = OFF;
6720 void
6721 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6723 arm_pragma_long_calls = LONG;
6726 void
6727 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6729 arm_pragma_long_calls = SHORT;
6732 void
6733 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6735 arm_pragma_long_calls = OFF;
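/* For reference, the pragmas handled above appear in user code like this
   (hypothetical declarations; the setting applies to functions declared
   while the pragma is in effect).  */
#if 0
#pragma long_calls
void far_away (void);	    /* Called via a 32-bit long call.  */
#pragma no_long_calls
void near_by (void);	    /* Called with a normal BL.  */
#pragma long_calls_off
void default_call (void);   /* Back to the -mlong-calls default.  */
#endif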
6738 /* Handle an attribute requiring a FUNCTION_DECL;
6739 arguments as in struct attribute_spec.handler. */
6740 static tree
6741 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6742 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6744 if (TREE_CODE (*node) != FUNCTION_DECL)
6746 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6747 name);
6748 *no_add_attrs = true;
6751 return NULL_TREE;
6754 /* Handle an "interrupt" or "isr" attribute;
6755 arguments as in struct attribute_spec.handler. */
6756 static tree
6757 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6758 bool *no_add_attrs)
6760 if (DECL_P (*node))
6762 if (TREE_CODE (*node) != FUNCTION_DECL)
6764 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6765 name);
6766 *no_add_attrs = true;
6768 /* FIXME: the argument if any is checked for type attributes;
6769 should it be checked for decl ones? */
6771 else
6773 if (TREE_CODE (*node) == FUNCTION_TYPE
6774 || TREE_CODE (*node) == METHOD_TYPE)
6776 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6778 warning (OPT_Wattributes, "%qE attribute ignored",
6779 name);
6780 *no_add_attrs = true;
6783 else if (TREE_CODE (*node) == POINTER_TYPE
6784 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6785 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6786 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6788 *node = build_variant_type_copy (*node);
6789 TREE_TYPE (*node) = build_type_attribute_variant
6790 (TREE_TYPE (*node),
6791 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6792 *no_add_attrs = true;
6794 else
6796 /* Possibly pass this attribute on from the type to a decl. */
6797 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6798 | (int) ATTR_FLAG_FUNCTION_NEXT
6799 | (int) ATTR_FLAG_ARRAY_NEXT))
6801 *no_add_attrs = true;
6802 return tree_cons (name, args, NULL_TREE);
6804 else
6806 warning (OPT_Wattributes, "%qE attribute ignored",
6807 name);
6812 return NULL_TREE;
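/* For reference, the attribute handled above is written by users roughly
   as follows (hypothetical handlers; the string argument selects the
   interrupt type checked by arm_isr_value).  */
#if 0
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
void fiq_handler (void) __attribute__ ((isr ("FIQ")));
#endif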
6815 /* Handle a "pcs" attribute; arguments as in struct
6816 attribute_spec.handler. */
6817 static tree
6818 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6819 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6821 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6823 warning (OPT_Wattributes, "%qE attribute ignored", name);
6824 *no_add_attrs = true;
6826 return NULL_TREE;
6829 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6830 /* Handle the "notshared" attribute. This attribute is another way of
6831 requesting hidden visibility. ARM's compiler supports
6832 "__declspec(notshared)"; we support the same thing via an
6833 attribute. */
6835 static tree
6836 arm_handle_notshared_attribute (tree *node,
6837 tree name ATTRIBUTE_UNUSED,
6838 tree args ATTRIBUTE_UNUSED,
6839 int flags ATTRIBUTE_UNUSED,
6840 bool *no_add_attrs)
6842 tree decl = TYPE_NAME (*node);
6844 if (decl)
6846 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6847 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6848 *no_add_attrs = false;
6850 return NULL_TREE;
6852 #endif
6854 /* This function returns true if a function with declaration FNDECL and type
6855 FNTYPE uses the stack to pass arguments or return variables and false
6856 otherwise. This is used for functions with the attributes
6857 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6858 diagnostic messages if the stack is used. NAME is the name of the attribute
6859 used. */
6861 static bool
6862 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6864 function_args_iterator args_iter;
6865 CUMULATIVE_ARGS args_so_far_v;
6866 cumulative_args_t args_so_far;
6867 bool first_param = true;
6868 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6870 /* Error out if any argument is passed on the stack. */
6871 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6872 args_so_far = pack_cumulative_args (&args_so_far_v);
6873 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6875 rtx arg_rtx;
6876 machine_mode arg_mode = TYPE_MODE (arg_type);
6878 prev_arg_type = arg_type;
6879 if (VOID_TYPE_P (arg_type))
6880 continue;
6882 if (!first_param)
6883 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6884 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6885 if (!arg_rtx
6886 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6888 error ("%qE attribute not available to functions with arguments "
6889 "passed on the stack", name);
6890 return true;
6892 first_param = false;
6895 /* Error out for variadic functions since we cannot control how many
6896 arguments will be passed and thus stack could be used. stdarg_p () is not
6897 used for the checking to avoid browsing arguments twice. */
6898 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6900 error ("%qE attribute not available to functions with variable number "
6901 "of arguments", name);
6902 return true;
6905 /* Error out if return value is passed on the stack. */
6906 ret_type = TREE_TYPE (fntype);
6907 if (arm_return_in_memory (ret_type, fntype))
6909 error ("%qE attribute not available to functions that return value on "
6910 "the stack", name);
6911 return true;
6913 return false;
6916 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6917 function will check whether the attribute is allowed here and will add the
6918 attribute to the function declaration tree or otherwise issue a warning. */
6920 static tree
6921 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6922 tree /* args */,
6923 int /* flags */,
6924 bool *no_add_attrs)
6926 tree fndecl;
6928 if (!use_cmse)
6930 *no_add_attrs = true;
6931 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6932 name);
6933 return NULL_TREE;
6936 /* Ignore attribute for function types. */
6937 if (TREE_CODE (*node) != FUNCTION_DECL)
6939 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6940 name);
6941 *no_add_attrs = true;
6942 return NULL_TREE;
6945 fndecl = *node;
6947 /* Warn for static linkage functions. */
6948 if (!TREE_PUBLIC (fndecl))
6950 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6951 "with static linkage", name);
6952 *no_add_attrs = true;
6953 return NULL_TREE;
6956 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6957 TREE_TYPE (fndecl));
6958 return NULL_TREE;
6962 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6963 function will check whether the attribute is allowed here and will add the
6964 attribute to the function type tree or otherwise issue a diagnostic. The
6965 reason we check this at declaration time is to only allow the use of the
6966 attribute with declarations of function pointers and not function
6967 declarations. This function checks NODE is of the expected type and issues
6968 diagnostics otherwise using NAME. If it is not of the expected type
6969 *NO_ADD_ATTRS will be set to true. */
6971 static tree
6972 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6973 tree /* args */,
6974 int /* flags */,
6975 bool *no_add_attrs)
6977 tree decl = NULL_TREE, fntype = NULL_TREE;
6978 tree type;
6980 if (!use_cmse)
6982 *no_add_attrs = true;
6983 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6984 name);
6985 return NULL_TREE;
6988 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6990 decl = *node;
6991 fntype = TREE_TYPE (decl);
6994 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6995 fntype = TREE_TYPE (fntype);
6997 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6999 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7000 "function pointer", name);
7001 *no_add_attrs = true;
7002 return NULL_TREE;
7005 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7007 if (*no_add_attrs)
7008 return NULL_TREE;
7010 /* Prevent trees being shared among function types with and without
7011 cmse_nonsecure_call attribute. */
7012 type = TREE_TYPE (decl);
7014 type = build_distinct_type_copy (type);
7015 TREE_TYPE (decl) = type;
7016 fntype = type;
7018 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7020 type = fntype;
7021 fntype = TREE_TYPE (fntype);
7022 fntype = build_distinct_type_copy (fntype);
7023 TREE_TYPE (type) = fntype;
7026 /* Construct a type attribute and add it to the function type. */
7027 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7028 TYPE_ATTRIBUTES (fntype));
7029 TYPE_ATTRIBUTES (fntype) = attrs;
7030 return NULL_TREE;
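/* A rough sketch of how the two CMSE attributes are used (hypothetical
   names; assumes compilation with -mcmse on an ARMv8-M target): the
   entry attribute goes on a secure function, the call attribute on a
   declaration of function-pointer type, as the handler above requires.  */
#if 0
int __attribute__ ((cmse_nonsecure_entry)) secure_service (int arg);

void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
#endif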
7033 /* Return 0 if the attributes for two types are incompatible, 1 if they
7034 are compatible, and 2 if they are nearly compatible (which causes a
7035 warning to be generated). */
7036 static int
7037 arm_comp_type_attributes (const_tree type1, const_tree type2)
7039 int l1, l2, s1, s2;
7041 /* Check for mismatch of non-default calling convention. */
7042 if (TREE_CODE (type1) != FUNCTION_TYPE)
7043 return 1;
7045 /* Check for mismatched call attributes. */
7046 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7047 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7048 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7049 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7051 /* Only bother to check if an attribute is defined. */
7052 if (l1 | l2 | s1 | s2)
7054 /* If one type has an attribute, the other must have the same attribute. */
7055 if ((l1 != l2) || (s1 != s2))
7056 return 0;
7058 /* Disallow mixed attributes. */
7059 if ((l1 & s2) || (l2 & s1))
7060 return 0;
7063 /* Check for mismatched ISR attribute. */
7064 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7065 if (! l1)
7066 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7067 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7068 if (! l2)
7069 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7070 if (l1 != l2)
7071 return 0;
7073 l1 = lookup_attribute ("cmse_nonsecure_call",
7074 TYPE_ATTRIBUTES (type1)) != NULL;
7075 l2 = lookup_attribute ("cmse_nonsecure_call",
7076 TYPE_ATTRIBUTES (type2)) != NULL;
7078 if (l1 != l2)
7079 return 0;
7081 return 1;
7084 /* Assigns default attributes to newly defined type. This is used to
7085 set short_call/long_call attributes for function types of
7086 functions defined inside corresponding #pragma scopes. */
7087 static void
7088 arm_set_default_type_attributes (tree type)
7090 /* Add __attribute__ ((long_call)) to all functions, when
7091 inside #pragma long_calls or __attribute__ ((short_call)),
7092 when inside #pragma no_long_calls. */
7093 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7095 tree type_attr_list, attr_name;
7096 type_attr_list = TYPE_ATTRIBUTES (type);
7098 if (arm_pragma_long_calls == LONG)
7099 attr_name = get_identifier ("long_call");
7100 else if (arm_pragma_long_calls == SHORT)
7101 attr_name = get_identifier ("short_call");
7102 else
7103 return;
7105 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7106 TYPE_ATTRIBUTES (type) = type_attr_list;
7110 /* Return true if DECL is known to be linked into section SECTION. */
7112 static bool
7113 arm_function_in_section_p (tree decl, section *section)
7115 /* We can only be certain about the prevailing symbol definition. */
7116 if (!decl_binds_to_current_def_p (decl))
7117 return false;
7119 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7120 if (!DECL_SECTION_NAME (decl))
7122 /* Make sure that we will not create a unique section for DECL. */
7123 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7124 return false;
7127 return function_section (decl) == section;
7130 /* Return nonzero if a 32-bit "long_call" should be generated for
7131 a call from the current function to DECL. We generate a long_call
7132 if the function:
7134 a. has an __attribute__((long call))
7135 or b. is within the scope of a #pragma long_calls
7136 or c. the -mlong-calls command line switch has been specified
7138 However we do not generate a long call if the function:
7140 d. has an __attribute__ ((short_call))
7141 or e. is inside the scope of a #pragma no_long_calls
7142 or f. is defined in the same section as the current function. */
7144 bool
7145 arm_is_long_call_p (tree decl)
7147 tree attrs;
7149 if (!decl)
7150 return TARGET_LONG_CALLS;
7152 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7153 if (lookup_attribute ("short_call", attrs))
7154 return false;
7156 /* For "f", be conservative, and only cater for cases in which the
7157 whole of the current function is placed in the same section. */
7158 if (!flag_reorder_blocks_and_partition
7159 && TREE_CODE (decl) == FUNCTION_DECL
7160 && arm_function_in_section_p (decl, current_function_section ()))
7161 return false;
7163 if (lookup_attribute ("long_call", attrs))
7164 return true;
7166 return TARGET_LONG_CALLS;
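/* For reference, conditions (a) and (d) above correspond to user
   declarations such as these (hypothetical prototypes).  */
#if 0
void remote_fn (void) __attribute__ ((long_call));   /* Always a long call.  */
void local_fn (void) __attribute__ ((short_call));    /* Never a long call.  */
#endif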
7169 /* Return nonzero if it is ok to make a tail-call to DECL. */
7170 static bool
7171 arm_function_ok_for_sibcall (tree decl, tree exp)
7173 unsigned long func_type;
7175 if (cfun->machine->sibcall_blocked)
7176 return false;
7178 /* Never tailcall something if we are generating code for Thumb-1. */
7179 if (TARGET_THUMB1)
7180 return false;
7182 /* The PIC register is live on entry to VxWorks PLT entries, so we
7183 must make the call before restoring the PIC register. */
7184 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7185 return false;
7187 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7188 may be used both as target of the call and base register for restoring
7189 the VFP registers */
7190 if (TARGET_APCS_FRAME && TARGET_ARM
7191 && TARGET_HARD_FLOAT
7192 && decl && arm_is_long_call_p (decl))
7193 return false;
7195 /* If we are interworking and the function is not declared static
7196 then we can't tail-call it unless we know that it exists in this
7197 compilation unit (since it might be a Thumb routine). */
7198 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7199 && !TREE_ASM_WRITTEN (decl))
7200 return false;
7202 func_type = arm_current_func_type ();
7203 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7204 if (IS_INTERRUPT (func_type))
7205 return false;
7207 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7208 generated for entry functions themselves. */
7209 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7210 return false;
7212 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7213 this would complicate matters for later code generation. */
7214 if (TREE_CODE (exp) == CALL_EXPR)
7216 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7217 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7218 return false;
7221 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7223 /* Check that the return value locations are the same. For
7224 example that we aren't returning a value from the sibling in
7225 a VFP register but then need to transfer it to a core
7226 register. */
7227 rtx a, b;
7228 tree decl_or_type = decl;
7230 /* If it is an indirect function pointer, get the function type. */
7231 if (!decl)
7232 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7234 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7235 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7236 cfun->decl, false);
7237 if (!rtx_equal_p (a, b))
7238 return false;
7241 /* Never tailcall if function may be called with a misaligned SP. */
7242 if (IS_STACKALIGN (func_type))
7243 return false;
7245 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7246 references should become a NOP. Don't convert such calls into
7247 sibling calls. */
7248 if (TARGET_AAPCS_BASED
7249 && arm_abi == ARM_ABI_AAPCS
7250 && decl
7251 && DECL_WEAK (decl))
7252 return false;
7254 /* We cannot do a tailcall for an indirect call by descriptor if all the
7255 argument registers are used because the only register left to load the
7256 address is IP and it will already contain the static chain. */
7257 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7259 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7260 CUMULATIVE_ARGS cum;
7261 cumulative_args_t cum_v;
7263 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7264 cum_v = pack_cumulative_args (&cum);
7266 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7268 tree type = TREE_VALUE (t);
7269 if (!VOID_TYPE_P (type))
7270 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7273 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7274 return false;
7277 /* Everything else is ok. */
7278 return true;
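/* Editorial example (illustrative, not part of the original source): with a
   compatible declaration of worker, a call in tail position such as

     int wrapper (int x) { return worker (x + 1); }

   can normally be emitted as a direct branch ("b worker") rather than "bl"
   plus a separate return, provided none of the checks above (Thumb-1,
   interworking with an unknown definition, CMSE entry, mismatched return
   value locations, and so on) has rejected the sibcall.  */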
7282 /* Addressing mode support functions. */
7284 /* Return nonzero if X is a legitimate immediate operand when compiling
7285 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7287 legitimate_pic_operand_p (rtx x)
7289 if (GET_CODE (x) == SYMBOL_REF
7290 || (GET_CODE (x) == CONST
7291 && GET_CODE (XEXP (x, 0)) == PLUS
7292 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7293 return 0;
7295 return 1;
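/* Editorial examples (derived from the checks above): (const_int 42) and
   (label_ref lab) remain legitimate immediates when compiling for PIC,
   whereas (symbol_ref "var") and
   (const (plus (symbol_ref "var") (const_int 4))) do not and must be
   legitimized first.  */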
7298 /* Record that the current function needs a PIC register. Initialize
7299 cfun->machine->pic_reg if we have not already done so. */
7301 static void
7302 require_pic_register (void)
7304 /* A lot of the logic here is made obscure by the fact that this
7305 routine gets called as part of the rtx cost estimation process.
7306 We don't want those calls to affect any assumptions about the real
7307 function; and further, we can't call entry_of_function() until we
7308 start the real expansion process. */
7309 if (!crtl->uses_pic_offset_table)
7311 gcc_assert (can_create_pseudo_p ());
7312 if (arm_pic_register != INVALID_REGNUM
7313 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7315 if (!cfun->machine->pic_reg)
7316 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7318 /* Play games to avoid marking the function as needing pic
7319 if we are being called as part of the cost-estimation
7320 process. */
7321 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7322 crtl->uses_pic_offset_table = 1;
7324 else
7326 rtx_insn *seq, *insn;
7328 if (!cfun->machine->pic_reg)
7329 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7331 /* Play games to avoid marking the function as needing pic
7332 if we are being called as part of the cost-estimation
7333 process. */
7334 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7336 crtl->uses_pic_offset_table = 1;
7337 start_sequence ();
7339 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7340 && arm_pic_register > LAST_LO_REGNUM)
7341 emit_move_insn (cfun->machine->pic_reg,
7342 gen_rtx_REG (Pmode, arm_pic_register));
7343 else
7344 arm_load_pic_register (0UL);
7346 seq = get_insns ();
7347 end_sequence ();
7349 for (insn = seq; insn; insn = NEXT_INSN (insn))
7350 if (INSN_P (insn))
7351 INSN_LOCATION (insn) = prologue_location;
7353 /* We can be called during expansion of PHI nodes, where
7354 we can't yet emit instructions directly in the final
7355 insn stream. Queue the insns on the entry edge, they will
7356 be committed after everything else is expanded. */
7357 insert_insn_on_edge (seq,
7358 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7365 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7367 if (GET_CODE (orig) == SYMBOL_REF
7368 || GET_CODE (orig) == LABEL_REF)
7370 if (reg == 0)
7372 gcc_assert (can_create_pseudo_p ());
7373 reg = gen_reg_rtx (Pmode);
7376 /* VxWorks does not impose a fixed gap between segments; the run-time
7377 gap can be different from the object-file gap. We therefore can't
7378 use GOTOFF unless we are absolutely sure that the symbol is in the
7379 same segment as the GOT. Unfortunately, the flexibility of linker
7380 scripts means that we can't be sure of that in general, so assume
7381 that GOTOFF is never valid on VxWorks. */
7382 /* References to weak symbols cannot be resolved locally: they
7383 may be overridden by a non-weak definition at link time. */
7384 rtx_insn *insn;
7385 if ((GET_CODE (orig) == LABEL_REF
7386 || (GET_CODE (orig) == SYMBOL_REF
7387 && SYMBOL_REF_LOCAL_P (orig)
7388 && (SYMBOL_REF_DECL (orig)
7389 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7390 && NEED_GOT_RELOC
7391 && arm_pic_data_is_text_relative)
7392 insn = arm_pic_static_addr (orig, reg);
7393 else
7395 rtx pat;
7396 rtx mem;
7398 /* If this function doesn't have a pic register, create one now. */
7399 require_pic_register ();
7401 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7403 /* Make the MEM as close to a constant as possible. */
7404 mem = SET_SRC (pat);
7405 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7406 MEM_READONLY_P (mem) = 1;
7407 MEM_NOTRAP_P (mem) = 1;
7409 insn = emit_insn (pat);
7412 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7413 by loop. */
7414 set_unique_reg_note (insn, REG_EQUAL, orig);
7416 return reg;
7418 else if (GET_CODE (orig) == CONST)
7420 rtx base, offset;
7422 if (GET_CODE (XEXP (orig, 0)) == PLUS
7423 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7424 return orig;
7426 /* Handle the case where we have: const (UNSPEC_TLS). */
7427 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7428 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7429 return orig;
7431 /* Handle the case where we have:
7432 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7433 CONST_INT. */
7434 if (GET_CODE (XEXP (orig, 0)) == PLUS
7435 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7436 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7438 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7439 return orig;
7442 if (reg == 0)
7444 gcc_assert (can_create_pseudo_p ());
7445 reg = gen_reg_rtx (Pmode);
7448 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7450 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7451 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7452 base == reg ? 0 : reg);
7454 if (CONST_INT_P (offset))
7456 /* The base register doesn't really matter, we only want to
7457 test the index for the appropriate mode. */
7458 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7460 gcc_assert (can_create_pseudo_p ());
7461 offset = force_reg (Pmode, offset);
7464 if (CONST_INT_P (offset))
7465 return plus_constant (Pmode, base, INTVAL (offset));
7468 if (GET_MODE_SIZE (mode) > 4
7469 && (GET_MODE_CLASS (mode) == MODE_INT
7470 || TARGET_SOFT_FLOAT))
7472 emit_insn (gen_addsi3 (reg, base, offset));
7473 return reg;
7476 return gen_rtx_PLUS (Pmode, base, offset);
7479 return orig;
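/* Editorial sketch (assumption, simplified) of the common SYMBOL_REF case
   above: for a non-local symbol "sym" the emitted pattern is roughly

     (set (reg Rd)
          (mem (plus (reg pic_reg)
                     (unspec [(symbol_ref "sym")] UNSPEC_PIC_SYM))))

   i.e. a load of the GOT entry relative to the PIC base, with a REG_EQUAL
   note recording the original symbol; local, text-relative symbols instead
   go through arm_pic_static_addr and avoid the GOT load.  */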
7483 /* Find a spare register to use during the prolog of a function. */
7485 static int
7486 thumb_find_work_register (unsigned long pushed_regs_mask)
7488 int reg;
7490 /* Check the argument registers first as these are call-used. The
7491 register allocation order means that sometimes r3 might be used
7492 but earlier argument registers might not, so check them all. */
7493 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7494 if (!df_regs_ever_live_p (reg))
7495 return reg;
7497 /* Before going on to check the call-saved registers we can try a couple
7498 more ways of deducing that r3 is available. The first is when we are
7499 pushing anonymous arguments onto the stack and we have less than 4
7500 registers' worth of fixed arguments (*). In this case r3 will be part of
7501 the variable argument list and so we can be sure that it will be
7502 pushed right at the start of the function. Hence it will be available
7503 for the rest of the prologue.
7504 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7505 if (cfun->machine->uses_anonymous_args
7506 && crtl->args.pretend_args_size > 0)
7507 return LAST_ARG_REGNUM;
7509 /* The other case is when we have fixed arguments but less than 4 registers'
7510 worth. In this case r3 might be used in the body of the function, but
7511 it is not being used to convey an argument into the function. In theory
7512 we could just check crtl->args.size to see how many bytes are
7513 being passed in argument registers, but it seems that it is unreliable.
7514 Sometimes it will have the value 0 when in fact arguments are being
7515 passed. (See testcase execute/20021111-1.c for an example). So we also
7516 check the args_info.nregs field as well. The problem with this field is
7517 that it makes no allowances for arguments that are passed to the
7518 function but which are not used. Hence we could miss an opportunity
7519 when a function has an unused argument in r3. But it is better to be
7520 safe than to be sorry. */
7521 if (! cfun->machine->uses_anonymous_args
7522 && crtl->args.size >= 0
7523 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7524 && (TARGET_AAPCS_BASED
7525 ? crtl->args.info.aapcs_ncrn < 4
7526 : crtl->args.info.nregs < 4))
7527 return LAST_ARG_REGNUM;
7529 /* Otherwise look for a call-saved register that is going to be pushed. */
7530 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7531 if (pushed_regs_mask & (1 << reg))
7532 return reg;
7534 if (TARGET_THUMB2)
7536 /* Thumb-2 can use high regs. */
7537 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7538 if (pushed_regs_mask & (1 << reg))
7539 return reg;
7541 /* Something went wrong - thumb_compute_save_reg_mask()
7542 should have arranged for a suitable register to be pushed. */
7543 gcc_unreachable ();
7546 static GTY(()) int pic_labelno;
7548 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7549 low register. */
7551 void
7552 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7554 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7556 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7557 return;
7559 gcc_assert (flag_pic);
7561 pic_reg = cfun->machine->pic_reg;
7562 if (TARGET_VXWORKS_RTP)
7564 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7565 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7566 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7568 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7570 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7571 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7573 else
7575 /* We use an UNSPEC rather than a LABEL_REF because this label
7576 never appears in the code stream. */
7578 labelno = GEN_INT (pic_labelno++);
7579 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7580 l1 = gen_rtx_CONST (VOIDmode, l1);
7582 /* On the ARM the PC register contains 'dot + 8' at the time of the
7583 addition, on the Thumb it is 'dot + 4'. */
7584 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7585 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7586 UNSPEC_GOTSYM_OFF);
7587 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7589 if (TARGET_32BIT)
7591 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7593 else /* TARGET_THUMB1 */
7595 if (arm_pic_register != INVALID_REGNUM
7596 && REGNO (pic_reg) > LAST_LO_REGNUM)
7598 /* We will have pushed the pic register, so we should always be
7599 able to find a work register. */
7600 pic_tmp = gen_rtx_REG (SImode,
7601 thumb_find_work_register (saved_regs));
7602 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7603 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7604 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7606 else if (arm_pic_register != INVALID_REGNUM
7607 && arm_pic_register > LAST_LO_REGNUM
7608 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7610 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7611 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7612 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7614 else
7615 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7619 /* Need to emit this whether or not we obey regdecls,
7620 since setjmp/longjmp can cause life info to screw up. */
7621 emit_use (pic_reg);
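/* Editorial sketch (assumption): in the common non-VxWorks, Arm-state,
   -fPIC case the sequence emitted above typically assembles to something
   like

       ldr     rP, .Lpic_off
   .LPIC0:
       add     rP, pc, rP
       ...
   .Lpic_off:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   where rP and .Lpic_off are illustrative names; the "+ 8" (or "+ 4" in
   Thumb state) is the pipeline offset applied via plus_constant above.  */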
7624 /* Generate code to load the address of a static var when flag_pic is set. */
7625 static rtx_insn *
7626 arm_pic_static_addr (rtx orig, rtx reg)
7628 rtx l1, labelno, offset_rtx;
7630 gcc_assert (flag_pic);
7632 /* We use an UNSPEC rather than a LABEL_REF because this label
7633 never appears in the code stream. */
7634 labelno = GEN_INT (pic_labelno++);
7635 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7636 l1 = gen_rtx_CONST (VOIDmode, l1);
7638 /* On the ARM the PC register contains 'dot + 8' at the time of the
7639 addition, on the Thumb it is 'dot + 4'. */
7640 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7641 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7642 UNSPEC_SYMBOL_OFFSET);
7643 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7645 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7648 /* Return nonzero if X is valid as an ARM state addressing register. */
7649 static int
7650 arm_address_register_rtx_p (rtx x, int strict_p)
7652 int regno;
7654 if (!REG_P (x))
7655 return 0;
7657 regno = REGNO (x);
7659 if (strict_p)
7660 return ARM_REGNO_OK_FOR_BASE_P (regno);
7662 return (regno <= LAST_ARM_REGNUM
7663 || regno >= FIRST_PSEUDO_REGISTER
7664 || regno == FRAME_POINTER_REGNUM
7665 || regno == ARG_POINTER_REGNUM);
7668 /* Return TRUE if this rtx is the difference of a symbol and a label,
7669 and will reduce to a PC-relative relocation in the object file.
7670 Expressions like this can be left alone when generating PIC, rather
7671 than forced through the GOT. */
7672 static int
7673 pcrel_constant_p (rtx x)
7675 if (GET_CODE (x) == MINUS)
7676 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7678 return FALSE;
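/* Editorial example (illustrative): an expression such as

     (minus (symbol_ref "func") (label_ref lab))

   satisfies this test; it reduces to a PC-relative relocation and so does
   not need to be forced through the GOT when generating PIC.  */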
7681 /* Return true if X will surely end up in an index register after next
7682 splitting pass. */
7683 static bool
7684 will_be_in_index_register (const_rtx x)
7686 /* arm.md: calculate_pic_address will split this into a register. */
7687 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7690 /* Return nonzero if X is a valid ARM state address operand. */
7692 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7693 int strict_p)
7695 bool use_ldrd;
7696 enum rtx_code code = GET_CODE (x);
7698 if (arm_address_register_rtx_p (x, strict_p))
7699 return 1;
7701 use_ldrd = (TARGET_LDRD
7702 && (mode == DImode || mode == DFmode));
7704 if (code == POST_INC || code == PRE_DEC
7705 || ((code == PRE_INC || code == POST_DEC)
7706 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7707 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7709 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7710 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7711 && GET_CODE (XEXP (x, 1)) == PLUS
7712 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7714 rtx addend = XEXP (XEXP (x, 1), 1);
7716 /* Don't allow ldrd post increment by register because it's hard
7717 to fixup invalid register choices. */
7718 if (use_ldrd
7719 && GET_CODE (x) == POST_MODIFY
7720 && REG_P (addend))
7721 return 0;
7723 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7724 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7727 /* After reload constants split into minipools will have addresses
7728 from a LABEL_REF. */
7729 else if (reload_completed
7730 && (code == LABEL_REF
7731 || (code == CONST
7732 && GET_CODE (XEXP (x, 0)) == PLUS
7733 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7734 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7735 return 1;
7737 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7738 return 0;
7740 else if (code == PLUS)
7742 rtx xop0 = XEXP (x, 0);
7743 rtx xop1 = XEXP (x, 1);
7745 return ((arm_address_register_rtx_p (xop0, strict_p)
7746 && ((CONST_INT_P (xop1)
7747 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7748 || (!strict_p && will_be_in_index_register (xop1))))
7749 || (arm_address_register_rtx_p (xop1, strict_p)
7750 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7753 #if 0
7754 /* Reload currently can't handle MINUS, so disable this for now */
7755 else if (GET_CODE (x) == MINUS)
7757 rtx xop0 = XEXP (x, 0);
7758 rtx xop1 = XEXP (x, 1);
7760 return (arm_address_register_rtx_p (xop0, strict_p)
7761 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7763 #endif
7765 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7766 && code == SYMBOL_REF
7767 && CONSTANT_POOL_ADDRESS_P (x)
7768 && ! (flag_pic
7769 && symbol_mentioned_p (get_pool_constant (x))
7770 && ! pcrel_constant_p (get_pool_constant (x))))
7771 return 1;
7773 return 0;
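/* Editorial summary (assumption, simplified) of the Arm-state forms the
   checks above accept for an SImode access:

     [r1]                plain base register
     [r1, #4092]         base plus 12-bit immediate index
     [r1, r2, lsl #2]    base plus scaled index register
     [r1], #4            post-increment (POST_INC)
     [r1, #-4]!          pre-decrement writeback (PRE_DEC)

   TImode and Neon structure modes are rejected here and handled by their
   own predicates.  */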
7776 /* Return true if we can avoid creating a constant pool entry for x. */
7777 static bool
7778 can_avoid_literal_pool_for_label_p (rtx x)
7780 /* Normally we can assign constant values to target registers without
7781 the help of the constant pool. But there are cases where we have to use the
7782 constant pool, for example:
7783 1) assigning a label to a register;
7784 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7786 A constant pool access of the form:
7787 (set (reg r0) (mem (symbol_ref (".LC0"))))
7788 will cause the use of the literal pool (later, in function arm_reorg).
7789 So here we mark such a form as invalid; the compiler
7790 will then adjust it into:
7791 (set (reg r0) (symbol_ref (".LC0")))
7792 (set (reg r0) (mem (reg r0))).
7793 No extra register is required, and (mem (reg r0)) won't cause the use
7794 of literal pools. */
7795 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7796 && CONSTANT_POOL_ADDRESS_P (x))
7797 return 1;
7798 return 0;
7802 /* Return nonzero if X is a valid Thumb-2 address operand. */
7803 static int
7804 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7806 bool use_ldrd;
7807 enum rtx_code code = GET_CODE (x);
7809 if (arm_address_register_rtx_p (x, strict_p))
7810 return 1;
7812 use_ldrd = (TARGET_LDRD
7813 && (mode == DImode || mode == DFmode));
7815 if (code == POST_INC || code == PRE_DEC
7816 || ((code == PRE_INC || code == POST_DEC)
7817 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7818 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7820 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7821 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7822 && GET_CODE (XEXP (x, 1)) == PLUS
7823 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7825 /* Thumb-2 only has autoincrement by constant. */
7826 rtx addend = XEXP (XEXP (x, 1), 1);
7827 HOST_WIDE_INT offset;
7829 if (!CONST_INT_P (addend))
7830 return 0;
7832 offset = INTVAL (addend);
7833 if (GET_MODE_SIZE (mode) <= 4)
7834 return (offset > -256 && offset < 256);
7836 return (use_ldrd && offset > -1024 && offset < 1024
7837 && (offset & 3) == 0);
7840 /* After reload constants split into minipools will have addresses
7841 from a LABEL_REF. */
7842 else if (reload_completed
7843 && (code == LABEL_REF
7844 || (code == CONST
7845 && GET_CODE (XEXP (x, 0)) == PLUS
7846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7847 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7848 return 1;
7850 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7851 return 0;
7853 else if (code == PLUS)
7855 rtx xop0 = XEXP (x, 0);
7856 rtx xop1 = XEXP (x, 1);
7858 return ((arm_address_register_rtx_p (xop0, strict_p)
7859 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7860 || (!strict_p && will_be_in_index_register (xop1))))
7861 || (arm_address_register_rtx_p (xop1, strict_p)
7862 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7865 else if (can_avoid_literal_pool_for_label_p (x))
7866 return 0;
7868 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7869 && code == SYMBOL_REF
7870 && CONSTANT_POOL_ADDRESS_P (x)
7871 && ! (flag_pic
7872 && symbol_mentioned_p (get_pool_constant (x))
7873 && ! pcrel_constant_p (get_pool_constant (x))))
7874 return 1;
7876 return 0;
7879 /* Return nonzero if INDEX is valid for an address index operand in
7880 ARM state. */
7881 static int
7882 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7883 int strict_p)
7885 HOST_WIDE_INT range;
7886 enum rtx_code code = GET_CODE (index);
7888 /* Standard coprocessor addressing modes. */
7889 if (TARGET_HARD_FLOAT
7890 && (mode == SFmode || mode == DFmode))
7891 return (code == CONST_INT && INTVAL (index) < 1024
7892 && INTVAL (index) > -1024
7893 && (INTVAL (index) & 3) == 0);
7895 /* For quad modes, we restrict the constant offset to be slightly less
7896 than what the instruction format permits. We do this because for
7897 quad mode moves, we will actually decompose them into two separate
7898 double-mode reads or writes. INDEX must therefore be a valid
7899 (double-mode) offset and so should INDEX+8. */
7900 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7901 return (code == CONST_INT
7902 && INTVAL (index) < 1016
7903 && INTVAL (index) > -1024
7904 && (INTVAL (index) & 3) == 0);
7906 /* We have no such constraint on double mode offsets, so we permit the
7907 full range of the instruction format. */
7908 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7909 return (code == CONST_INT
7910 && INTVAL (index) < 1024
7911 && INTVAL (index) > -1024
7912 && (INTVAL (index) & 3) == 0);
7914 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7915 return (code == CONST_INT
7916 && INTVAL (index) < 1024
7917 && INTVAL (index) > -1024
7918 && (INTVAL (index) & 3) == 0);
7920 if (arm_address_register_rtx_p (index, strict_p)
7921 && (GET_MODE_SIZE (mode) <= 4))
7922 return 1;
7924 if (mode == DImode || mode == DFmode)
7926 if (code == CONST_INT)
7928 HOST_WIDE_INT val = INTVAL (index);
7930 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7931 If vldr is selected it uses arm_coproc_mem_operand. */
7932 if (TARGET_LDRD)
7933 return val > -256 && val < 256;
7934 else
7935 return val > -4096 && val < 4092;
7938 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7941 if (GET_MODE_SIZE (mode) <= 4
7942 && ! (arm_arch4
7943 && (mode == HImode
7944 || mode == HFmode
7945 || (mode == QImode && outer == SIGN_EXTEND))))
7947 if (code == MULT)
7949 rtx xiop0 = XEXP (index, 0);
7950 rtx xiop1 = XEXP (index, 1);
7952 return ((arm_address_register_rtx_p (xiop0, strict_p)
7953 && power_of_two_operand (xiop1, SImode))
7954 || (arm_address_register_rtx_p (xiop1, strict_p)
7955 && power_of_two_operand (xiop0, SImode)));
7957 else if (code == LSHIFTRT || code == ASHIFTRT
7958 || code == ASHIFT || code == ROTATERT)
7960 rtx op = XEXP (index, 1);
7962 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7963 && CONST_INT_P (op)
7964 && INTVAL (op) > 0
7965 && INTVAL (op) <= 31);
7969 /* For ARM v4 we may be doing a sign-extend operation during the
7970 load. */
7971 if (arm_arch4)
7973 if (mode == HImode
7974 || mode == HFmode
7975 || (outer == SIGN_EXTEND && mode == QImode))
7976 range = 256;
7977 else
7978 range = 4096;
7980 else
7981 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7983 return (code == CONST_INT
7984 && INTVAL (index) < range
7985 && INTVAL (index) > -range);
7988 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7989 index operand, i.e. 1, 2, 4 or 8. */
7990 static bool
7991 thumb2_index_mul_operand (rtx op)
7993 HOST_WIDE_INT val;
7995 if (!CONST_INT_P (op))
7996 return false;
7998 val = INTVAL (op);
7999 return (val == 1 || val == 2 || val == 4 || val == 8);
8002 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8003 static int
8004 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8006 enum rtx_code code = GET_CODE (index);
8008 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8009 /* Standard coprocessor addressing modes. */
8010 if (TARGET_HARD_FLOAT
8011 && (mode == SFmode || mode == DFmode))
8012 return (code == CONST_INT && INTVAL (index) < 1024
8013 /* Thumb-2 allows only > -256 index range for its core register
8014 load/stores. Since we allow SF/DF in core registers, we have
8015 to use the intersection between -256~4096 (core) and -1024~1024
8016 (coprocessor). */
8017 && INTVAL (index) > -256
8018 && (INTVAL (index) & 3) == 0);
8020 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8022 /* For DImode assume values will usually live in core regs
8023 and only allow LDRD addressing modes. */
8024 if (!TARGET_LDRD || mode != DImode)
8025 return (code == CONST_INT
8026 && INTVAL (index) < 1024
8027 && INTVAL (index) > -1024
8028 && (INTVAL (index) & 3) == 0);
8031 /* For quad modes, we restrict the constant offset to be slightly less
8032 than what the instruction format permits. We do this because for
8033 quad mode moves, we will actually decompose them into two separate
8034 double-mode reads or writes. INDEX must therefore be a valid
8035 (double-mode) offset and so should INDEX+8. */
8036 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8037 return (code == CONST_INT
8038 && INTVAL (index) < 1016
8039 && INTVAL (index) > -1024
8040 && (INTVAL (index) & 3) == 0);
8042 /* We have no such constraint on double mode offsets, so we permit the
8043 full range of the instruction format. */
8044 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8045 return (code == CONST_INT
8046 && INTVAL (index) < 1024
8047 && INTVAL (index) > -1024
8048 && (INTVAL (index) & 3) == 0);
8050 if (arm_address_register_rtx_p (index, strict_p)
8051 && (GET_MODE_SIZE (mode) <= 4))
8052 return 1;
8054 if (mode == DImode || mode == DFmode)
8056 if (code == CONST_INT)
8058 HOST_WIDE_INT val = INTVAL (index);
8059 /* Thumb-2 ldrd only has reg+const addressing modes.
8060 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8061 If vldr is selected it uses arm_coproc_mem_operand. */
8062 if (TARGET_LDRD)
8063 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8064 else
8065 return IN_RANGE (val, -255, 4095 - 4);
8067 else
8068 return 0;
8071 if (code == MULT)
8073 rtx xiop0 = XEXP (index, 0);
8074 rtx xiop1 = XEXP (index, 1);
8076 return ((arm_address_register_rtx_p (xiop0, strict_p)
8077 && thumb2_index_mul_operand (xiop1))
8078 || (arm_address_register_rtx_p (xiop1, strict_p)
8079 && thumb2_index_mul_operand (xiop0)));
8081 else if (code == ASHIFT)
8083 rtx op = XEXP (index, 1);
8085 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8086 && CONST_INT_P (op)
8087 && INTVAL (op) > 0
8088 && INTVAL (op) <= 3);
8091 return (code == CONST_INT
8092 && INTVAL (index) < 4096
8093 && INTVAL (index) > -256);
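/* Editorial summary (derived from the cases above): for a 32-bit
   integer-mode Thumb-2 load/store this accepts immediate indexes in
   roughly [-255, 4095], plain register indexes, and scaled register
   indexes of the form "rM, lsl #n" with n in 1..3 (or the equivalent
   multiplications by 2, 4 or 8).  */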
8096 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8097 static int
8098 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8100 int regno;
8102 if (!REG_P (x))
8103 return 0;
8105 regno = REGNO (x);
8107 if (strict_p)
8108 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8110 return (regno <= LAST_LO_REGNUM
8111 || regno > LAST_VIRTUAL_REGISTER
8112 || regno == FRAME_POINTER_REGNUM
8113 || (GET_MODE_SIZE (mode) >= 4
8114 && (regno == STACK_POINTER_REGNUM
8115 || regno >= FIRST_PSEUDO_REGISTER
8116 || x == hard_frame_pointer_rtx
8117 || x == arg_pointer_rtx)));
8120 /* Return nonzero if x is a legitimate index register. This is the case
8121 for any base register that can access a QImode object. */
8122 inline static int
8123 thumb1_index_register_rtx_p (rtx x, int strict_p)
8125 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8128 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8130 The AP may be eliminated to either the SP or the FP, so we use the
8131 least common denominator, e.g. SImode, and offsets from 0 to 64.
8133 ??? Verify whether the above is the right approach.
8135 ??? Also, the FP may be eliminated to the SP, so perhaps that
8136 needs special handling also.
8138 ??? Look at how the mips16 port solves this problem. It probably uses
8139 better ways to solve some of these problems.
8141 Although it is not incorrect, we don't accept QImode and HImode
8142 addresses based on the frame pointer or arg pointer until the
8143 reload pass starts. This is so that eliminating such addresses
8144 into stack based ones won't produce impossible code. */
8146 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8148 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8149 return 0;
8151 /* ??? Not clear if this is right. Experiment. */
8152 if (GET_MODE_SIZE (mode) < 4
8153 && !(reload_in_progress || reload_completed)
8154 && (reg_mentioned_p (frame_pointer_rtx, x)
8155 || reg_mentioned_p (arg_pointer_rtx, x)
8156 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8157 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8158 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8159 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8160 return 0;
8162 /* Accept any base register. SP only in SImode or larger. */
8163 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8164 return 1;
8166 /* This is PC relative data before arm_reorg runs. */
8167 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8168 && GET_CODE (x) == SYMBOL_REF
8169 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8170 return 1;
8172 /* This is PC relative data after arm_reorg runs. */
8173 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8174 && reload_completed
8175 && (GET_CODE (x) == LABEL_REF
8176 || (GET_CODE (x) == CONST
8177 && GET_CODE (XEXP (x, 0)) == PLUS
8178 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8179 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8180 return 1;
8182 /* Post-inc indexing only supported for SImode and larger. */
8183 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8184 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8185 return 1;
8187 else if (GET_CODE (x) == PLUS)
8189 /* REG+REG address can be any two index registers. */
8190 /* We disallow FRAME+REG addressing since we know that FRAME
8191 will be replaced with STACK, and SP relative addressing only
8192 permits SP+OFFSET. */
8193 if (GET_MODE_SIZE (mode) <= 4
8194 && XEXP (x, 0) != frame_pointer_rtx
8195 && XEXP (x, 1) != frame_pointer_rtx
8196 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8197 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8198 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8199 return 1;
8201 /* REG+const has 5-7 bit offset for non-SP registers. */
8202 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8203 || XEXP (x, 0) == arg_pointer_rtx)
8204 && CONST_INT_P (XEXP (x, 1))
8205 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8206 return 1;
8208 /* REG+const has 10-bit offset for SP, but only SImode and
8209 larger are supported. */
8210 /* ??? Should probably check for DI/DFmode overflow here
8211 just like GO_IF_LEGITIMATE_OFFSET does. */
8212 else if (REG_P (XEXP (x, 0))
8213 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8214 && GET_MODE_SIZE (mode) >= 4
8215 && CONST_INT_P (XEXP (x, 1))
8216 && INTVAL (XEXP (x, 1)) >= 0
8217 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8218 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8219 return 1;
8221 else if (REG_P (XEXP (x, 0))
8222 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8223 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8224 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8225 && REGNO (XEXP (x, 0))
8226 <= LAST_VIRTUAL_POINTER_REGISTER))
8227 && GET_MODE_SIZE (mode) >= 4
8228 && CONST_INT_P (XEXP (x, 1))
8229 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8230 return 1;
8233 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8234 && GET_MODE_SIZE (mode) == 4
8235 && GET_CODE (x) == SYMBOL_REF
8236 && CONSTANT_POOL_ADDRESS_P (x)
8237 && ! (flag_pic
8238 && symbol_mentioned_p (get_pool_constant (x))
8239 && ! pcrel_constant_p (get_pool_constant (x))))
8240 return 1;
8242 return 0;
8245 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8246 instruction of mode MODE. */
8248 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8250 switch (GET_MODE_SIZE (mode))
8252 case 1:
8253 return val >= 0 && val < 32;
8255 case 2:
8256 return val >= 0 && val < 64 && (val & 1) == 0;
8258 default:
8259 return (val >= 0
8260 && (val + GET_MODE_SIZE (mode)) <= 128
8261 && (val & 3) == 0);
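/* Worked examples (derived from the cases above): a QImode offset may be
   0..31, an HImode offset 0..62 and even, and an SImode offset 0..124 and
   a multiple of 4; e.g. "ldr r0, [r1, #124]" is accepted while an offset
   of 126 is not.  */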
8265 bool
8266 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8268 if (TARGET_ARM)
8269 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8270 else if (TARGET_THUMB2)
8271 return thumb2_legitimate_address_p (mode, x, strict_p);
8272 else /* if (TARGET_THUMB1) */
8273 return thumb1_legitimate_address_p (mode, x, strict_p);
8276 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8278 Given an rtx X being reloaded into a reg required to be
8279 in class CLASS, return the class of reg to actually use.
8280 In general this is just CLASS, but for the Thumb core registers and
8281 immediate constants we prefer a LO_REGS class or a subset. */
8283 static reg_class_t
8284 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8286 if (TARGET_32BIT)
8287 return rclass;
8288 else
8290 if (rclass == GENERAL_REGS)
8291 return LO_REGS;
8292 else
8293 return rclass;
8297 /* Build the SYMBOL_REF for __tls_get_addr. */
8299 static GTY(()) rtx tls_get_addr_libfunc;
8301 static rtx
8302 get_tls_get_addr (void)
8304 if (!tls_get_addr_libfunc)
8305 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8306 return tls_get_addr_libfunc;
8310 arm_load_tp (rtx target)
8312 if (!target)
8313 target = gen_reg_rtx (SImode);
8315 if (TARGET_HARD_TP)
8317 /* Can return in any reg. */
8318 emit_insn (gen_load_tp_hard (target));
8320 else
8322 /* Always returned in r0. Immediately copy the result into a pseudo,
8323 otherwise other uses of r0 (e.g. setting up function arguments) may
8324 clobber the value. */
8326 rtx tmp;
8328 emit_insn (gen_load_tp_soft ());
8330 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8331 emit_move_insn (target, tmp);
8333 return target;
8336 static rtx
8337 load_tls_operand (rtx x, rtx reg)
8339 rtx tmp;
8341 if (reg == NULL_RTX)
8342 reg = gen_reg_rtx (SImode);
8344 tmp = gen_rtx_CONST (SImode, x);
8346 emit_move_insn (reg, tmp);
8348 return reg;
8351 static rtx_insn *
8352 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8354 rtx label, labelno, sum;
8356 gcc_assert (reloc != TLS_DESCSEQ);
8357 start_sequence ();
8359 labelno = GEN_INT (pic_labelno++);
8360 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8361 label = gen_rtx_CONST (VOIDmode, label);
8363 sum = gen_rtx_UNSPEC (Pmode,
8364 gen_rtvec (4, x, GEN_INT (reloc), label,
8365 GEN_INT (TARGET_ARM ? 8 : 4)),
8366 UNSPEC_TLS);
8367 reg = load_tls_operand (sum, reg);
8369 if (TARGET_ARM)
8370 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8371 else
8372 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8374 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8375 LCT_PURE, /* LCT_CONST? */
8376 Pmode, reg, Pmode);
8378 rtx_insn *insns = get_insns ();
8379 end_sequence ();
8381 return insns;
8384 static rtx
8385 arm_tls_descseq_addr (rtx x, rtx reg)
8387 rtx labelno = GEN_INT (pic_labelno++);
8388 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8389 rtx sum = gen_rtx_UNSPEC (Pmode,
8390 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8391 gen_rtx_CONST (VOIDmode, label),
8392 GEN_INT (!TARGET_ARM)),
8393 UNSPEC_TLS);
8394 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8396 emit_insn (gen_tlscall (x, labelno));
8397 if (!reg)
8398 reg = gen_reg_rtx (SImode);
8399 else
8400 gcc_assert (REGNO (reg) != R0_REGNUM);
8402 emit_move_insn (reg, reg0);
8404 return reg;
8408 legitimize_tls_address (rtx x, rtx reg)
8410 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8411 rtx_insn *insns;
8412 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8414 switch (model)
8416 case TLS_MODEL_GLOBAL_DYNAMIC:
8417 if (TARGET_GNU2_TLS)
8419 reg = arm_tls_descseq_addr (x, reg);
8421 tp = arm_load_tp (NULL_RTX);
8423 dest = gen_rtx_PLUS (Pmode, tp, reg);
8425 else
8427 /* Original scheme */
8428 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8429 dest = gen_reg_rtx (Pmode);
8430 emit_libcall_block (insns, dest, ret, x);
8432 return dest;
8434 case TLS_MODEL_LOCAL_DYNAMIC:
8435 if (TARGET_GNU2_TLS)
8437 reg = arm_tls_descseq_addr (x, reg);
8439 tp = arm_load_tp (NULL_RTX);
8441 dest = gen_rtx_PLUS (Pmode, tp, reg);
8443 else
8445 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8447 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8448 share the LDM result with other LD model accesses. */
8449 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8450 UNSPEC_TLS);
8451 dest = gen_reg_rtx (Pmode);
8452 emit_libcall_block (insns, dest, ret, eqv);
8454 /* Load the addend. */
8455 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8456 GEN_INT (TLS_LDO32)),
8457 UNSPEC_TLS);
8458 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8459 dest = gen_rtx_PLUS (Pmode, dest, addend);
8461 return dest;
8463 case TLS_MODEL_INITIAL_EXEC:
8464 labelno = GEN_INT (pic_labelno++);
8465 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8466 label = gen_rtx_CONST (VOIDmode, label);
8467 sum = gen_rtx_UNSPEC (Pmode,
8468 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8469 GEN_INT (TARGET_ARM ? 8 : 4)),
8470 UNSPEC_TLS);
8471 reg = load_tls_operand (sum, reg);
8473 if (TARGET_ARM)
8474 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8475 else if (TARGET_THUMB2)
8476 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8477 else
8479 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8480 emit_move_insn (reg, gen_const_mem (SImode, reg));
8483 tp = arm_load_tp (NULL_RTX);
8485 return gen_rtx_PLUS (Pmode, tp, reg);
8487 case TLS_MODEL_LOCAL_EXEC:
8488 tp = arm_load_tp (NULL_RTX);
8490 reg = gen_rtx_UNSPEC (Pmode,
8491 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8492 UNSPEC_TLS);
8493 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8495 return gen_rtx_PLUS (Pmode, tp, reg);
8497 default:
8498 abort ();
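/* Editorial sketch (assumption, simplified) of the resulting access styles:

     global/local dynamic:  address = __tls_get_addr (...), or with GNU2
                            TLS the descriptor result added to the thread
                            pointer
     initial exec:          address = tp + [GOT entry holding sym(gottpoff)]
     local exec:            address = tp + sym(tpoff)

   where tp is the thread pointer from arm_load_tp, read from CP15 for
   TARGET_HARD_TP or via a helper call (commonly __aeabi_read_tp)
   otherwise.  */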
8502 /* Try machine-dependent ways of modifying an illegitimate address
8503 to be legitimate. If we find one, return the new, valid address. */
8505 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8507 if (arm_tls_referenced_p (x))
8509 rtx addend = NULL;
8511 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8513 addend = XEXP (XEXP (x, 0), 1);
8514 x = XEXP (XEXP (x, 0), 0);
8517 if (GET_CODE (x) != SYMBOL_REF)
8518 return x;
8520 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8522 x = legitimize_tls_address (x, NULL_RTX);
8524 if (addend)
8526 x = gen_rtx_PLUS (SImode, x, addend);
8527 orig_x = x;
8529 else
8530 return x;
8533 if (!TARGET_ARM)
8535 /* TODO: legitimize_address for Thumb2. */
8536 if (TARGET_THUMB2)
8537 return x;
8538 return thumb_legitimize_address (x, orig_x, mode);
8541 if (GET_CODE (x) == PLUS)
8543 rtx xop0 = XEXP (x, 0);
8544 rtx xop1 = XEXP (x, 1);
8546 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8547 xop0 = force_reg (SImode, xop0);
8549 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8550 && !symbol_mentioned_p (xop1))
8551 xop1 = force_reg (SImode, xop1);
8553 if (ARM_BASE_REGISTER_RTX_P (xop0)
8554 && CONST_INT_P (xop1))
8556 HOST_WIDE_INT n, low_n;
8557 rtx base_reg, val;
8558 n = INTVAL (xop1);
8560 /* VFP addressing modes actually allow greater offsets, but for
8561 now we just stick with the lowest common denominator. */
8562 if (mode == DImode || mode == DFmode)
8564 low_n = n & 0x0f;
8565 n &= ~0x0f;
8566 if (low_n > 4)
8568 n += 16;
8569 low_n -= 16;
8572 else
8574 low_n = ((mode) == TImode ? 0
8575 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8576 n -= low_n;
8579 base_reg = gen_reg_rtx (SImode);
8580 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8581 emit_move_insn (base_reg, val);
8582 x = plus_constant (Pmode, base_reg, low_n);
8584 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8585 x = gen_rtx_PLUS (SImode, xop0, xop1);
8588 /* XXX We don't allow MINUS any more -- see comment in
8589 arm_legitimate_address_outer_p (). */
8590 else if (GET_CODE (x) == MINUS)
8592 rtx xop0 = XEXP (x, 0);
8593 rtx xop1 = XEXP (x, 1);
8595 if (CONSTANT_P (xop0))
8596 xop0 = force_reg (SImode, xop0);
8598 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8599 xop1 = force_reg (SImode, xop1);
8601 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8602 x = gen_rtx_MINUS (SImode, xop0, xop1);
8605 /* Make sure to take full advantage of the pre-indexed addressing mode
8606 with absolute addresses which often allows for the base register to
8607 be factorized for multiple adjacent memory references, and might
8608 even allow the minipool to be avoided entirely. */
8609 else if (CONST_INT_P (x) && optimize > 0)
8611 unsigned int bits;
8612 HOST_WIDE_INT mask, base, index;
8613 rtx base_reg;
8615 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8616 use an 8-bit index. So let's use a 12-bit index for SImode only and
8617 hope that arm_gen_constant will enable ldrb to use more bits. */
8618 bits = (mode == SImode) ? 12 : 8;
8619 mask = (1 << bits) - 1;
8620 base = INTVAL (x) & ~mask;
8621 index = INTVAL (x) & mask;
8622 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8624 /* It'll most probably be more efficient to generate the base
8625 with more bits set and use a negative index instead. */
8626 base |= mask;
8627 index -= mask;
8629 base_reg = force_reg (SImode, GEN_INT (base));
8630 x = plus_constant (Pmode, base_reg, index);
8633 if (flag_pic)
8635 /* We need to find and carefully transform any SYMBOL and LABEL
8636 references; so go back to the original address expression. */
8637 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8639 if (new_x != orig_x)
8640 x = new_x;
8643 return x;
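/* Editorial walk-through of the CONST_INT branch above (derived from the
   code): for mode == SImode and x == 0x12345678, bits is 12, so base
   becomes 0x12345000 and index 0x678; as fewer than (32 - bits)/2 bits are
   set in the base, no negative-index rewrite is applied, the base is
   forced into a register, and the address becomes (plus base_reg 0x678),
   letting nearby absolute addresses share one base register.  */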
8647 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8648 to be legitimate. If we find one, return the new, valid address. */
8650 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8652 if (GET_CODE (x) == PLUS
8653 && CONST_INT_P (XEXP (x, 1))
8654 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8655 || INTVAL (XEXP (x, 1)) < 0))
8657 rtx xop0 = XEXP (x, 0);
8658 rtx xop1 = XEXP (x, 1);
8659 HOST_WIDE_INT offset = INTVAL (xop1);
8661 /* Try and fold the offset into a biasing of the base register and
8662 then offsetting that. Don't do this when optimizing for space
8663 since it can cause too many CSEs. */
8664 if (optimize_size && offset >= 0
8665 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8667 HOST_WIDE_INT delta;
8669 if (offset >= 256)
8670 delta = offset - (256 - GET_MODE_SIZE (mode));
8671 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8672 delta = 31 * GET_MODE_SIZE (mode);
8673 else
8674 delta = offset & (~31 * GET_MODE_SIZE (mode));
8676 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8677 NULL_RTX);
8678 x = plus_constant (Pmode, xop0, delta);
8680 else if (offset < 0 && offset > -256)
8681 /* Small negative offsets are best done with a subtract before the
8682 dereference; forcing these into a register normally takes two
8683 instructions. */
8684 x = force_operand (x, NULL_RTX);
8685 else
8687 /* For the remaining cases, force the constant into a register. */
8688 xop1 = force_reg (SImode, xop1);
8689 x = gen_rtx_PLUS (SImode, xop0, xop1);
8692 else if (GET_CODE (x) == PLUS
8693 && s_register_operand (XEXP (x, 1), SImode)
8694 && !s_register_operand (XEXP (x, 0), SImode))
8696 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8698 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8701 if (flag_pic)
8703 /* We need to find and carefully transform any SYMBOL and LABEL
8704 references; so go back to the original address expression. */
8705 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8707 if (new_x != orig_x)
8708 x = new_x;
8711 return x;
8714 /* Return TRUE if X contains any TLS symbol references. */
8716 bool
8717 arm_tls_referenced_p (rtx x)
8719 if (! TARGET_HAVE_TLS)
8720 return false;
8722 subrtx_iterator::array_type array;
8723 FOR_EACH_SUBRTX (iter, array, x, ALL)
8725 const_rtx x = *iter;
8726 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8728 /* ARM currently does not provide relocations to encode TLS variables
8729 into AArch32 instructions, only data, so there is currently no way to
8730 implement these if a literal pool is disabled. */
8731 if (arm_disable_literal_pool)
8732 sorry ("accessing thread-local storage is not currently supported "
8733 "with -mpure-code or -mslow-flash-data");
8735 return true;
8738 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8739 TLS offsets, not real symbol references. */
8740 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8741 iter.skip_subrtxes ();
8743 return false;
8746 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8748 On the ARM, allow any integer (invalid ones are removed later by insn
8749 patterns), nice doubles and symbol_refs which refer to the function's
8750 constant pool XXX.
8752 When generating pic allow anything. */
8754 static bool
8755 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8757 return flag_pic || !label_mentioned_p (x);
8760 static bool
8761 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8763 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8764 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8765 for ARMv8-M Baseline or later the result is valid. */
8766 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8767 x = XEXP (x, 0);
8769 return (CONST_INT_P (x)
8770 || CONST_DOUBLE_P (x)
8771 || CONSTANT_ADDRESS_P (x)
8772 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8773 || flag_pic);
8776 static bool
8777 arm_legitimate_constant_p (machine_mode mode, rtx x)
8779 return (!arm_cannot_force_const_mem (mode, x)
8780 && (TARGET_32BIT
8781 ? arm_legitimate_constant_p_1 (mode, x)
8782 : thumb_legitimate_constant_p (mode, x)));
8785 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8787 static bool
8788 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8790 rtx base, offset;
8792 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8794 split_const (x, &base, &offset);
8795 if (GET_CODE (base) == SYMBOL_REF
8796 && !offset_within_block_p (base, INTVAL (offset)))
8797 return true;
8799 return arm_tls_referenced_p (x);
8802 #define REG_OR_SUBREG_REG(X) \
8803 (REG_P (X) \
8804 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8806 #define REG_OR_SUBREG_RTX(X) \
8807 (REG_P (X) ? (X) : SUBREG_REG (X))
8809 static inline int
8810 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8812 machine_mode mode = GET_MODE (x);
8813 int total, words;
8815 switch (code)
8817 case ASHIFT:
8818 case ASHIFTRT:
8819 case LSHIFTRT:
8820 case ROTATERT:
8821 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8823 case PLUS:
8824 case MINUS:
8825 case COMPARE:
8826 case NEG:
8827 case NOT:
8828 return COSTS_N_INSNS (1);
8830 case MULT:
8831 if (arm_arch6m && arm_m_profile_small_mul)
8832 return COSTS_N_INSNS (32);
8834 if (CONST_INT_P (XEXP (x, 1)))
8836 int cycles = 0;
8837 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8839 while (i)
8841 i >>= 2;
8842 cycles++;
8844 return COSTS_N_INSNS (2) + cycles;
8846 return COSTS_N_INSNS (1) + 16;
8848 case SET:
8849 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8850 the mode. */
8851 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8852 return (COSTS_N_INSNS (words)
8853 + 4 * ((MEM_P (SET_SRC (x)))
8854 + MEM_P (SET_DEST (x))));
8856 case CONST_INT:
8857 if (outer == SET)
8859 if (UINTVAL (x) < 256
8860 /* 16-bit constant. */
8861 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8862 return 0;
8863 if (thumb_shiftable_const (INTVAL (x)))
8864 return COSTS_N_INSNS (2);
8865 return COSTS_N_INSNS (3);
8867 else if ((outer == PLUS || outer == COMPARE)
8868 && INTVAL (x) < 256 && INTVAL (x) > -256)
8869 return 0;
8870 else if ((outer == IOR || outer == XOR || outer == AND)
8871 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8872 return COSTS_N_INSNS (1);
8873 else if (outer == AND)
8875 int i;
8876 /* This duplicates the tests in the andsi3 expander. */
8877 for (i = 9; i <= 31; i++)
8878 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8879 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8880 return COSTS_N_INSNS (2);
8882 else if (outer == ASHIFT || outer == ASHIFTRT
8883 || outer == LSHIFTRT)
8884 return 0;
8885 return COSTS_N_INSNS (2);
8887 case CONST:
8888 case CONST_DOUBLE:
8889 case LABEL_REF:
8890 case SYMBOL_REF:
8891 return COSTS_N_INSNS (3);
8893 case UDIV:
8894 case UMOD:
8895 case DIV:
8896 case MOD:
8897 return 100;
8899 case TRUNCATE:
8900 return 99;
8902 case AND:
8903 case XOR:
8904 case IOR:
8905 /* XXX guess. */
8906 return 8;
8908 case MEM:
8909 /* XXX another guess. */
8910 /* Memory costs quite a lot for the first word, but subsequent words
8911 load at the equivalent of a single insn each. */
8912 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8913 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8914 ? 4 : 0));
8916 case IF_THEN_ELSE:
8917 /* XXX a guess. */
8918 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8919 return 14;
8920 return 2;
8922 case SIGN_EXTEND:
8923 case ZERO_EXTEND:
8924 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8925 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8927 if (mode == SImode)
8928 return total;
8930 if (arm_arch6)
8931 return total + COSTS_N_INSNS (1);
8933 /* Assume a two-shift sequence. Increase the cost slightly so
8934 we prefer actual shifts over an extend operation. */
8935 return total + 1 + COSTS_N_INSNS (2);
8937 default:
8938 return 99;
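/* Worked example (derived from the MULT case above): without the small
   M-profile multiplier, (mult (reg) (const_int 0x55)) walks the constant
   two bits per iteration; 0x55 needs four iterations, so the cost is
   COSTS_N_INSNS (2) + 4.  */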
8942 /* Estimates the size cost of thumb1 instructions.
8943 For now most of the code is copied from thumb1_rtx_costs. We need more
8944 fine grain tuning when we have more related test cases. */
8945 static inline int
8946 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8948 machine_mode mode = GET_MODE (x);
8949 int words, cost;
8951 switch (code)
8953 case ASHIFT:
8954 case ASHIFTRT:
8955 case LSHIFTRT:
8956 case ROTATERT:
8957 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8959 case PLUS:
8960 case MINUS:
8961 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8962 defined by RTL expansion, especially for the expansion of
8963 multiplication. */
8964 if ((GET_CODE (XEXP (x, 0)) == MULT
8965 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8966 || (GET_CODE (XEXP (x, 1)) == MULT
8967 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8968 return COSTS_N_INSNS (2);
8969 /* Fall through. */
8970 case COMPARE:
8971 case NEG:
8972 case NOT:
8973 return COSTS_N_INSNS (1);
8975 case MULT:
8976 if (CONST_INT_P (XEXP (x, 1)))
8978 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8979 into a register first. */
8980 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8981 /* For targets with a very small, high-latency multiply
8982 unit, we prefer to synthesize the multiplication with up to 5 instructions,
8983 giving a good balance between size and performance. */
8984 if (arm_arch6m && arm_m_profile_small_mul)
8985 return COSTS_N_INSNS (5);
8986 else
8987 return COSTS_N_INSNS (1) + const_size;
8989 return COSTS_N_INSNS (1);
8991 case SET:
8992 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8993 the mode. */
8994 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8995 cost = COSTS_N_INSNS (words);
8996 if (satisfies_constraint_J (SET_SRC (x))
8997 || satisfies_constraint_K (SET_SRC (x))
8998 /* Too big an immediate for a 2-byte mov, using MOVT. */
8999 || (CONST_INT_P (SET_SRC (x))
9000 && UINTVAL (SET_SRC (x)) >= 256
9001 && TARGET_HAVE_MOVT
9002 && satisfies_constraint_j (SET_SRC (x)))
9003 /* thumb1_movdi_insn. */
9004 || ((words > 1) && MEM_P (SET_SRC (x))))
9005 cost += COSTS_N_INSNS (1);
9006 return cost;
9008 case CONST_INT:
9009 if (outer == SET)
9011 if (UINTVAL (x) < 256)
9012 return COSTS_N_INSNS (1);
9013 /* movw is 4 bytes long. */
9014 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9015 return COSTS_N_INSNS (2);
9016 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9017 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9018 return COSTS_N_INSNS (2);
9019 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9020 if (thumb_shiftable_const (INTVAL (x)))
9021 return COSTS_N_INSNS (2);
9022 return COSTS_N_INSNS (3);
9024 else if ((outer == PLUS || outer == COMPARE)
9025 && INTVAL (x) < 256 && INTVAL (x) > -256)
9026 return 0;
9027 else if ((outer == IOR || outer == XOR || outer == AND)
9028 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9029 return COSTS_N_INSNS (1);
9030 else if (outer == AND)
9032 int i;
9033 /* This duplicates the tests in the andsi3 expander. */
9034 for (i = 9; i <= 31; i++)
9035 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9036 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9037 return COSTS_N_INSNS (2);
9039 else if (outer == ASHIFT || outer == ASHIFTRT
9040 || outer == LSHIFTRT)
9041 return 0;
9042 return COSTS_N_INSNS (2);
9044 case CONST:
9045 case CONST_DOUBLE:
9046 case LABEL_REF:
9047 case SYMBOL_REF:
9048 return COSTS_N_INSNS (3);
9050 case UDIV:
9051 case UMOD:
9052 case DIV:
9053 case MOD:
9054 return 100;
9056 case TRUNCATE:
9057 return 99;
9059 case AND:
9060 case XOR:
9061 case IOR:
9062 return COSTS_N_INSNS (1);
9064 case MEM:
9065 return (COSTS_N_INSNS (1)
9066 + COSTS_N_INSNS (1)
9067 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9068 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9069 ? COSTS_N_INSNS (1) : 0));
9071 case IF_THEN_ELSE:
9072 /* XXX a guess. */
9073 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9074 return 14;
9075 return 2;
9077 case ZERO_EXTEND:
9078 /* XXX still guessing. */
9079 switch (GET_MODE (XEXP (x, 0)))
9081 case E_QImode:
9082 return (1 + (mode == DImode ? 4 : 0)
9083 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9085 case E_HImode:
9086 return (4 + (mode == DImode ? 4 : 0)
9087 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9089 case E_SImode:
9090 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9092 default:
9093 return 99;
9096 default:
9097 return 99;
9101 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9102 operand, then return the operand that is being shifted. If the shift
9103 is not by a constant, then set SHIFT_REG to point to the operand.
9104 Return NULL if OP is not a shifter operand. */
9105 static rtx
9106 shifter_op_p (rtx op, rtx *shift_reg)
9108 enum rtx_code code = GET_CODE (op);
9110 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9111 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9112 return XEXP (op, 0);
9113 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9114 return XEXP (op, 0);
9115 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9116 || code == ASHIFTRT)
9118 if (!CONST_INT_P (XEXP (op, 1)))
9119 *shift_reg = XEXP (op, 1);
9120 return XEXP (op, 0);
9123 return NULL;
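/* Editorial examples (derived from the code above):
   (mult (reg r1) (const_int 4)) and (ashift (reg r1) (const_int 2)) are
   both shifter operands describing "r1, lsl #2"; each returns r1 without
   touching *SHIFT_REG.  For (ashift (reg r1) (reg r2)) the function
   returns r1 and sets *SHIFT_REG to r2.  */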
9126 static bool
9127 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9129 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9130 rtx_code code = GET_CODE (x);
9131 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9133 switch (XINT (x, 1))
9135 case UNSPEC_UNALIGNED_LOAD:
9136 /* We can only do unaligned loads into the integer unit, and we can't
9137 use LDM or LDRD. */
9138 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9139 if (speed_p)
9140 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9141 + extra_cost->ldst.load_unaligned);
9143 #ifdef NOT_YET
9144 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9145 ADDR_SPACE_GENERIC, speed_p);
9146 #endif
9147 return true;
9149 case UNSPEC_UNALIGNED_STORE:
9150 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9151 if (speed_p)
9152 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9153 + extra_cost->ldst.store_unaligned);
9155 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9156 #ifdef NOT_YET
9157 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9158 ADDR_SPACE_GENERIC, speed_p);
9159 #endif
9160 return true;
9162 case UNSPEC_VRINTZ:
9163 case UNSPEC_VRINTP:
9164 case UNSPEC_VRINTM:
9165 case UNSPEC_VRINTR:
9166 case UNSPEC_VRINTX:
9167 case UNSPEC_VRINTA:
9168 if (speed_p)
9169 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9171 return true;
9172 default:
9173 *cost = COSTS_N_INSNS (2);
9174 break;
9176 return true;
9179 /* Cost of a libcall. We assume one insn per argument, an amount for the
9180 call (one insn for -Os) and then one for processing the result. */
9181 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
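/* Illustration (editorial, not from the original source): with the
   definition above, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20)
   when optimizing for speed and COSTS_N_INSNS (4) when optimizing for
   size, i.e. one insn per argument plus the call overhead described in
   the comment.  */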
9183 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9184 do \
9186 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9187 if (shift_op != NULL \
9188 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9190 if (shift_reg) \
9192 if (speed_p) \
9193 *cost += extra_cost->alu.arith_shift_reg; \
9194 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9195 ASHIFT, 1, speed_p); \
9197 else if (speed_p) \
9198 *cost += extra_cost->alu.arith_shift; \
9200 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9201 ASHIFT, 0, speed_p) \
9202 + rtx_cost (XEXP (x, 1 - IDX), \
9203 GET_MODE (shift_op), \
9204 OP, 1, speed_p)); \
9205 return true; \
9208 while (0);
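/* Usage sketch (editorial, illustrative only) for the macro above, as
   it appears in the PLUS and MINUS cases below:

     rtx shift_op, shift_reg = NULL;
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

   i.e. when operand IDX of X is a (left) shift, the macro charges an
   arith+shift ALU cost (register- or immediate-shifted as appropriate)
   and returns true from the cost function.  */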
9210 /* RTX costs. Make an estimate of the cost of executing the operation
9211 X, which is contained within an operation with code OUTER_CODE.
9212 SPEED_P indicates whether the cost desired is the performance cost,
9213 or the size cost. The estimate is stored in COST and the return
9214 value is TRUE if the cost calculation is final, or FALSE if the
9215 caller should recurse through the operands of X to add additional
9216 costs.
9218 We currently make no attempt to model the size savings of Thumb-2
9219 16-bit instructions. At the normal points in compilation where
9220 this code is called we have no measure of whether the condition
9221 flags are live or not, and thus no realistic way to determine what
9222 the size will eventually be. */
9223 static bool
9224 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9225 const struct cpu_cost_table *extra_cost,
9226 int *cost, bool speed_p)
9228 machine_mode mode = GET_MODE (x);
9230 *cost = COSTS_N_INSNS (1);
9232 if (TARGET_THUMB1)
9234 if (speed_p)
9235 *cost = thumb1_rtx_costs (x, code, outer_code);
9236 else
9237 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9238 return true;
9241 switch (code)
9243 case SET:
9244 *cost = 0;
9245 /* SET RTXs don't have a mode so we get it from the destination. */
9246 mode = GET_MODE (SET_DEST (x));
9248 if (REG_P (SET_SRC (x))
9249 && REG_P (SET_DEST (x)))
9251 /* Assume that most copies can be done with a single insn,
9252 unless we don't have HW FP, in which case everything
9253 larger than word mode will require two insns. */
9254 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9255 && GET_MODE_SIZE (mode) > 4)
9256 || mode == DImode)
9257 ? 2 : 1);
9258 /* Conditional register moves can be encoded
9259 in 16 bits in Thumb mode. */
9260 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9261 *cost >>= 1;
9263 return true;
9266 if (CONST_INT_P (SET_SRC (x)))
9268 /* Handle CONST_INT here, since the value doesn't have a mode
9269 and we would otherwise be unable to work out the true cost. */
9270 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9271 0, speed_p);
9272 outer_code = SET;
9273 /* Slightly lower the cost of setting a core reg to a constant.
9274 This helps break up chains and allows for better scheduling. */
9275 if (REG_P (SET_DEST (x))
9276 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9277 *cost -= 1;
9278 x = SET_SRC (x);
9279 /* Immediate moves with an immediate in the range [0, 255] can be
9280 encoded in 16 bits in Thumb mode. */
9281 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9282 && INTVAL (x) >= 0 && INTVAL (x) <= 255
9283 *cost >>= 1;
9284 goto const_int_cost;
9287 return false;
9289 case MEM:
9290 /* A memory access costs 1 insn if the mode is small, or the address is
9291 a single register, otherwise it costs one insn per word. */
9292 if (REG_P (XEXP (x, 0)))
9293 *cost = COSTS_N_INSNS (1);
9294 else if (flag_pic
9295 && GET_CODE (XEXP (x, 0)) == PLUS
9296 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9297 /* This will be split into two instructions.
9298 See arm.md:calculate_pic_address. */
9299 *cost = COSTS_N_INSNS (2);
9300 else
9301 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9303 /* For speed optimizations, add the costs of the address and
9304 accessing memory. */
9305 if (speed_p)
9306 #ifdef NOT_YET
9307 *cost += (extra_cost->ldst.load
9308 + arm_address_cost (XEXP (x, 0), mode,
9309 ADDR_SPACE_GENERIC, speed_p));
9310 #else
9311 *cost += extra_cost->ldst.load;
9312 #endif
9313 return true;
9315 case PARALLEL:
9317 /* Calculations of LDM costs are complex. We assume an initial cost
9318 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9319 registers; then each additional
9320 ldm_regs_per_insn_subsequent registers cost one more insn. The
9321 formula for N regs is thus:
9323 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9324 + ldm_regs_per_insn_subsequent - 1)
9325 / ldm_regs_per_insn_subsequent).
9327 Additional costs may also be added for addressing. A similar
9328 formula is used for STM. */
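/* Worked example of the formula in the comment above (editorial, with
   hypothetical tuning values ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2): an LDM of five registers would cost
     ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
       == ldm_1st + COSTS_N_INSNS (2).  */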
9330 bool is_ldm = load_multiple_operation (x, SImode);
9331 bool is_stm = store_multiple_operation (x, SImode);
9333 if (is_ldm || is_stm)
9335 if (speed_p)
9337 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9338 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9339 ? extra_cost->ldst.ldm_regs_per_insn_1st
9340 : extra_cost->ldst.stm_regs_per_insn_1st;
9341 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9342 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9343 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9345 *cost += regs_per_insn_1st
9346 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9347 + regs_per_insn_sub - 1)
9348 / regs_per_insn_sub);
9349 return true;
9353 return false;
9355 case DIV:
9356 case UDIV:
9357 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9358 && (mode == SFmode || !TARGET_VFP_SINGLE))
9359 *cost += COSTS_N_INSNS (speed_p
9360 ? extra_cost->fp[mode != SFmode].div : 0);
9361 else if (mode == SImode && TARGET_IDIV)
9362 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9363 else
9364 *cost = LIBCALL_COST (2);
9366 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9367 possible, udiv is preferred. */
9368 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9369 return false; /* All arguments must be in registers. */
9371 case MOD:
9372 /* MOD by a power of 2 can be expanded as:
9373 rsbs r1, r0, #0
9374 and r0, r0, #(n - 1)
9375 and r1, r1, #(n - 1)
9376 rsbpl r0, r1, #0. */
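/* For instance (editorial illustration), a signed SImode "x % 16" uses
   n == 16 in the pattern above:
     rsbs  r1, r0, #0
     and   r0, r0, #15
     and   r1, r1, #15
     rsbpl r0, r1, #0
   hence the three insns added to the base cost below.  */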
9377 if (CONST_INT_P (XEXP (x, 1))
9378 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9379 && mode == SImode)
9381 *cost += COSTS_N_INSNS (3);
9383 if (speed_p)
9384 *cost += 2 * extra_cost->alu.logical
9385 + extra_cost->alu.arith;
9386 return true;
9389 /* Fall-through. */
9390 case UMOD:
9391 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9392 possible, udiv is preferred. */
9393 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9394 return false; /* All arguments must be in registers. */
9396 case ROTATE:
9397 if (mode == SImode && REG_P (XEXP (x, 1)))
9399 *cost += (COSTS_N_INSNS (1)
9400 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9401 if (speed_p)
9402 *cost += extra_cost->alu.shift_reg;
9403 return true;
9405 /* Fall through */
9406 case ROTATERT:
9407 case ASHIFT:
9408 case LSHIFTRT:
9409 case ASHIFTRT:
9410 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9412 *cost += (COSTS_N_INSNS (2)
9413 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9414 if (speed_p)
9415 *cost += 2 * extra_cost->alu.shift;
9416 return true;
9418 else if (mode == SImode)
9420 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9421 /* Slightly disparage register shifts at -Os, but not by much. */
9422 if (!CONST_INT_P (XEXP (x, 1)))
9423 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9424 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9425 return true;
9427 else if (GET_MODE_CLASS (mode) == MODE_INT
9428 && GET_MODE_SIZE (mode) < 4)
9430 if (code == ASHIFT)
9432 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9433 /* Slightly disparage register shifts at -Os, but not by
9434 much. */
9435 if (!CONST_INT_P (XEXP (x, 1)))
9436 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9437 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9439 else if (code == LSHIFTRT || code == ASHIFTRT)
9441 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9443 /* Can use SBFX/UBFX. */
9444 if (speed_p)
9445 *cost += extra_cost->alu.bfx;
9446 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9448 else
9450 *cost += COSTS_N_INSNS (1);
9451 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9452 if (speed_p)
9454 if (CONST_INT_P (XEXP (x, 1)))
9455 *cost += 2 * extra_cost->alu.shift;
9456 else
9457 *cost += (extra_cost->alu.shift
9458 + extra_cost->alu.shift_reg);
9460 else
9461 /* Slightly disparage register shifts. */
9462 *cost += !CONST_INT_P (XEXP (x, 1));
9465 else /* Rotates. */
9467 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9468 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9469 if (speed_p)
9471 if (CONST_INT_P (XEXP (x, 1)))
9472 *cost += (2 * extra_cost->alu.shift
9473 + extra_cost->alu.log_shift);
9474 else
9475 *cost += (extra_cost->alu.shift
9476 + extra_cost->alu.shift_reg
9477 + extra_cost->alu.log_shift_reg);
9480 return true;
9483 *cost = LIBCALL_COST (2);
9484 return false;
9486 case BSWAP:
9487 if (arm_arch6)
9489 if (mode == SImode)
9491 if (speed_p)
9492 *cost += extra_cost->alu.rev;
9494 return false;
9497 else
9499 /* No rev instruction available. Look at arm_legacy_rev
9500 and thumb_legacy_rev for the form of RTL used then. */
9501 if (TARGET_THUMB)
9503 *cost += COSTS_N_INSNS (9);
9505 if (speed_p)
9507 *cost += 6 * extra_cost->alu.shift;
9508 *cost += 3 * extra_cost->alu.logical;
9511 else
9513 *cost += COSTS_N_INSNS (4);
9515 if (speed_p)
9517 *cost += 2 * extra_cost->alu.shift;
9518 *cost += extra_cost->alu.arith_shift;
9519 *cost += 2 * extra_cost->alu.logical;
9522 return true;
9524 return false;
9526 case MINUS:
9527 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9528 && (mode == SFmode || !TARGET_VFP_SINGLE))
9530 if (GET_CODE (XEXP (x, 0)) == MULT
9531 || GET_CODE (XEXP (x, 1)) == MULT)
9533 rtx mul_op0, mul_op1, sub_op;
9535 if (speed_p)
9536 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9538 if (GET_CODE (XEXP (x, 0)) == MULT)
9540 mul_op0 = XEXP (XEXP (x, 0), 0);
9541 mul_op1 = XEXP (XEXP (x, 0), 1);
9542 sub_op = XEXP (x, 1);
9544 else
9546 mul_op0 = XEXP (XEXP (x, 1), 0);
9547 mul_op1 = XEXP (XEXP (x, 1), 1);
9548 sub_op = XEXP (x, 0);
9551 /* The first operand of the multiply may be optionally
9552 negated. */
9553 if (GET_CODE (mul_op0) == NEG)
9554 mul_op0 = XEXP (mul_op0, 0);
9556 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9557 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9558 + rtx_cost (sub_op, mode, code, 0, speed_p));
9560 return true;
9563 if (speed_p)
9564 *cost += extra_cost->fp[mode != SFmode].addsub;
9565 return false;
9568 if (mode == SImode)
9570 rtx shift_by_reg = NULL;
9571 rtx shift_op;
9572 rtx non_shift_op;
9574 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9575 if (shift_op == NULL)
9577 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9578 non_shift_op = XEXP (x, 0);
9580 else
9581 non_shift_op = XEXP (x, 1);
9583 if (shift_op != NULL)
9585 if (shift_by_reg != NULL)
9587 if (speed_p)
9588 *cost += extra_cost->alu.arith_shift_reg;
9589 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9591 else if (speed_p)
9592 *cost += extra_cost->alu.arith_shift;
9594 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9595 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9596 return true;
9599 if (arm_arch_thumb2
9600 && GET_CODE (XEXP (x, 1)) == MULT)
9602 /* MLS. */
9603 if (speed_p)
9604 *cost += extra_cost->mult[0].add;
9605 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9606 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9607 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9608 return true;
9611 if (CONST_INT_P (XEXP (x, 0)))
9613 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9614 INTVAL (XEXP (x, 0)), NULL_RTX,
9615 NULL_RTX, 1, 0);
9616 *cost = COSTS_N_INSNS (insns);
9617 if (speed_p)
9618 *cost += insns * extra_cost->alu.arith;
9619 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9620 return true;
9622 else if (speed_p)
9623 *cost += extra_cost->alu.arith;
9625 return false;
9628 if (GET_MODE_CLASS (mode) == MODE_INT
9629 && GET_MODE_SIZE (mode) < 4)
9631 rtx shift_op, shift_reg;
9632 shift_reg = NULL;
9634 /* We check both sides of the MINUS for shifter operands since,
9635 unlike PLUS, it's not commutative. */
9637 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9638 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9640 /* Slightly disparage, as we might need to widen the result. */
9641 *cost += 1;
9642 if (speed_p)
9643 *cost += extra_cost->alu.arith;
9645 if (CONST_INT_P (XEXP (x, 0)))
9647 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9648 return true;
9651 return false;
9654 if (mode == DImode)
9656 *cost += COSTS_N_INSNS (1);
9658 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9660 rtx op1 = XEXP (x, 1);
9662 if (speed_p)
9663 *cost += 2 * extra_cost->alu.arith;
9665 if (GET_CODE (op1) == ZERO_EXTEND)
9666 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9667 0, speed_p);
9668 else
9669 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9670 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9671 0, speed_p);
9672 return true;
9674 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9676 if (speed_p)
9677 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9678 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9679 0, speed_p)
9680 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9681 return true;
9683 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9684 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9686 if (speed_p)
9687 *cost += (extra_cost->alu.arith
9688 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9689 ? extra_cost->alu.arith
9690 : extra_cost->alu.arith_shift));
9691 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9692 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9693 GET_CODE (XEXP (x, 1)), 0, speed_p));
9694 return true;
9697 if (speed_p)
9698 *cost += 2 * extra_cost->alu.arith;
9699 return false;
9702 /* Vector mode? */
9704 *cost = LIBCALL_COST (2);
9705 return false;
9707 case PLUS:
9708 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9709 && (mode == SFmode || !TARGET_VFP_SINGLE))
9711 if (GET_CODE (XEXP (x, 0)) == MULT)
9713 rtx mul_op0, mul_op1, add_op;
9715 if (speed_p)
9716 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9718 mul_op0 = XEXP (XEXP (x, 0), 0);
9719 mul_op1 = XEXP (XEXP (x, 0), 1);
9720 add_op = XEXP (x, 1);
9722 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9723 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9724 + rtx_cost (add_op, mode, code, 0, speed_p));
9726 return true;
9729 if (speed_p)
9730 *cost += extra_cost->fp[mode != SFmode].addsub;
9731 return false;
9733 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9735 *cost = LIBCALL_COST (2);
9736 return false;
9739 /* Narrow modes can be synthesized in SImode, but the range
9740 of useful sub-operations is limited. Check for shift operations
9741 on one of the operands. Only left shifts can be used in the
9742 narrow modes. */
9743 if (GET_MODE_CLASS (mode) == MODE_INT
9744 && GET_MODE_SIZE (mode) < 4)
9746 rtx shift_op, shift_reg;
9747 shift_reg = NULL;
9749 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9751 if (CONST_INT_P (XEXP (x, 1)))
9753 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 1)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9762 return true;
9765 /* Slightly penalize a narrow operation as the result may
9766 need widening. */
9767 *cost += 1;
9768 if (speed_p)
9769 *cost += extra_cost->alu.arith;
9771 return false;
9774 if (mode == SImode)
9776 rtx shift_op, shift_reg;
9778 if (TARGET_INT_SIMD
9779 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9780 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9782 /* UXTA[BH] or SXTA[BH]. */
9783 if (speed_p)
9784 *cost += extra_cost->alu.extend_arith;
9785 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9786 0, speed_p)
9787 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9788 return true;
9791 shift_reg = NULL;
9792 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9793 if (shift_op != NULL)
9795 if (shift_reg)
9797 if (speed_p)
9798 *cost += extra_cost->alu.arith_shift_reg;
9799 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9801 else if (speed_p)
9802 *cost += extra_cost->alu.arith_shift;
9804 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9805 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9806 return true;
9808 if (GET_CODE (XEXP (x, 0)) == MULT)
9810 rtx mul_op = XEXP (x, 0);
9812 if (TARGET_DSP_MULTIPLY
9813 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9814 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9815 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9818 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9819 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9820 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9821 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9822 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9823 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9824 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9825 == 16))))))
9827 /* SMLA[BT][BT]. */
9828 if (speed_p)
9829 *cost += extra_cost->mult[0].extend_add;
9830 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9831 SIGN_EXTEND, 0, speed_p)
9832 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9833 SIGN_EXTEND, 0, speed_p)
9834 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9835 return true;
9838 if (speed_p)
9839 *cost += extra_cost->mult[0].add;
9840 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9841 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9842 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9843 return true;
9845 if (CONST_INT_P (XEXP (x, 1)))
9847 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9848 INTVAL (XEXP (x, 1)), NULL_RTX,
9849 NULL_RTX, 1, 0);
9850 *cost = COSTS_N_INSNS (insns);
9851 if (speed_p)
9852 *cost += insns * extra_cost->alu.arith;
9853 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9854 return true;
9856 else if (speed_p)
9857 *cost += extra_cost->alu.arith;
9859 return false;
9862 if (mode == DImode)
9864 if (arm_arch3m
9865 && GET_CODE (XEXP (x, 0)) == MULT
9866 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9867 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9868 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9869 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9871 if (speed_p)
9872 *cost += extra_cost->mult[1].extend_add;
9873 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9874 ZERO_EXTEND, 0, speed_p)
9875 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9876 ZERO_EXTEND, 0, speed_p)
9877 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9878 return true;
9881 *cost += COSTS_N_INSNS (1);
9883 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9884 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9886 if (speed_p)
9887 *cost += (extra_cost->alu.arith
9888 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9889 ? extra_cost->alu.arith
9890 : extra_cost->alu.arith_shift));
9892 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9893 0, speed_p)
9894 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9895 return true;
9898 if (speed_p)
9899 *cost += 2 * extra_cost->alu.arith;
9900 return false;
9903 /* Vector mode? */
9904 *cost = LIBCALL_COST (2);
9905 return false;
9906 case IOR:
9907 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9909 if (speed_p)
9910 *cost += extra_cost->alu.rev;
9912 return true;
9914 /* Fall through. */
9915 case AND: case XOR:
9916 if (mode == SImode)
9918 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9919 rtx op0 = XEXP (x, 0);
9920 rtx shift_op, shift_reg;
9922 if (subcode == NOT
9923 && (code == AND
9924 || (code == IOR && TARGET_THUMB2)))
9925 op0 = XEXP (op0, 0);
9927 shift_reg = NULL;
9928 shift_op = shifter_op_p (op0, &shift_reg);
9929 if (shift_op != NULL)
9931 if (shift_reg)
9933 if (speed_p)
9934 *cost += extra_cost->alu.log_shift_reg;
9935 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9937 else if (speed_p)
9938 *cost += extra_cost->alu.log_shift;
9940 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9942 return true;
9945 if (CONST_INT_P (XEXP (x, 1)))
9947 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9948 INTVAL (XEXP (x, 1)), NULL_RTX,
9949 NULL_RTX, 1, 0);
9951 *cost = COSTS_N_INSNS (insns);
9952 if (speed_p)
9953 *cost += insns * extra_cost->alu.logical;
9954 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9955 return true;
9958 if (speed_p)
9959 *cost += extra_cost->alu.logical;
9960 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9961 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9962 return true;
9965 if (mode == DImode)
9967 rtx op0 = XEXP (x, 0);
9968 enum rtx_code subcode = GET_CODE (op0);
9970 *cost += COSTS_N_INSNS (1);
9972 if (subcode == NOT
9973 && (code == AND
9974 || (code == IOR && TARGET_THUMB2)))
9975 op0 = XEXP (op0, 0);
9977 if (GET_CODE (op0) == ZERO_EXTEND)
9979 if (speed_p)
9980 *cost += 2 * extra_cost->alu.logical;
9982 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9983 0, speed_p)
9984 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9985 return true;
9987 else if (GET_CODE (op0) == SIGN_EXTEND)
9989 if (speed_p)
9990 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9992 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9993 0, speed_p)
9994 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9995 return true;
9998 if (speed_p)
9999 *cost += 2 * extra_cost->alu.logical;
10001 return true;
10003 /* Vector mode? */
10005 *cost = LIBCALL_COST (2);
10006 return false;
10008 case MULT:
10009 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10010 && (mode == SFmode || !TARGET_VFP_SINGLE))
10012 rtx op0 = XEXP (x, 0);
10014 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10015 op0 = XEXP (op0, 0);
10017 if (speed_p)
10018 *cost += extra_cost->fp[mode != SFmode].mult;
10020 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10022 return true;
10024 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10026 *cost = LIBCALL_COST (2);
10027 return false;
10030 if (mode == SImode)
10032 if (TARGET_DSP_MULTIPLY
10033 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10034 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10035 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10036 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10037 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10038 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10039 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10040 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10041 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10042 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10043 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10044 && (INTVAL (XEXP (XEXP (x, 1), 1))
10045 == 16))))))
10047 /* SMUL[TB][TB]. */
10048 if (speed_p)
10049 *cost += extra_cost->mult[0].extend;
10050 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10051 SIGN_EXTEND, 0, speed_p);
10052 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10053 SIGN_EXTEND, 1, speed_p);
10054 return true;
10056 if (speed_p)
10057 *cost += extra_cost->mult[0].simple;
10058 return false;
10061 if (mode == DImode)
10063 if (arm_arch3m
10064 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10065 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10066 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10067 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10069 if (speed_p)
10070 *cost += extra_cost->mult[1].extend;
10071 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10072 ZERO_EXTEND, 0, speed_p)
10073 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10074 ZERO_EXTEND, 0, speed_p));
10075 return true;
10078 *cost = LIBCALL_COST (2);
10079 return false;
10082 /* Vector mode? */
10083 *cost = LIBCALL_COST (2);
10084 return false;
10086 case NEG:
10087 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10088 && (mode == SFmode || !TARGET_VFP_SINGLE))
10090 if (GET_CODE (XEXP (x, 0)) == MULT)
10092 /* VNMUL. */
10093 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10094 return true;
10097 if (speed_p)
10098 *cost += extra_cost->fp[mode != SFmode].neg;
10100 return false;
10102 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10104 *cost = LIBCALL_COST (1);
10105 return false;
10108 if (mode == SImode)
10110 if (GET_CODE (XEXP (x, 0)) == ABS)
10112 *cost += COSTS_N_INSNS (1);
10113 /* Assume the non-flag-changing variant. */
10114 if (speed_p)
10115 *cost += (extra_cost->alu.log_shift
10116 + extra_cost->alu.arith_shift);
10117 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10118 return true;
10121 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10122 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10124 *cost += COSTS_N_INSNS (1);
10125 /* No extra cost for MOV imm and MVN imm. */
10126 /* If the comparison op is using the flags, there's no further
10127 cost, otherwise we need to add the cost of the comparison. */
10128 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10129 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10130 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10132 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10133 *cost += (COSTS_N_INSNS (1)
10134 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10135 0, speed_p)
10136 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10137 1, speed_p));
10138 if (speed_p)
10139 *cost += extra_cost->alu.arith;
10141 return true;
10144 if (speed_p)
10145 *cost += extra_cost->alu.arith;
10146 return false;
10149 if (GET_MODE_CLASS (mode) == MODE_INT
10150 && GET_MODE_SIZE (mode) < 4)
10152 /* Slightly disparage, as we might need an extend operation. */
10153 *cost += 1;
10154 if (speed_p)
10155 *cost += extra_cost->alu.arith;
10156 return false;
10159 if (mode == DImode)
10161 *cost += COSTS_N_INSNS (1);
10162 if (speed_p)
10163 *cost += 2 * extra_cost->alu.arith;
10164 return false;
10167 /* Vector mode? */
10168 *cost = LIBCALL_COST (1);
10169 return false;
10171 case NOT:
10172 if (mode == SImode)
10174 rtx shift_op;
10175 rtx shift_reg = NULL;
10177 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10179 if (shift_op)
10181 if (shift_reg != NULL)
10183 if (speed_p)
10184 *cost += extra_cost->alu.log_shift_reg;
10185 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10187 else if (speed_p)
10188 *cost += extra_cost->alu.log_shift;
10189 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10190 return true;
10193 if (speed_p)
10194 *cost += extra_cost->alu.logical;
10195 return false;
10197 if (mode == DImode)
10199 *cost += COSTS_N_INSNS (1);
10200 return false;
10203 /* Vector mode? */
10205 *cost += LIBCALL_COST (1);
10206 return false;
10208 case IF_THEN_ELSE:
10210 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10212 *cost += COSTS_N_INSNS (3);
10213 return true;
10215 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10216 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10218 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10219 /* Assume that if one arm of the if_then_else is a register,
10220 it will be tied with the result and eliminate the
10221 conditional insn. */
10222 if (REG_P (XEXP (x, 1)))
10223 *cost += op2cost;
10224 else if (REG_P (XEXP (x, 2)))
10225 *cost += op1cost;
10226 else
10228 if (speed_p)
10230 if (extra_cost->alu.non_exec_costs_exec)
10231 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10232 else
10233 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10235 else
10236 *cost += op1cost + op2cost;
10239 return true;
10241 case COMPARE:
10242 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10243 *cost = 0;
10244 else
10246 machine_mode op0mode;
10247 /* We'll mostly assume that the cost of a compare is the cost of the
10248 LHS. However, there are some notable exceptions. */
10250 /* Floating point compares are never done as side-effects. */
10251 op0mode = GET_MODE (XEXP (x, 0));
10252 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10253 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10255 if (speed_p)
10256 *cost += extra_cost->fp[op0mode != SFmode].compare;
10258 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10260 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10261 return true;
10264 return false;
10266 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10268 *cost = LIBCALL_COST (2);
10269 return false;
10272 /* DImode compares normally take two insns. */
10273 if (op0mode == DImode)
10275 *cost += COSTS_N_INSNS (1);
10276 if (speed_p)
10277 *cost += 2 * extra_cost->alu.arith;
10278 return false;
10281 if (op0mode == SImode)
10283 rtx shift_op;
10284 rtx shift_reg;
10286 if (XEXP (x, 1) == const0_rtx
10287 && !(REG_P (XEXP (x, 0))
10288 || (GET_CODE (XEXP (x, 0)) == SUBREG
10289 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10291 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10293 /* Multiply operations that set the flags are often
10294 significantly more expensive. */
10295 if (speed_p
10296 && GET_CODE (XEXP (x, 0)) == MULT
10297 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10298 *cost += extra_cost->mult[0].flag_setting;
10300 if (speed_p
10301 && GET_CODE (XEXP (x, 0)) == PLUS
10302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10303 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10304 0), 1), mode))
10305 *cost += extra_cost->mult[0].flag_setting;
10306 return true;
10309 shift_reg = NULL;
10310 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10311 if (shift_op != NULL)
10313 if (shift_reg != NULL)
10315 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10316 1, speed_p);
10317 if (speed_p)
10318 *cost += extra_cost->alu.arith_shift_reg;
10320 else if (speed_p)
10321 *cost += extra_cost->alu.arith_shift;
10322 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10323 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10324 return true;
10327 if (speed_p)
10328 *cost += extra_cost->alu.arith;
10329 if (CONST_INT_P (XEXP (x, 1))
10330 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10332 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10333 return true;
10335 return false;
10338 /* Vector mode? */
10340 *cost = LIBCALL_COST (2);
10341 return false;
10343 return true;
10345 case EQ:
10346 case NE:
10347 case LT:
10348 case LE:
10349 case GT:
10350 case GE:
10351 case LTU:
10352 case LEU:
10353 case GEU:
10354 case GTU:
10355 case ORDERED:
10356 case UNORDERED:
10357 case UNEQ:
10358 case UNLE:
10359 case UNLT:
10360 case UNGE:
10361 case UNGT:
10362 case LTGT:
10363 if (outer_code == SET)
10365 /* Is it a store-flag operation? */
10366 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10367 && XEXP (x, 1) == const0_rtx)
10369 /* Thumb also needs an IT insn. */
10370 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10371 return true;
10373 if (XEXP (x, 1) == const0_rtx)
10375 switch (code)
10377 case LT:
10378 /* LSR Rd, Rn, #31. */
10379 if (speed_p)
10380 *cost += extra_cost->alu.shift;
10381 break;
10383 case EQ:
10384 /* RSBS T1, Rn, #0
10385 ADC Rd, Rn, T1. */
10387 case NE:
10388 /* SUBS T1, Rn, #1
10389 SBC Rd, Rn, T1. */
10390 *cost += COSTS_N_INSNS (1);
10391 break;
10393 case LE:
10394 /* RSBS T1, Rn, Rn, LSR #31
10395 ADC Rd, Rn, T1. */
10396 *cost += COSTS_N_INSNS (1);
10397 if (speed_p)
10398 *cost += extra_cost->alu.arith_shift;
10399 break;
10401 case GT:
10402 /* RSB Rd, Rn, Rn, ASR #1
10403 LSR Rd, Rd, #31. */
10404 *cost += COSTS_N_INSNS (1);
10405 if (speed_p)
10406 *cost += (extra_cost->alu.arith_shift
10407 + extra_cost->alu.shift);
10408 break;
10410 case GE:
10411 /* ASR Rd, Rn, #31
10412 ADD Rd, Rn, #1. */
10413 *cost += COSTS_N_INSNS (1);
10414 if (speed_p)
10415 *cost += extra_cost->alu.shift;
10416 break;
10418 default:
10419 /* Remaining cases are either meaningless or would take
10420 three insns anyway. */
10421 *cost = COSTS_N_INSNS (3);
10422 break;
10424 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10425 return true;
10427 else
10429 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10430 if (CONST_INT_P (XEXP (x, 1))
10431 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10433 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10434 return true;
10437 return false;
10440 /* Not directly inside a set. If it involves the condition code
10441 register it must be the condition for a branch, cond_exec or
10442 I_T_E operation. Since the comparison is performed elsewhere
10443 this is just the control part which has no additional
10444 cost. */
10445 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10446 && XEXP (x, 1) == const0_rtx)
10448 *cost = 0;
10449 return true;
10451 return false;
10453 case ABS:
10454 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10455 && (mode == SFmode || !TARGET_VFP_SINGLE))
10457 if (speed_p)
10458 *cost += extra_cost->fp[mode != SFmode].neg;
10460 return false;
10462 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10464 *cost = LIBCALL_COST (1);
10465 return false;
10468 if (mode == SImode)
10470 if (speed_p)
10471 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10472 return false;
10474 /* Vector mode? */
10475 *cost = LIBCALL_COST (1);
10476 return false;
10478 case SIGN_EXTEND:
10479 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10480 && MEM_P (XEXP (x, 0)))
10482 if (mode == DImode)
10483 *cost += COSTS_N_INSNS (1);
10485 if (!speed_p)
10486 return true;
10488 if (GET_MODE (XEXP (x, 0)) == SImode)
10489 *cost += extra_cost->ldst.load;
10490 else
10491 *cost += extra_cost->ldst.load_sign_extend;
10493 if (mode == DImode)
10494 *cost += extra_cost->alu.shift;
10496 return true;
10499 /* Widening from less than 32 bits requires an extend operation. */
10500 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10502 /* We have SXTB/SXTH. */
10503 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10504 if (speed_p)
10505 *cost += extra_cost->alu.extend;
10507 else if (GET_MODE (XEXP (x, 0)) != SImode)
10509 /* Needs two shifts. */
10510 *cost += COSTS_N_INSNS (1);
10511 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10512 if (speed_p)
10513 *cost += 2 * extra_cost->alu.shift;
10516 /* Widening beyond 32 bits requires one more insn. */
10517 if (mode == DImode)
10519 *cost += COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10524 return true;
10526 case ZERO_EXTEND:
10527 if ((arm_arch4
10528 || GET_MODE (XEXP (x, 0)) == SImode
10529 || GET_MODE (XEXP (x, 0)) == QImode)
10530 && MEM_P (XEXP (x, 0)))
10532 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10534 if (mode == DImode)
10535 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10537 return true;
10540 /* Widening from less than 32 bits requires an extend operation. */
10541 if (GET_MODE (XEXP (x, 0)) == QImode)
10543 /* UXTB can be a shorter instruction in Thumb2, but it might
10544 be slower than the AND Rd, Rn, #255 alternative. When
10545 optimizing for speed it should never be slower to use
10546 AND, and we don't really model 16-bit vs 32-bit insns
10547 here. */
10548 if (speed_p)
10549 *cost += extra_cost->alu.logical;
10551 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10553 /* We have UXTB/UXTH. */
10554 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10555 if (speed_p)
10556 *cost += extra_cost->alu.extend;
10558 else if (GET_MODE (XEXP (x, 0)) != SImode)
10560 /* Needs two shifts. It's marginally preferable to use
10561 shifts rather than two BIC instructions as the second
10562 shift may merge with a subsequent insn as a shifter
10563 op. */
10564 *cost = COSTS_N_INSNS (2);
10565 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10566 if (speed_p)
10567 *cost += 2 * extra_cost->alu.shift;
10570 /* Widening beyond 32 bits requires one more insn. */
10571 if (mode == DImode)
10573 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10576 return true;
10578 case CONST_INT:
10579 *cost = 0;
10580 /* CONST_INT has no mode, so we cannot tell for sure how many
10581 insns are really going to be needed. The best we can do is
10582 look at the value passed. If it fits in SImode, then assume
10583 that's the mode it will be used for. Otherwise assume it
10584 will be used in DImode. */
10585 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10586 mode = SImode;
10587 else
10588 mode = DImode;
10590 /* Avoid blowing up in arm_gen_constant (). */
10591 if (!(outer_code == PLUS
10592 || outer_code == AND
10593 || outer_code == IOR
10594 || outer_code == XOR
10595 || outer_code == MINUS))
10596 outer_code = SET;
10598 const_int_cost:
10599 if (mode == SImode)
10601 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10602 INTVAL (x), NULL, NULL,
10603 0, 0));
10604 /* Extra costs? */
10606 else
10608 *cost += COSTS_N_INSNS (arm_gen_constant
10609 (outer_code, SImode, NULL,
10610 trunc_int_for_mode (INTVAL (x), SImode),
10611 NULL, NULL, 0, 0)
10612 + arm_gen_constant (outer_code, SImode, NULL,
10613 INTVAL (x) >> 32, NULL,
10614 NULL, 0, 0));
10615 /* Extra costs? */
10618 return true;
10620 case CONST:
10621 case LABEL_REF:
10622 case SYMBOL_REF:
10623 if (speed_p)
10625 if (arm_arch_thumb2 && !flag_pic)
10626 *cost += COSTS_N_INSNS (1);
10627 else
10628 *cost += extra_cost->ldst.load;
10630 else
10631 *cost += COSTS_N_INSNS (1);
10633 if (flag_pic)
10635 *cost += COSTS_N_INSNS (1);
10636 if (speed_p)
10637 *cost += extra_cost->alu.arith;
10640 return true;
10642 case CONST_FIXED:
10643 *cost = COSTS_N_INSNS (4);
10644 /* Fixme. */
10645 return true;
10647 case CONST_DOUBLE:
10648 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10649 && (mode == SFmode || !TARGET_VFP_SINGLE))
10651 if (vfp3_const_double_rtx (x))
10653 if (speed_p)
10654 *cost += extra_cost->fp[mode == DFmode].fpconst;
10655 return true;
10658 if (speed_p)
10660 if (mode == DFmode)
10661 *cost += extra_cost->ldst.loadd;
10662 else
10663 *cost += extra_cost->ldst.loadf;
10665 else
10666 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10668 return true;
10670 *cost = COSTS_N_INSNS (4);
10671 return true;
10673 case CONST_VECTOR:
10674 /* Fixme. */
10675 if (TARGET_NEON
10676 && TARGET_HARD_FLOAT
10677 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10678 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10679 *cost = COSTS_N_INSNS (1);
10680 else
10681 *cost = COSTS_N_INSNS (4);
10682 return true;
10684 case HIGH:
10685 case LO_SUM:
10686 /* When optimizing for size, we prefer constant pool entries to
10687 MOVW/MOVT pairs, so bump the cost of these slightly. */
10688 if (!speed_p)
10689 *cost += 1;
10690 return true;
10692 case CLZ:
10693 if (speed_p)
10694 *cost += extra_cost->alu.clz;
10695 return false;
10697 case SMIN:
10698 if (XEXP (x, 1) == const0_rtx)
10700 if (speed_p)
10701 *cost += extra_cost->alu.log_shift;
10702 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10703 return true;
10705 /* Fall through. */
10706 case SMAX:
10707 case UMIN:
10708 case UMAX:
10709 *cost += COSTS_N_INSNS (1);
10710 return false;
10712 case TRUNCATE:
10713 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10714 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10715 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10716 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10717 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10718 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10719 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10720 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10721 == ZERO_EXTEND))))
10723 if (speed_p)
10724 *cost += extra_cost->mult[1].extend;
10725 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10726 ZERO_EXTEND, 0, speed_p)
10727 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10728 ZERO_EXTEND, 0, speed_p));
10729 return true;
10731 *cost = LIBCALL_COST (1);
10732 return false;
10734 case UNSPEC_VOLATILE:
10735 case UNSPEC:
10736 return arm_unspec_cost (x, outer_code, speed_p, cost);
10738 case PC:
10739 /* Reading the PC is like reading any other register. Writing it
10740 is more expensive, but we take that into account elsewhere. */
10741 *cost = 0;
10742 return true;
10744 case ZERO_EXTRACT:
10745 /* TODO: Simple zero_extract of bottom bits using AND. */
10746 /* Fall through. */
10747 case SIGN_EXTRACT:
10748 if (arm_arch6
10749 && mode == SImode
10750 && CONST_INT_P (XEXP (x, 1))
10751 && CONST_INT_P (XEXP (x, 2)))
10753 if (speed_p)
10754 *cost += extra_cost->alu.bfx;
10755 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10756 return true;
10758 /* Without UBFX/SBFX, need to resort to shift operations. */
10759 *cost += COSTS_N_INSNS (1);
10760 if (speed_p)
10761 *cost += 2 * extra_cost->alu.shift;
10762 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10763 return true;
10765 case FLOAT_EXTEND:
10766 if (TARGET_HARD_FLOAT)
10768 if (speed_p)
10769 *cost += extra_cost->fp[mode == DFmode].widen;
10770 if (!TARGET_VFP5
10771 && GET_MODE (XEXP (x, 0)) == HFmode)
10773 /* Pre v8, widening HF->DF is a two-step process, first
10774 widening to SFmode. */
10775 *cost += COSTS_N_INSNS (1);
10776 if (speed_p)
10777 *cost += extra_cost->fp[0].widen;
10779 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10780 return true;
10783 *cost = LIBCALL_COST (1);
10784 return false;
10786 case FLOAT_TRUNCATE:
10787 if (TARGET_HARD_FLOAT)
10789 if (speed_p)
10790 *cost += extra_cost->fp[mode == DFmode].narrow;
10791 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10792 return true;
10793 /* Vector modes? */
10795 *cost = LIBCALL_COST (1);
10796 return false;
10798 case FMA:
10799 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10801 rtx op0 = XEXP (x, 0);
10802 rtx op1 = XEXP (x, 1);
10803 rtx op2 = XEXP (x, 2);
10806 /* vfms or vfnma. */
10807 if (GET_CODE (op0) == NEG)
10808 op0 = XEXP (op0, 0);
10810 /* vfnms or vfnma. */
10811 if (GET_CODE (op2) == NEG)
10812 op2 = XEXP (op2, 0);
10814 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10815 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10816 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10818 if (speed_p)
10819 *cost += extra_cost->fp[mode == DFmode].fma;
10821 return true;
10824 *cost = LIBCALL_COST (3);
10825 return false;
10827 case FIX:
10828 case UNSIGNED_FIX:
10829 if (TARGET_HARD_FLOAT)
10831 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10832 a vcvt fixed-point conversion. */
10833 if (code == FIX && mode == SImode
10834 && GET_CODE (XEXP (x, 0)) == FIX
10835 && GET_MODE (XEXP (x, 0)) == SFmode
10836 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10837 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10838 > 0)
10840 if (speed_p)
10841 *cost += extra_cost->fp[0].toint;
10843 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10844 code, 0, speed_p);
10845 return true;
10848 if (GET_MODE_CLASS (mode) == MODE_INT)
10850 mode = GET_MODE (XEXP (x, 0));
10851 if (speed_p)
10852 *cost += extra_cost->fp[mode == DFmode].toint;
10853 /* Strip off the 'cost' of rounding towards zero. */
10854 if (GET_CODE (XEXP (x, 0)) == FIX)
10855 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10856 0, speed_p);
10857 else
10858 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10859 /* ??? Increase the cost to deal with transferring from
10860 FP -> CORE registers? */
10861 return true;
10863 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10864 && TARGET_VFP5)
10866 if (speed_p)
10867 *cost += extra_cost->fp[mode == DFmode].roundint;
10868 return false;
10870 /* Vector costs? */
10872 *cost = LIBCALL_COST (1);
10873 return false;
10875 case FLOAT:
10876 case UNSIGNED_FLOAT:
10877 if (TARGET_HARD_FLOAT)
10879 /* ??? Increase the cost to deal with transferring from CORE
10880 -> FP registers? */
10881 if (speed_p)
10882 *cost += extra_cost->fp[mode == DFmode].fromint;
10883 return false;
10885 *cost = LIBCALL_COST (1);
10886 return false;
10888 case CALL:
10889 return true;
10891 case ASM_OPERANDS:
10893 /* Just a guess. Guess number of instructions in the asm
10894 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10895 though (see PR60663). */
10896 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10897 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10899 *cost = COSTS_N_INSNS (asm_length + num_operands);
10900 return true;
10902 default:
10903 if (mode != VOIDmode)
10904 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10905 else
10906 *cost = COSTS_N_INSNS (4); /* Who knows? */
10907 return false;
10911 #undef HANDLE_NARROW_SHIFT_ARITH
10913 /* RTX costs entry point. */
10915 static bool
10916 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10917 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10919 bool result;
10920 int code = GET_CODE (x);
10921 gcc_assert (current_tune->insn_extra_cost);
10923 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10924 (enum rtx_code) outer_code,
10925 current_tune->insn_extra_cost,
10926 total, speed);
10928 if (dump_file && (dump_flags & TDF_DETAILS))
10930 print_rtl_single (dump_file, x);
10931 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10932 *total, result ? "final" : "partial");
10934 return result;
10937 /* All address computations that can be done are free, but rtx cost returns
10938 the same for practically all of them. So we weight the different types
10939 of address here in the order (most pref first):
10940 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10941 static inline int
10942 arm_arm_address_cost (rtx x)
10944 enum rtx_code c = GET_CODE (x);
10946 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10947 return 0;
10948 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10949 return 10;
10951 if (c == PLUS)
10953 if (CONST_INT_P (XEXP (x, 1)))
10954 return 2;
10956 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10957 return 3;
10959 return 4;
10962 return 6;
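/* Examples of the weighting above (editorial, illustrative only):
     (post_inc (reg r4))                              -> 0
     (plus (reg r4) (const_int 8))                    -> 2
     (plus (reg r4) (ashift (reg r5) (const_int 2)))  -> 3
     (plus (reg r4) (reg r5))                         -> 4
     (reg r4)                                         -> 6
     (symbol_ref "x")                                 -> 10  */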
10965 static inline int
10966 arm_thumb_address_cost (rtx x)
10968 enum rtx_code c = GET_CODE (x);
10970 if (c == REG)
10971 return 1;
10972 if (c == PLUS
10973 && REG_P (XEXP (x, 0))
10974 && CONST_INT_P (XEXP (x, 1)))
10975 return 1;
10977 return 2;
10980 static int
10981 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10982 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10984 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10987 /* Adjust cost hook for XScale. */
10988 static bool
10989 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10990 int * cost)
10992 /* Some true dependencies can have a higher cost depending
10993 on precisely how certain input operands are used. */
10994 if (dep_type == 0
10995 && recog_memoized (insn) >= 0
10996 && recog_memoized (dep) >= 0)
10998 int shift_opnum = get_attr_shift (insn);
10999 enum attr_type attr_type = get_attr_type (dep);
11001 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11002 operand for INSN. If we have a shifted input operand and the
11003 instruction we depend on is another ALU instruction, then we may
11004 have to account for an additional stall. */
11005 if (shift_opnum != 0
11006 && (attr_type == TYPE_ALU_SHIFT_IMM
11007 || attr_type == TYPE_ALUS_SHIFT_IMM
11008 || attr_type == TYPE_LOGIC_SHIFT_IMM
11009 || attr_type == TYPE_LOGICS_SHIFT_IMM
11010 || attr_type == TYPE_ALU_SHIFT_REG
11011 || attr_type == TYPE_ALUS_SHIFT_REG
11012 || attr_type == TYPE_LOGIC_SHIFT_REG
11013 || attr_type == TYPE_LOGICS_SHIFT_REG
11014 || attr_type == TYPE_MOV_SHIFT
11015 || attr_type == TYPE_MVN_SHIFT
11016 || attr_type == TYPE_MOV_SHIFT_REG
11017 || attr_type == TYPE_MVN_SHIFT_REG))
11019 rtx shifted_operand;
11020 int opno;
11022 /* Get the shifted operand. */
11023 extract_insn (insn);
11024 shifted_operand = recog_data.operand[shift_opnum];
11026 /* Iterate over all the operands in DEP. If we write an operand
11027 that overlaps with SHIFTED_OPERAND, then we have to increase the
11028 cost of this dependency. */
11029 extract_insn (dep);
11030 preprocess_constraints (dep);
11031 for (opno = 0; opno < recog_data.n_operands; opno++)
11033 /* We can ignore strict inputs. */
11034 if (recog_data.operand_type[opno] == OP_IN)
11035 continue;
11037 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11038 shifted_operand))
11040 *cost = 2;
11041 return false;
11046 return true;
11049 /* Adjust cost hook for Cortex A9. */
11050 static bool
11051 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11052 int * cost)
11054 switch (dep_type)
11056 case REG_DEP_ANTI:
11057 *cost = 0;
11058 return false;
11060 case REG_DEP_TRUE:
11061 case REG_DEP_OUTPUT:
11062 if (recog_memoized (insn) >= 0
11063 && recog_memoized (dep) >= 0)
11065 if (GET_CODE (PATTERN (insn)) == SET)
11067 if (GET_MODE_CLASS
11068 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11069 || GET_MODE_CLASS
11070 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11072 enum attr_type attr_type_insn = get_attr_type (insn);
11073 enum attr_type attr_type_dep = get_attr_type (dep);
11075 /* By default all dependencies of the form
11076 s0 = s0 <op> s1
11077 s0 = s0 <op> s2
11078 have an extra latency of 1 cycle because
11079 of the input and output dependency in this
11080 case. However, this gets modeled as a true
11081 dependency and hence all these checks. */
11082 if (REG_P (SET_DEST (PATTERN (insn)))
11083 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11085 /* FMACS is a special case where the dependent
11086 instruction can be issued 3 cycles before
11087 the normal latency in case of an output
11088 dependency. */
11089 if ((attr_type_insn == TYPE_FMACS
11090 || attr_type_insn == TYPE_FMACD)
11091 && (attr_type_dep == TYPE_FMACS
11092 || attr_type_dep == TYPE_FMACD))
11094 if (dep_type == REG_DEP_OUTPUT)
11095 *cost = insn_default_latency (dep) - 3;
11096 else
11097 *cost = insn_default_latency (dep);
11098 return false;
11100 else
11102 if (dep_type == REG_DEP_OUTPUT)
11103 *cost = insn_default_latency (dep) + 1;
11104 else
11105 *cost = insn_default_latency (dep);
11107 return false;
11112 break;
11114 default:
11115 gcc_unreachable ();
11118 return true;
11121 /* Adjust cost hook for FA726TE. */
11122 static bool
11123 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11124 int * cost)
11126 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11127 has a penalty of 3. */
11128 if (dep_type == REG_DEP_TRUE
11129 && recog_memoized (insn) >= 0
11130 && recog_memoized (dep) >= 0
11131 && get_attr_conds (dep) == CONDS_SET)
11133 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11134 if (get_attr_conds (insn) == CONDS_USE
11135 && get_attr_type (insn) != TYPE_BRANCH)
11137 *cost = 3;
11138 return false;
11141 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11142 || get_attr_conds (insn) == CONDS_USE)
11144 *cost = 0;
11145 return false;
11149 return true;
11152 /* Implement TARGET_REGISTER_MOVE_COST.
11154 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11155 it is typically more expensive than a single memory access. We set
11156 the cost to less than two memory accesses so that floating
11157 point to integer conversion does not go through memory. */
11160 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11161 reg_class_t from, reg_class_t to)
11163 if (TARGET_32BIT)
11165 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11166 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11167 return 15;
11168 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11169 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11170 return 4;
11171 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11172 return 20;
11173 else
11174 return 2;
11176 else
11178 if (from == HI_REGS || to == HI_REGS)
11179 return 4;
11180 else
11181 return 2;
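/* Editorial note: the VFP <-> core-register cost of 15 above is
   deliberately below the 2 * 10 cost of a round trip through memory
   implied by arm_memory_move_cost below, so FP <-> integer conversions
   prefer a direct register move over spilling.  */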
11185 /* Implement TARGET_MEMORY_MOVE_COST. */
11188 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11189 bool in ATTRIBUTE_UNUSED)
11191 if (TARGET_32BIT)
11192 return 10;
11193 else
11195 if (GET_MODE_SIZE (mode) < 4)
11196 return 8;
11197 else
11198 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
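/* Worked example (editorial): for Thumb-1, an SImode value has
   GET_MODE_SIZE == 4, so a memory move costs 2 * 4 * 1 == 8 for
   LO_REGS and 2 * 4 * 2 == 16 for any other class; modes narrower than
   a word cost a flat 8.  */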
11202 /* Vectorizer cost model implementation. */
11204 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11205 static int
11206 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11207 tree vectype,
11208 int misalign ATTRIBUTE_UNUSED)
11210 unsigned elements;
11212 switch (type_of_cost)
11214 case scalar_stmt:
11215 return current_tune->vec_costs->scalar_stmt_cost;
11217 case scalar_load:
11218 return current_tune->vec_costs->scalar_load_cost;
11220 case scalar_store:
11221 return current_tune->vec_costs->scalar_store_cost;
11223 case vector_stmt:
11224 return current_tune->vec_costs->vec_stmt_cost;
11226 case vector_load:
11227 return current_tune->vec_costs->vec_align_load_cost;
11229 case vector_store:
11230 return current_tune->vec_costs->vec_store_cost;
11232 case vec_to_scalar:
11233 return current_tune->vec_costs->vec_to_scalar_cost;
11235 case scalar_to_vec:
11236 return current_tune->vec_costs->scalar_to_vec_cost;
11238 case unaligned_load:
11239 return current_tune->vec_costs->vec_unalign_load_cost;
11241 case unaligned_store:
11242 return current_tune->vec_costs->vec_unalign_store_cost;
11244 case cond_branch_taken:
11245 return current_tune->vec_costs->cond_taken_branch_cost;
11247 case cond_branch_not_taken:
11248 return current_tune->vec_costs->cond_not_taken_branch_cost;
11250 case vec_perm:
11251 case vec_promote_demote:
11252 return current_tune->vec_costs->vec_stmt_cost;
11254 case vec_construct:
11255 elements = TYPE_VECTOR_SUBPARTS (vectype);
11256 return elements / 2 + 1;
11258 default:
11259 gcc_unreachable ();
11263 /* Implement targetm.vectorize.add_stmt_cost. */
11265 static unsigned
11266 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11267 struct _stmt_vec_info *stmt_info, int misalign,
11268 enum vect_cost_model_location where)
11270 unsigned *cost = (unsigned *) data;
11271 unsigned retval = 0;
11273 if (flag_vect_cost_model)
11275 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11276 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11278 /* Statements in an inner loop relative to the loop being
11279 vectorized are weighted more heavily. The value here is
11280 arbitrary and could potentially be improved with analysis. */
11281 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11282 count *= 50; /* FIXME. */
11284 retval = (unsigned) (count * stmt_cost);
11285 cost[where] += retval;
11288 return retval;
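/* Worked example (editorial, assuming the vectorizer cost model is
   enabled and a hypothetical vec_stmt_cost of 1): a vector_stmt in an
   inner loop relative to the loop being vectorized, with count == 1,
   is recorded as 1 * 50 * 1 == 50 in cost[vect_body] because of the
   FIXME weighting above.  */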
11291 /* Return true if and only if this insn can dual-issue only as older. */
11292 static bool
11293 cortexa7_older_only (rtx_insn *insn)
11295 if (recog_memoized (insn) < 0)
11296 return false;
11298 switch (get_attr_type (insn))
11300 case TYPE_ALU_DSP_REG:
11301 case TYPE_ALU_SREG:
11302 case TYPE_ALUS_SREG:
11303 case TYPE_LOGIC_REG:
11304 case TYPE_LOGICS_REG:
11305 case TYPE_ADC_REG:
11306 case TYPE_ADCS_REG:
11307 case TYPE_ADR:
11308 case TYPE_BFM:
11309 case TYPE_REV:
11310 case TYPE_MVN_REG:
11311 case TYPE_SHIFT_IMM:
11312 case TYPE_SHIFT_REG:
11313 case TYPE_LOAD_BYTE:
11314 case TYPE_LOAD1:
11315 case TYPE_STORE1:
11316 case TYPE_FFARITHS:
11317 case TYPE_FADDS:
11318 case TYPE_FFARITHD:
11319 case TYPE_FADDD:
11320 case TYPE_FMOV:
11321 case TYPE_F_CVT:
11322 case TYPE_FCMPS:
11323 case TYPE_FCMPD:
11324 case TYPE_FCONSTS:
11325 case TYPE_FCONSTD:
11326 case TYPE_FMULS:
11327 case TYPE_FMACS:
11328 case TYPE_FMULD:
11329 case TYPE_FMACD:
11330 case TYPE_FDIVS:
11331 case TYPE_FDIVD:
11332 case TYPE_F_MRC:
11333 case TYPE_F_MRRC:
11334 case TYPE_F_FLAG:
11335 case TYPE_F_LOADS:
11336 case TYPE_F_STORES:
11337 return true;
11338 default:
11339 return false;
11343 /* Return true if and only if this insn can dual-issue as younger. */
11344 static bool
11345 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11347 if (recog_memoized (insn) < 0)
11349 if (verbose > 5)
11350 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11351 return false;
11354 switch (get_attr_type (insn))
11356 case TYPE_ALU_IMM:
11357 case TYPE_ALUS_IMM:
11358 case TYPE_LOGIC_IMM:
11359 case TYPE_LOGICS_IMM:
11360 case TYPE_EXTEND:
11361 case TYPE_MVN_IMM:
11362 case TYPE_MOV_IMM:
11363 case TYPE_MOV_REG:
11364 case TYPE_MOV_SHIFT:
11365 case TYPE_MOV_SHIFT_REG:
11366 case TYPE_BRANCH:
11367 case TYPE_CALL:
11368 return true;
11369 default:
11370 return false;
11375 /* Look for an instruction that can dual issue only as an older
11376 instruction, and move it in front of any instructions that can
11377 dual-issue as younger, while preserving the relative order of all
11378 other instructions in the ready list. This is a heuristic to help
11379 dual-issue in later cycles, by postponing issue of more flexible
11380 instructions. This heuristic may affect dual issue opportunities
11381 in the current cycle. */
11382 static void
11383 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11384 int *n_readyp, int clock)
11386 int i;
11387 int first_older_only = -1, first_younger = -1;
11389 if (verbose > 5)
11390 fprintf (file,
11391 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11392 clock,
11393 *n_readyp);
11395 /* Traverse the ready list from the head (the instruction to issue
11396 first), looking for the first instruction that can issue as
11397 younger and the first instruction that can dual-issue only as
11398 older. */
11399 for (i = *n_readyp - 1; i >= 0; i--)
11401 rtx_insn *insn = ready[i];
11402 if (cortexa7_older_only (insn))
11404 first_older_only = i;
11405 if (verbose > 5)
11406 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11407 break;
11409 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11410 first_younger = i;
11413 /* Nothing to reorder because either no younger insn found or insn
11414 that can dual-issue only as older appears before any insn that
11415 can dual-issue as younger. */
11416 if (first_younger == -1)
11418 if (verbose > 5)
11419 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11420 return;
11423 /* Nothing to reorder because no older-only insn in the ready list. */
11424 if (first_older_only == -1)
11426 if (verbose > 5)
11427 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11428 return;
11431 /* Move first_older_only insn before first_younger. */
11432 if (verbose > 5)
11433 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11434 INSN_UID(ready [first_older_only]),
11435 INSN_UID(ready [first_younger]));
11436 rtx_insn *first_older_only_insn = ready [first_older_only];
11437 for (i = first_older_only; i < first_younger; i++)
11439 ready[i] = ready[i+1];
11442 ready[i] = first_older_only_insn;
11443 return;
11446 /* Implement TARGET_SCHED_REORDER. */
11447 static int
11448 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11449 int clock)
11451 switch (arm_tune)
11453 case TARGET_CPU_cortexa7:
11454 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11455 break;
11456 default:
11457 /* Do nothing for other cores. */
11458 break;
11461 return arm_issue_rate ();
11464 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11465 It corrects the value of COST based on the relationship between
11466 INSN and DEP through the dependence LINK. It returns the new
11467 value. There is a per-core adjust_cost hook to adjust scheduler costs
11468 and the per-core hook can choose to completely override the generic
11469 adjust_cost function. Only put bits of code into arm_adjust_cost that
11470 are common across all cores. */
11471 static int
11472 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11473 unsigned int)
11475 rtx i_pat, d_pat;
11477 /* When generating Thumb-1 code, we want to place flag-setting operations
11478 close to a conditional branch which depends on them, so that we can
11479 omit the comparison. */
11480 if (TARGET_THUMB1
11481 && dep_type == 0
11482 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11483 && recog_memoized (dep) >= 0
11484 && get_attr_conds (dep) == CONDS_SET)
11485 return 0;
11487 if (current_tune->sched_adjust_cost != NULL)
11489 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11490 return cost;
11493 /* XXX Is this strictly true? */
11494 if (dep_type == REG_DEP_ANTI
11495 || dep_type == REG_DEP_OUTPUT)
11496 return 0;
11498 /* Call insns don't incur a stall, even if they follow a load. */
11499 if (dep_type == 0
11500 && CALL_P (insn))
11501 return 1;
11503 if ((i_pat = single_set (insn)) != NULL
11504 && MEM_P (SET_SRC (i_pat))
11505 && (d_pat = single_set (dep)) != NULL
11506 && MEM_P (SET_DEST (d_pat)))
11508 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11509 /* This is a load after a store, there is no conflict if the load reads
11510 from a cached area. Assume that loads from the stack, and from the
11511 constant pool are cached, and that others will miss. This is a
11512 hack. */
11514 if ((GET_CODE (src_mem) == SYMBOL_REF
11515 && CONSTANT_POOL_ADDRESS_P (src_mem))
11516 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11517 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11518 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11519 return 1;
11522 return cost;
11526 arm_max_conditional_execute (void)
11528 return max_insns_skipped;
11531 static int
11532 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11534 if (TARGET_32BIT)
11535 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11536 else
11537 return (optimize > 0) ? 2 : 0;
11540 static int
11541 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11543 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11546 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11547 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11548 sequences of non-executed instructions in IT blocks probably take the same
11549 amount of time as executed instructions (and the IT instruction itself takes
11550 space in icache). This function was experimentally determined to give good
11551 results on a popular embedded benchmark. */
11553 static int
11554 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11556 return (TARGET_32BIT && speed_p) ? 1
11557 : arm_default_branch_cost (speed_p, predictable_p);
11560 static int
11561 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11563 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11566 static bool fp_consts_inited = false;
11568 static REAL_VALUE_TYPE value_fp0;
11570 static void
11571 init_fp_table (void)
11573 REAL_VALUE_TYPE r;
11575 r = REAL_VALUE_ATOF ("0", DFmode);
11576 value_fp0 = r;
11577 fp_consts_inited = true;
11580 /* Return TRUE if rtx X is a valid immediate FP constant. */
11582 arm_const_double_rtx (rtx x)
11584 const REAL_VALUE_TYPE *r;
11586 if (!fp_consts_inited)
11587 init_fp_table ();
11589 r = CONST_DOUBLE_REAL_VALUE (x);
11590 if (REAL_VALUE_MINUS_ZERO (*r))
11591 return 0;
11593 if (real_equal (r, &value_fp0))
11594 return 1;
11596 return 0;
11599 /* VFPv3 has a fairly wide range of representable immediates, formed from
11600 "quarter-precision" floating-point values. These can be evaluated using this
11601 formula (with ^ for exponentiation):
11603 -1^s * n * 2^-r
11605 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11606 16 <= n <= 31 and 0 <= r <= 7.
11608 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11610 - A (most-significant) is the sign bit.
11611 - BCD are the exponent (encoded as r XOR 3).
11612 - EFGH are the mantissa (encoded as n - 16).
11615 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11616 fconst[sd] instruction, or -1 if X isn't suitable. */
11617 static int
11618 vfp3_const_double_index (rtx x)
11620 REAL_VALUE_TYPE r, m;
11621 int sign, exponent;
11622 unsigned HOST_WIDE_INT mantissa, mant_hi;
11623 unsigned HOST_WIDE_INT mask;
11624 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11625 bool fail;
11627 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11628 return -1;
11630 r = *CONST_DOUBLE_REAL_VALUE (x);
11632 /* We can't represent these things, so detect them first. */
11633 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11634 return -1;
11636 /* Extract sign, exponent and mantissa. */
11637 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11638 r = real_value_abs (&r);
11639 exponent = REAL_EXP (&r);
11640 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11641 highest (sign) bit, with a fixed binary point at bit point_pos.
11642 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11643 bits for the mantissa, this may fail (low bits would be lost). */
11644 real_ldexp (&m, &r, point_pos - exponent);
11645 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11646 mantissa = w.elt (0);
11647 mant_hi = w.elt (1);
11649 /* If there are bits set in the low part of the mantissa, we can't
11650 represent this value. */
11651 if (mantissa != 0)
11652 return -1;
11654 /* Now make it so that mantissa contains the most-significant bits, and move
11655 the point_pos to indicate that the least-significant bits have been
11656 discarded. */
11657 point_pos -= HOST_BITS_PER_WIDE_INT;
11658 mantissa = mant_hi;
11660 /* We can permit four significant bits of mantissa only, plus a high bit
11661 which is always 1. */
11662 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11663 if ((mantissa & mask) != 0)
11664 return -1;
11666 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11667 mantissa >>= point_pos - 5;
11669 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11670 floating-point immediate zero with Neon using an integer-zero load, but
11671 that case is handled elsewhere.) */
11672 if (mantissa == 0)
11673 return -1;
11675 gcc_assert (mantissa >= 16 && mantissa <= 31);
11677 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11678 normalized significands are in the range [1, 2). (Our mantissa is shifted
11679 left 4 places at this point relative to normalized IEEE754 values). GCC
11680 internally uses [0.5, 1) (see real.c), so the exponent returned from
11681 REAL_EXP must be altered. */
11682 exponent = 5 - exponent;
11684 if (exponent < 0 || exponent > 7)
11685 return -1;
11687 /* Sign, mantissa and exponent are now in the correct form to plug into the
11688 formula described in the comment above. */
11689 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
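/* Worked examples of the encoding above: 1.0 = 16 * 2^-4 gives s = 0,
   n = 16, r = 4 and hence index (4 ^ 3) << 4 = 0x70; 0.5 = 16 * 2^-5 encodes
   as 0x60; -2.0 = -(16 * 2^-3) encodes as 0x80.  */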
11692 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11694 vfp3_const_double_rtx (rtx x)
11696 if (!TARGET_VFP3)
11697 return 0;
11699 return vfp3_const_double_index (x) != -1;
11702 /* Recognize immediates which can be used in various Neon instructions. Legal
11703 immediates are described by the following table (for VMVN variants, the
11704 bitwise inverse of the constant shown is recognized. In either case, VMOV
11705 is output and the correct instruction to use for a given constant is chosen
11706 by the assembler). The constant shown is replicated across all elements of
11707 the destination vector.
11709 insn elems variant constant (binary)
11710 ---- ----- ------- -----------------
11711 vmov i32 0 00000000 00000000 00000000 abcdefgh
11712 vmov i32 1 00000000 00000000 abcdefgh 00000000
11713 vmov i32 2 00000000 abcdefgh 00000000 00000000
11714 vmov i32 3 abcdefgh 00000000 00000000 00000000
11715 vmov i16 4 00000000 abcdefgh
11716 vmov i16 5 abcdefgh 00000000
11717 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11718 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11719 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11720 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11721 vmvn i16 10 00000000 abcdefgh
11722 vmvn i16 11 abcdefgh 00000000
11723 vmov i32 12 00000000 00000000 abcdefgh 11111111
11724 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11725 vmov i32 14 00000000 abcdefgh 11111111 11111111
11726 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11727 vmov i8 16 abcdefgh
11728 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11729 eeeeeeee ffffffff gggggggg hhhhhhhh
11730 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11731 vmov f32 19 00000000 00000000 00000000 00000000
11733 For case 18, B = !b. Representable values are exactly those accepted by
11734 vfp3_const_double_index, but are output as floating-point numbers rather
11735 than indices.
11737 For case 19, we will change it to vmov.i32 when assembling.
11739 Variants 0-5 (inclusive) may also be used as immediates for the second
11740 operand of VORR/VBIC instructions.
11742 The INVERSE argument causes the bitwise inverse of the given operand to be
11743 recognized instead (used for recognizing legal immediates for the VAND/VORN
11744 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11745 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11746 output, rather than the real insns vbic/vorr).
11748 INVERSE makes no difference to the recognition of float vectors.
11750 The return value is the variant of immediate as shown in the above table, or
11751 -1 if the given value doesn't match any of the listed patterns.
11753 static int
11754 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11755 rtx *modconst, int *elementwidth)
11757 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11758 matches = 1; \
11759 for (i = 0; i < idx; i += (STRIDE)) \
11760 if (!(TEST)) \
11761 matches = 0; \
11762 if (matches) \
11764 immtype = (CLASS); \
11765 elsize = (ELSIZE); \
11766 break; \
11769 unsigned int i, elsize = 0, idx = 0, n_elts;
11770 unsigned int innersize;
11771 unsigned char bytes[16];
11772 int immtype = -1, matches;
11773 unsigned int invmask = inverse ? 0xff : 0;
11774 bool vector = GET_CODE (op) == CONST_VECTOR;
11776 if (vector)
11777 n_elts = CONST_VECTOR_NUNITS (op);
11778 else
11780 n_elts = 1;
11781 if (mode == VOIDmode)
11782 mode = DImode;
11785 innersize = GET_MODE_UNIT_SIZE (mode);
11787 /* Vectors of float constants. */
11788 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11790 rtx el0 = CONST_VECTOR_ELT (op, 0);
11792 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11793 return -1;
11795 /* FP16 vectors cannot be represented. */
11796 if (GET_MODE_INNER (mode) == HFmode)
11797 return -1;
11799 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11800 are distinct in this context. */
11801 if (!const_vec_duplicate_p (op))
11802 return -1;
11804 if (modconst)
11805 *modconst = CONST_VECTOR_ELT (op, 0);
11807 if (elementwidth)
11808 *elementwidth = 0;
11810 if (el0 == CONST0_RTX (GET_MODE (el0)))
11811 return 19;
11812 else
11813 return 18;
11816 /* The tricks done in the code below apply for little-endian vector layout.
11817 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11818 FIXME: Implement logic for big-endian vectors. */
11819 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11820 return -1;
11822 /* Splat vector constant out into a byte vector. */
11823 for (i = 0; i < n_elts; i++)
11825 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11826 unsigned HOST_WIDE_INT elpart;
11828 gcc_assert (CONST_INT_P (el));
11829 elpart = INTVAL (el);
11831 for (unsigned int byte = 0; byte < innersize; byte++)
11833 bytes[idx++] = (elpart & 0xff) ^ invmask;
11834 elpart >>= BITS_PER_UNIT;
11838 /* Sanity check. */
11839 gcc_assert (idx == GET_MODE_SIZE (mode));
11843 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11844 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11846 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11847 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11849 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11850 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11852 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11853 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11855 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11857 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11859 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11860 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11862 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11863 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11865 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11866 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11868 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11869 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11871 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11873 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11875 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11876 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11878 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11879 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11881 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11882 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11884 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11885 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11887 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11889 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11890 && bytes[i] == bytes[(i + 8) % idx]);
11892 while (0);
11894 if (immtype == -1)
11895 return -1;
11897 if (elementwidth)
11898 *elementwidth = elsize;
11900 if (modconst)
11902 unsigned HOST_WIDE_INT imm = 0;
11904 /* Un-invert bytes of recognized vector, if necessary. */
11905 if (invmask != 0)
11906 for (i = 0; i < idx; i++)
11907 bytes[i] ^= invmask;
11909 if (immtype == 17)
11911 /* FIXME: Broken on 32-bit H_W_I hosts. */
11912 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11914 for (i = 0; i < 8; i++)
11915 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11916 << (i * BITS_PER_UNIT);
11918 *modconst = GEN_INT (imm);
11920 else
11922 unsigned HOST_WIDE_INT imm = 0;
11924 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11925 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11927 *modconst = GEN_INT (imm);
11931 return immtype;
11932 #undef CHECK
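/* For example, neon_valid_immediate recognizes a V4SImode constant whose
   elements all equal 0x45 as variant 0 with *ELEMENTWIDTH = 32 (the bytes of
   each word are 45 00 00 00), while a constant whose bytes are all equal to,
   say, 0x37 falls through to variant 16 (vmov.i8).  */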
11935 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11936 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11937 float elements), and a modified constant (whatever should be output for a
11938 VMOV) in *MODCONST. */
11941 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11942 rtx *modconst, int *elementwidth)
11944 rtx tmpconst;
11945 int tmpwidth;
11946 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11948 if (retval == -1)
11949 return 0;
11951 if (modconst)
11952 *modconst = tmpconst;
11954 if (elementwidth)
11955 *elementwidth = tmpwidth;
11957 return 1;
11960 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11961 the immediate is valid, write a constant suitable for using as an operand
11962 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11963 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11966 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11967 rtx *modconst, int *elementwidth)
11969 rtx tmpconst;
11970 int tmpwidth;
11971 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11973 if (retval < 0 || retval > 5)
11974 return 0;
11976 if (modconst)
11977 *modconst = tmpconst;
11979 if (elementwidth)
11980 *elementwidth = tmpwidth;
11982 return 1;
11985 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11986 the immediate is valid, write a constant suitable for using as an operand
11987 to VSHR/VSHL to *MODCONST and the corresponding element width to
11988 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
11989 which have different immediate ranges.
11992 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11993 rtx *modconst, int *elementwidth,
11994 bool isleftshift)
11996 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11997 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11998 unsigned HOST_WIDE_INT last_elt = 0;
11999 unsigned HOST_WIDE_INT maxshift;
12001 /* Split vector constant out into a byte vector. */
12002 for (i = 0; i < n_elts; i++)
12004 rtx el = CONST_VECTOR_ELT (op, i);
12005 unsigned HOST_WIDE_INT elpart;
12007 if (CONST_INT_P (el))
12008 elpart = INTVAL (el);
12009 else if (CONST_DOUBLE_P (el))
12010 return 0;
12011 else
12012 gcc_unreachable ();
12014 if (i != 0 && elpart != last_elt)
12015 return 0;
12017 last_elt = elpart;
12020 /* Shift less than element size. */
12021 maxshift = innersize * 8;
12023 if (isleftshift)
12025 /* Left shift immediate value can be from 0 to <size>-1. */
12026 if (last_elt >= maxshift)
12027 return 0;
12029 else
12031 /* Right shift immediate value can be from 1 to <size>. */
12032 if (last_elt == 0 || last_elt > maxshift)
12033 return 0;
12036 if (elementwidth)
12037 *elementwidth = innersize * 8;
12039 if (modconst)
12040 *modconst = CONST_VECTOR_ELT (op, 0);
12042 return 1;
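/* For example, with V8HImode operands (16-bit elements) MAXSHIFT is 16, so
   a left-shift immediate must lie in the range 0-15 and a right-shift
   immediate in the range 1-16, and every element of the constant vector must
   hold the same shift count.  */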
12045 /* Return a string suitable for output of Neon immediate logic operation
12046 MNEM. */
12048 char *
12049 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12050 int inverse, int quad)
12052 int width, is_valid;
12053 static char templ[40];
12055 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12057 gcc_assert (is_valid != 0);
12059 if (quad)
12060 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12061 else
12062 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12064 return templ;
12067 /* Return a string suitable for output of Neon immediate shift operation
12068 (VSHR or VSHL) MNEM. */
12070 char *
12071 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12072 machine_mode mode, int quad,
12073 bool isleftshift)
12075 int width, is_valid;
12076 static char templ[40];
12078 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12079 gcc_assert (is_valid != 0);
12081 if (quad)
12082 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12083 else
12084 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12086 return templ;
12089 /* Output a sequence of pairwise operations to implement a reduction.
12090 NOTE: We do "too much work" here, because pairwise operations work on two
12091 registers-worth of operands in one go. Unfortunately we can't exploit those
12092 extra calculations to do the full operation in fewer steps.
12093 Although all vector elements of the result but the first are ignored, we
12094 actually calculate the same result in each of the elements. An alternative
12095 such as initially loading a vector with zero to use as each of the second
12096 operands would use up an additional register and take an extra instruction,
12097 for no particular gain. */
12099 void
12100 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12101 rtx (*reduc) (rtx, rtx, rtx))
12103 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12104 rtx tmpsum = op1;
12106 for (i = parts / 2; i >= 1; i /= 2)
12108 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12109 emit_insn (reduc (dest, tmpsum, tmpsum));
12110 tmpsum = dest;
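/* For a four-element vector this invokes REDUC twice: the first call writes
   a fresh temporary register, the second writes OP0, and the scalar result
   ends up replicated in each element of OP0.  */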
12114 /* If VALS is a vector constant that can be loaded into a register
12115 using VDUP, generate instructions to do so and return an RTX to
12116 assign to the register. Otherwise return NULL_RTX. */
12118 static rtx
12119 neon_vdup_constant (rtx vals)
12121 machine_mode mode = GET_MODE (vals);
12122 machine_mode inner_mode = GET_MODE_INNER (mode);
12123 rtx x;
12125 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12126 return NULL_RTX;
12128 if (!const_vec_duplicate_p (vals, &x))
12129 /* The elements are not all the same. We could handle repeating
12130 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12131 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12132 vdup.i16). */
12133 return NULL_RTX;
12135 /* We can load this constant by using VDUP and a constant in a
12136 single ARM register. This will be cheaper than a vector
12137 load. */
12139 x = copy_to_mode_reg (inner_mode, x);
12140 return gen_rtx_VEC_DUPLICATE (mode, x);
12143 /* Generate code to load VALS, which is a PARALLEL containing only
12144 constants (for vec_init) or CONST_VECTOR, efficiently into a
12145 register. Returns an RTX to copy into the register, or NULL_RTX
12146 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12149 neon_make_constant (rtx vals)
12151 machine_mode mode = GET_MODE (vals);
12152 rtx target;
12153 rtx const_vec = NULL_RTX;
12154 int n_elts = GET_MODE_NUNITS (mode);
12155 int n_const = 0;
12156 int i;
12158 if (GET_CODE (vals) == CONST_VECTOR)
12159 const_vec = vals;
12160 else if (GET_CODE (vals) == PARALLEL)
12162 /* A CONST_VECTOR must contain only CONST_INTs and
12163 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12164 Only store valid constants in a CONST_VECTOR. */
12165 for (i = 0; i < n_elts; ++i)
12167 rtx x = XVECEXP (vals, 0, i);
12168 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12169 n_const++;
12171 if (n_const == n_elts)
12172 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12174 else
12175 gcc_unreachable ();
12177 if (const_vec != NULL
12178 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12179 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12180 return const_vec;
12181 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12182 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12183 pipeline cycle; creating the constant takes one or two ARM
12184 pipeline cycles. */
12185 return target;
12186 else if (const_vec != NULL_RTX)
12187 /* Load from constant pool. On Cortex-A8 this takes two cycles
12188 (for either double or quad vectors). We cannot take advantage
12189 of single-cycle VLD1 because we need a PC-relative addressing
12190 mode. */
12191 return const_vec;
12192 else
12193 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12194 We cannot construct an initializer. */
12195 return NULL_RTX;
12198 /* Initialize vector TARGET to VALS. */
12200 void
12201 neon_expand_vector_init (rtx target, rtx vals)
12203 machine_mode mode = GET_MODE (target);
12204 machine_mode inner_mode = GET_MODE_INNER (mode);
12205 int n_elts = GET_MODE_NUNITS (mode);
12206 int n_var = 0, one_var = -1;
12207 bool all_same = true;
12208 rtx x, mem;
12209 int i;
12211 for (i = 0; i < n_elts; ++i)
12213 x = XVECEXP (vals, 0, i);
12214 if (!CONSTANT_P (x))
12215 ++n_var, one_var = i;
12217 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12218 all_same = false;
12221 if (n_var == 0)
12223 rtx constant = neon_make_constant (vals);
12224 if (constant != NULL_RTX)
12226 emit_move_insn (target, constant);
12227 return;
12231 /* Splat a single non-constant element if we can. */
12232 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12234 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12235 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12236 return;
12239 /* One field is non-constant. Load constant then overwrite varying
12240 field. This is more efficient than using the stack. */
12241 if (n_var == 1)
12243 rtx copy = copy_rtx (vals);
12244 rtx index = GEN_INT (one_var);
12246 /* Load constant part of vector, substitute neighboring value for
12247 varying element. */
12248 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12249 neon_expand_vector_init (target, copy);
12251 /* Insert variable. */
12252 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12253 switch (mode)
12255 case E_V8QImode:
12256 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12257 break;
12258 case E_V16QImode:
12259 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12260 break;
12261 case E_V4HImode:
12262 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12263 break;
12264 case E_V8HImode:
12265 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12266 break;
12267 case E_V2SImode:
12268 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12269 break;
12270 case E_V4SImode:
12271 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12272 break;
12273 case E_V2SFmode:
12274 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12275 break;
12276 case E_V4SFmode:
12277 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12278 break;
12279 case E_V2DImode:
12280 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12281 break;
12282 default:
12283 gcc_unreachable ();
12285 return;
12288 /* Construct the vector in memory one field at a time
12289 and load the whole vector. */
12290 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12291 for (i = 0; i < n_elts; i++)
12292 emit_move_insn (adjust_address_nv (mem, inner_mode,
12293 i * GET_MODE_SIZE (inner_mode)),
12294 XVECEXP (vals, 0, i));
12295 emit_move_insn (target, mem);
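/* For example, initializing a V4SImode vector with { x, 1, 2, 3 } where only
   X is non-constant takes the n_var == 1 path above: the constant vector
   { 1, 1, 2, 3 } (the varying lane replaced by its neighbour) is loaded
   first, and X is then inserted into lane 0 via the vset_lane pattern.  */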
12298 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
12299 an error naming DESC if it doesn't. EXP indicates the source location, which includes the
12300 inlining history for intrinsics. */
12302 static void
12303 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12304 const_tree exp, const char *desc)
12306 HOST_WIDE_INT lane;
12308 gcc_assert (CONST_INT_P (operand));
12310 lane = INTVAL (operand);
12312 if (lane < low || lane >= high)
12314 if (exp)
12315 error ("%K%s %wd out of range %wd - %wd",
12316 exp, desc, lane, low, high - 1);
12317 else
12318 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12322 /* Bounds-check lanes. */
12324 void
12325 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12326 const_tree exp)
12328 bounds_check (operand, low, high, exp, "lane");
12331 /* Bounds-check constants. */
12333 void
12334 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12336 bounds_check (operand, low, high, NULL_TREE, "constant");
12339 HOST_WIDE_INT
12340 neon_element_bits (machine_mode mode)
12342 return GET_MODE_UNIT_BITSIZE (mode);
12346 /* Predicates for `match_operand' and `match_operator'. */
12348 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12349 WB is true if full writeback address modes are allowed and is false
12350 if limited writeback address modes (POST_INC and PRE_DEC) are
12351 allowed. */
12354 arm_coproc_mem_operand (rtx op, bool wb)
12356 rtx ind;
12358 /* Reject eliminable registers. */
12359 if (! (reload_in_progress || reload_completed || lra_in_progress)
12360 && ( reg_mentioned_p (frame_pointer_rtx, op)
12361 || reg_mentioned_p (arg_pointer_rtx, op)
12362 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12363 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12364 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12365 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12366 return FALSE;
12368 /* Constants are converted into offsets from labels. */
12369 if (!MEM_P (op))
12370 return FALSE;
12372 ind = XEXP (op, 0);
12374 if (reload_completed
12375 && (GET_CODE (ind) == LABEL_REF
12376 || (GET_CODE (ind) == CONST
12377 && GET_CODE (XEXP (ind, 0)) == PLUS
12378 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12379 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12380 return TRUE;
12382 /* Match: (mem (reg)). */
12383 if (REG_P (ind))
12384 return arm_address_register_rtx_p (ind, 0);
12386 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12387 acceptable in any case (subject to verification by
12388 arm_address_register_rtx_p). We need WB to be true to accept
12389 PRE_INC and POST_DEC. */
12390 if (GET_CODE (ind) == POST_INC
12391 || GET_CODE (ind) == PRE_DEC
12392 || (wb
12393 && (GET_CODE (ind) == PRE_INC
12394 || GET_CODE (ind) == POST_DEC)))
12395 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12397 if (wb
12398 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12399 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12400 && GET_CODE (XEXP (ind, 1)) == PLUS
12401 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12402 ind = XEXP (ind, 1);
12404 /* Match:
12405 (plus (reg)
12406 (const)). */
12407 if (GET_CODE (ind) == PLUS
12408 && REG_P (XEXP (ind, 0))
12409 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12410 && CONST_INT_P (XEXP (ind, 1))
12411 && INTVAL (XEXP (ind, 1)) > -1024
12412 && INTVAL (XEXP (ind, 1)) < 1024
12413 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12414 return TRUE;
12416 return FALSE;
12419 /* Return TRUE if OP is a memory operand which we can load or store a vector
12420 to/from. TYPE is one of the following values:
12421 0 - Vector load/store (vldr)
12422 1 - Core registers (ldm)
12423 2 - Element/structure loads (vld1)
12426 neon_vector_mem_operand (rtx op, int type, bool strict)
12428 rtx ind;
12430 /* Reject eliminable registers. */
12431 if (strict && ! (reload_in_progress || reload_completed)
12432 && (reg_mentioned_p (frame_pointer_rtx, op)
12433 || reg_mentioned_p (arg_pointer_rtx, op)
12434 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12435 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12436 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12437 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12438 return FALSE;
12440 /* Constants are converted into offsets from labels. */
12441 if (!MEM_P (op))
12442 return FALSE;
12444 ind = XEXP (op, 0);
12446 if (reload_completed
12447 && (GET_CODE (ind) == LABEL_REF
12448 || (GET_CODE (ind) == CONST
12449 && GET_CODE (XEXP (ind, 0)) == PLUS
12450 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12451 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12452 return TRUE;
12454 /* Match: (mem (reg)). */
12455 if (REG_P (ind))
12456 return arm_address_register_rtx_p (ind, 0);
12458 /* Allow post-increment with Neon registers. */
12459 if ((type != 1 && GET_CODE (ind) == POST_INC)
12460 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12461 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12463 /* Allow post-increment by register for VLDn. */
12464 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12465 && GET_CODE (XEXP (ind, 1)) == PLUS
12466 && REG_P (XEXP (XEXP (ind, 1), 1)))
12467 return true;
12469 /* Match:
12470 (plus (reg)
12471 (const)). */
12472 if (type == 0
12473 && GET_CODE (ind) == PLUS
12474 && REG_P (XEXP (ind, 0))
12475 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12476 && CONST_INT_P (XEXP (ind, 1))
12477 && INTVAL (XEXP (ind, 1)) > -1024
12478 /* For quad modes, we restrict the constant offset to be slightly less
12479 than what the instruction format permits. We have no such constraint
12480 on double mode offsets. (This must match arm_legitimate_index_p.) */
12481 && (INTVAL (XEXP (ind, 1))
12482 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12483 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12484 return TRUE;
12486 return FALSE;
12489 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12490 type. */
12492 neon_struct_mem_operand (rtx op)
12494 rtx ind;
12496 /* Reject eliminable registers. */
12497 if (! (reload_in_progress || reload_completed)
12498 && ( reg_mentioned_p (frame_pointer_rtx, op)
12499 || reg_mentioned_p (arg_pointer_rtx, op)
12500 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12501 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12502 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12503 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12504 return FALSE;
12506 /* Constants are converted into offsets from labels. */
12507 if (!MEM_P (op))
12508 return FALSE;
12510 ind = XEXP (op, 0);
12512 if (reload_completed
12513 && (GET_CODE (ind) == LABEL_REF
12514 || (GET_CODE (ind) == CONST
12515 && GET_CODE (XEXP (ind, 0)) == PLUS
12516 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12517 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12518 return TRUE;
12520 /* Match: (mem (reg)). */
12521 if (REG_P (ind))
12522 return arm_address_register_rtx_p (ind, 0);
12524 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12525 if (GET_CODE (ind) == POST_INC
12526 || GET_CODE (ind) == PRE_DEC)
12527 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12529 return FALSE;
12532 /* Return true if X is a register that will be eliminated later on. */
12534 arm_eliminable_register (rtx x)
12536 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12537 || REGNO (x) == ARG_POINTER_REGNUM
12538 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12539 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12542 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12543 coprocessor registers. Otherwise return NO_REGS. */
12545 enum reg_class
12546 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12548 if (mode == HFmode)
12550 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12551 return GENERAL_REGS;
12552 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12553 return NO_REGS;
12554 return GENERAL_REGS;
12557 /* The neon move patterns handle all legitimate vector and struct
12558 addresses. */
12559 if (TARGET_NEON
12560 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12561 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12562 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12563 || VALID_NEON_STRUCT_MODE (mode)))
12564 return NO_REGS;
12566 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12567 return NO_REGS;
12569 return GENERAL_REGS;
12572 /* Values which must be returned in the most-significant end of the return
12573 register. */
12575 static bool
12576 arm_return_in_msb (const_tree valtype)
12578 return (TARGET_AAPCS_BASED
12579 && BYTES_BIG_ENDIAN
12580 && (AGGREGATE_TYPE_P (valtype)
12581 || TREE_CODE (valtype) == COMPLEX_TYPE
12582 || FIXED_POINT_TYPE_P (valtype)));
12585 /* Return TRUE if X references a SYMBOL_REF. */
12587 symbol_mentioned_p (rtx x)
12589 const char * fmt;
12590 int i;
12592 if (GET_CODE (x) == SYMBOL_REF)
12593 return 1;
12595 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12596 are constant offsets, not symbols. */
12597 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12598 return 0;
12600 fmt = GET_RTX_FORMAT (GET_CODE (x));
12602 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12604 if (fmt[i] == 'E')
12606 int j;
12608 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12609 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12610 return 1;
12612 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12613 return 1;
12616 return 0;
12619 /* Return TRUE if X references a LABEL_REF. */
12621 label_mentioned_p (rtx x)
12623 const char * fmt;
12624 int i;
12626 if (GET_CODE (x) == LABEL_REF)
12627 return 1;
12629 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12630 instruction, but they are constant offsets, not symbols. */
12631 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12632 return 0;
12634 fmt = GET_RTX_FORMAT (GET_CODE (x));
12635 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12637 if (fmt[i] == 'E')
12639 int j;
12641 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12642 if (label_mentioned_p (XVECEXP (x, i, j)))
12643 return 1;
12645 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12646 return 1;
12649 return 0;
12653 tls_mentioned_p (rtx x)
12655 switch (GET_CODE (x))
12657 case CONST:
12658 return tls_mentioned_p (XEXP (x, 0));
12660 case UNSPEC:
12661 if (XINT (x, 1) == UNSPEC_TLS)
12662 return 1;
12664 /* Fall through. */
12665 default:
12666 return 0;
12670 /* Must not copy any rtx that uses a pc-relative address.
12671 Also, disallow copying of load-exclusive instructions that
12672 may appear after splitting of compare-and-swap-style operations
12673 so as to prevent those loops from being transformed away from their
12674 canonical forms (see PR 69904). */
12676 static bool
12677 arm_cannot_copy_insn_p (rtx_insn *insn)
12679 /* The tls call insn cannot be copied, as it is paired with a data
12680 word. */
12681 if (recog_memoized (insn) == CODE_FOR_tlscall)
12682 return true;
12684 subrtx_iterator::array_type array;
12685 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12687 const_rtx x = *iter;
12688 if (GET_CODE (x) == UNSPEC
12689 && (XINT (x, 1) == UNSPEC_PIC_BASE
12690 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12691 return true;
12694 rtx set = single_set (insn);
12695 if (set)
12697 rtx src = SET_SRC (set);
12698 if (GET_CODE (src) == ZERO_EXTEND)
12699 src = XEXP (src, 0);
12701 /* Catch the load-exclusive and load-acquire operations. */
12702 if (GET_CODE (src) == UNSPEC_VOLATILE
12703 && (XINT (src, 1) == VUNSPEC_LL
12704 || XINT (src, 1) == VUNSPEC_LAX))
12705 return true;
12707 return false;
12710 enum rtx_code
12711 minmax_code (rtx x)
12713 enum rtx_code code = GET_CODE (x);
12715 switch (code)
12717 case SMAX:
12718 return GE;
12719 case SMIN:
12720 return LE;
12721 case UMIN:
12722 return LEU;
12723 case UMAX:
12724 return GEU;
12725 default:
12726 gcc_unreachable ();
12730 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12732 bool
12733 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12734 int *mask, bool *signed_sat)
12736 /* The high bound must be a power of two minus one. */
12737 int log = exact_log2 (INTVAL (hi_bound) + 1);
12738 if (log == -1)
12739 return false;
12741 /* The low bound is either zero (for usat) or one less than the
12742 negation of the high bound (for ssat). */
12743 if (INTVAL (lo_bound) == 0)
12745 if (mask)
12746 *mask = log;
12747 if (signed_sat)
12748 *signed_sat = false;
12750 return true;
12753 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12755 if (mask)
12756 *mask = log + 1;
12757 if (signed_sat)
12758 *signed_sat = true;
12760 return true;
12763 return false;
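/* For example, clamping to [0, 255] yields *MASK = 8 with *SIGNED_SAT false
   (a usat #8 candidate), while clamping to [-128, 127] yields *MASK = 8 with
   *SIGNED_SAT true (an ssat #8 candidate).  */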
12766 /* Return 1 if memory locations are adjacent. */
12768 adjacent_mem_locations (rtx a, rtx b)
12770 /* We don't guarantee to preserve the order of these memory refs. */
12771 if (volatile_refs_p (a) || volatile_refs_p (b))
12772 return 0;
12774 if ((REG_P (XEXP (a, 0))
12775 || (GET_CODE (XEXP (a, 0)) == PLUS
12776 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12777 && (REG_P (XEXP (b, 0))
12778 || (GET_CODE (XEXP (b, 0)) == PLUS
12779 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12781 HOST_WIDE_INT val0 = 0, val1 = 0;
12782 rtx reg0, reg1;
12783 int val_diff;
12785 if (GET_CODE (XEXP (a, 0)) == PLUS)
12787 reg0 = XEXP (XEXP (a, 0), 0);
12788 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12790 else
12791 reg0 = XEXP (a, 0);
12793 if (GET_CODE (XEXP (b, 0)) == PLUS)
12795 reg1 = XEXP (XEXP (b, 0), 0);
12796 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12798 else
12799 reg1 = XEXP (b, 0);
12801 /* Don't accept any offset that will require multiple
12802 instructions to handle, since this would cause the
12803 arith_adjacentmem pattern to output an overlong sequence. */
12804 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12805 return 0;
12807 /* Don't allow an eliminable register: register elimination can make
12808 the offset too large. */
12809 if (arm_eliminable_register (reg0))
12810 return 0;
12812 val_diff = val1 - val0;
12814 if (arm_ld_sched)
12816 /* If the target has load delay slots, then there's no benefit
12817 to using an ldm instruction unless the offset is zero and
12818 we are optimizing for size. */
12819 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12820 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12821 && (val_diff == 4 || val_diff == -4));
12824 return ((REGNO (reg0) == REGNO (reg1))
12825 && (val_diff == 4 || val_diff == -4));
12828 return 0;
12831 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12832 for load operations, false for store operations. CONSECUTIVE is true
12833 if the register numbers in the operation must be consecutive in the register
12834 bank. RETURN_PC is true if the value is to be loaded into the PC.
12835 The pattern we are trying to match for load is:
12836 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12837 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12840 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12842 where
12843 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12844 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12845 3. If consecutive is TRUE, then for kth register being loaded,
12846 REGNO (R_dk) = REGNO (R_d0) + k.
12847 The pattern for store is similar. */
12848 bool
12849 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12850 bool consecutive, bool return_pc)
12852 HOST_WIDE_INT count = XVECLEN (op, 0);
12853 rtx reg, mem, addr;
12854 unsigned regno;
12855 unsigned first_regno;
12856 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12857 rtx elt;
12858 bool addr_reg_in_reglist = false;
12859 bool update = false;
12860 int reg_increment;
12861 int offset_adj;
12862 int regs_per_val;
12864 /* If not in SImode, then registers must be consecutive
12865 (e.g., VLDM instructions for DFmode). */
12866 gcc_assert ((mode == SImode) || consecutive);
12867 /* Setting return_pc for stores is illegal. */
12868 gcc_assert (!return_pc || load);
12870 /* Set up the increments and the regs per val based on the mode. */
12871 reg_increment = GET_MODE_SIZE (mode);
12872 regs_per_val = reg_increment / 4;
12873 offset_adj = return_pc ? 1 : 0;
12875 if (count <= 1
12876 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12877 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12878 return false;
12880 /* Check if this is a write-back. */
12881 elt = XVECEXP (op, 0, offset_adj);
12882 if (GET_CODE (SET_SRC (elt)) == PLUS)
12884 i++;
12885 base = 1;
12886 update = true;
12888 /* The offset adjustment must be the number of registers being
12889 popped times the size of a single register. */
12890 if (!REG_P (SET_DEST (elt))
12891 || !REG_P (XEXP (SET_SRC (elt), 0))
12892 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12893 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12894 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12895 ((count - 1 - offset_adj) * reg_increment))
12896 return false;
12899 i = i + offset_adj;
12900 base = base + offset_adj;
12901 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12902 success depends on the type: VLDM can do just one reg,
12903 LDM must do at least two. */
12904 if ((count <= i) && (mode == SImode))
12905 return false;
12907 elt = XVECEXP (op, 0, i - 1);
12908 if (GET_CODE (elt) != SET)
12909 return false;
12911 if (load)
12913 reg = SET_DEST (elt);
12914 mem = SET_SRC (elt);
12916 else
12918 reg = SET_SRC (elt);
12919 mem = SET_DEST (elt);
12922 if (!REG_P (reg) || !MEM_P (mem))
12923 return false;
12925 regno = REGNO (reg);
12926 first_regno = regno;
12927 addr = XEXP (mem, 0);
12928 if (GET_CODE (addr) == PLUS)
12930 if (!CONST_INT_P (XEXP (addr, 1)))
12931 return false;
12933 offset = INTVAL (XEXP (addr, 1));
12934 addr = XEXP (addr, 0);
12937 if (!REG_P (addr))
12938 return false;
12940 /* Don't allow SP to be loaded unless it is also the base register. It
12941 guarantees that SP is reset correctly when an LDM instruction
12942 is interrupted. Otherwise, we might end up with a corrupt stack. */
12943 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12944 return false;
12946 for (; i < count; i++)
12948 elt = XVECEXP (op, 0, i);
12949 if (GET_CODE (elt) != SET)
12950 return false;
12952 if (load)
12954 reg = SET_DEST (elt);
12955 mem = SET_SRC (elt);
12957 else
12959 reg = SET_SRC (elt);
12960 mem = SET_DEST (elt);
12963 if (!REG_P (reg)
12964 || GET_MODE (reg) != mode
12965 || REGNO (reg) <= regno
12966 || (consecutive
12967 && (REGNO (reg) !=
12968 (unsigned int) (first_regno + regs_per_val * (i - base))))
12969 /* Don't allow SP to be loaded unless it is also the base register. It
12970 guarantees that SP is reset correctly when an LDM instruction
12971 is interrupted. Otherwise, we might end up with a corrupt stack. */
12972 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12973 || !MEM_P (mem)
12974 || GET_MODE (mem) != mode
12975 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12976 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12977 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12978 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12979 offset + (i - base) * reg_increment))
12980 && (!REG_P (XEXP (mem, 0))
12981 || offset + (i - base) * reg_increment != 0)))
12982 return false;
12984 regno = REGNO (reg);
12985 if (regno == REGNO (addr))
12986 addr_reg_in_reglist = true;
12989 if (load)
12991 if (update && addr_reg_in_reglist)
12992 return false;
12994 /* For Thumb-1, the address register is always modified - either by write-back
12995 or by explicit load. If the pattern does not describe an update,
12996 then the address register must be in the list of loaded registers. */
12997 if (TARGET_THUMB1)
12998 return update || addr_reg_in_reglist;
13001 return true;
13004 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13005 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13006 instruction. ADD_OFFSET is nonzero if the base address register needs
13007 to be modified with an add instruction before we can use it. */
13009 static bool
13010 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13011 int nops, HOST_WIDE_INT add_offset)
13013 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13014 if the offset isn't small enough. The reason 2 ldrs are faster
13015 is because these ARMs are able to do more than one cache access
13016 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13017 whilst the ARM8 has a double bandwidth cache. This means that
13018 these cores can do both an instruction fetch and a data fetch in
13019 a single cycle, so the trick of calculating the address into a
13020 scratch register (one of the result regs) and then doing a load
13021 multiple actually becomes slower (and no smaller in code size).
13022 That is the transformation
13024 ldr rd1, [rbase + offset]
13025 ldr rd2, [rbase + offset + 4]
13029 add rd1, rbase, offset
13030 ldmia rd1, {rd1, rd2}
13032 produces worse code -- '3 cycles + any stalls on rd2' instead of
13033 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13034 access per cycle, the first sequence could never complete in less
13035 than 6 cycles, whereas the ldm sequence would only take 5 and
13036 would make better use of sequential accesses if not hitting the
13037 cache.
13039 We cheat here and test 'arm_ld_sched' which we currently know to
13040 only be true for the ARM8, ARM9 and StrongARM. If this ever
13041 changes, then the test below needs to be reworked. */
13042 if (nops == 2 && arm_ld_sched && add_offset != 0)
13043 return false;
13045 /* XScale has load-store double instructions, but they have stricter
13046 alignment requirements than load-store multiple, so we cannot
13047 use them.
13049 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13050 the pipeline until completion.
13052 NREGS CYCLES
13053 1 3
13054 2 4
13055 3 5
13056 4 6
13058 An ldr instruction takes 1-3 cycles, but does not block the
13059 pipeline.
13061 NREGS CYCLES
13062 1 1-3
13063 2 2-6
13064 3 3-9
13065 4 4-12
13067 Best case ldr will always win. However, the more ldr instructions
13068 we issue, the less likely we are to be able to schedule them well.
13069 Using ldr instructions also increases code size.
13071 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13072 for counts of 3 or 4 regs. */
13073 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13074 return false;
13075 return true;
13078 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13079 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13080 an array ORDER which describes the sequence to use when accessing the
13081 offsets that produces an ascending order. In this sequence, each
13082 offset must be larger by exactly 4 than the previous one. ORDER[0]
13083 must have been filled in with the lowest offset by the caller.
13084 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13085 we use to verify that ORDER produces an ascending order of registers.
13086 Return true if it was possible to construct such an order, false if
13087 not. */
13089 static bool
13090 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13091 int *unsorted_regs)
13093 int i;
13094 for (i = 1; i < nops; i++)
13096 int j;
13098 order[i] = order[i - 1];
13099 for (j = 0; j < nops; j++)
13100 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13102 /* We must find exactly one offset that is higher than the
13103 previous one by 4. */
13104 if (order[i] != order[i - 1])
13105 return false;
13106 order[i] = j;
13108 if (order[i] == order[i - 1])
13109 return false;
13110 /* The register numbers must be ascending. */
13111 if (unsorted_regs != NULL
13112 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13113 return false;
13115 return true;
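/* For example, given UNSORTED_OFFSETS = { 8, 0, 12, 4 } and ORDER[0] = 1
   (the index of the lowest offset), the loop above produces
   ORDER = { 1, 3, 0, 2 }, i.e. offsets 0, 4, 8, 12.  Any gap between
   consecutive offsets, or more than one candidate for the next offset,
   makes the function return false.  */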
13118 /* Used to determine in a peephole whether a sequence of load
13119 instructions can be changed into a load-multiple instruction.
13120 NOPS is the number of separate load instructions we are examining. The
13121 first NOPS entries in OPERANDS are the destination registers, the
13122 next NOPS entries are memory operands. If this function is
13123 successful, *BASE is set to the common base register of the memory
13124 accesses; *LOAD_OFFSET is set to the first memory location's offset
13125 from that base register.
13126 REGS is an array filled in with the destination register numbers.
13127 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13128 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13129 the sequence of registers in REGS matches the loads from ascending memory
13130 locations, and the function verifies that the register numbers are
13131 themselves ascending. If CHECK_REGS is false, the register numbers
13132 are stored in the order they are found in the operands. */
13133 static int
13134 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13135 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13137 int unsorted_regs[MAX_LDM_STM_OPS];
13138 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13139 int order[MAX_LDM_STM_OPS];
13140 rtx base_reg_rtx = NULL;
13141 int base_reg = -1;
13142 int i, ldm_case;
13144 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13145 easily extended if required. */
13146 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13148 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13150 /* Loop over the operands and check that the memory references are
13151 suitable (i.e. immediate offsets from the same base register). At
13152 the same time, extract the target register, and the memory
13153 offsets. */
13154 for (i = 0; i < nops; i++)
13156 rtx reg;
13157 rtx offset;
13159 /* Convert a subreg of a mem into the mem itself. */
13160 if (GET_CODE (operands[nops + i]) == SUBREG)
13161 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13163 gcc_assert (MEM_P (operands[nops + i]));
13165 /* Don't reorder volatile memory references; it doesn't seem worth
13166 looking for the case where the order is ok anyway. */
13167 if (MEM_VOLATILE_P (operands[nops + i]))
13168 return 0;
13170 offset = const0_rtx;
13172 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13173 || (GET_CODE (reg) == SUBREG
13174 && REG_P (reg = SUBREG_REG (reg))))
13175 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13176 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13177 || (GET_CODE (reg) == SUBREG
13178 && REG_P (reg = SUBREG_REG (reg))))
13179 && (CONST_INT_P (offset
13180 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13182 if (i == 0)
13184 base_reg = REGNO (reg);
13185 base_reg_rtx = reg;
13186 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13187 return 0;
13189 else if (base_reg != (int) REGNO (reg))
13190 /* Not addressed from the same base register. */
13191 return 0;
13193 unsorted_regs[i] = (REG_P (operands[i])
13194 ? REGNO (operands[i])
13195 : REGNO (SUBREG_REG (operands[i])));
13197 /* If it isn't an integer register, or if it overwrites the
13198 base register but isn't the last insn in the list, then
13199 we can't do this. */
13200 if (unsorted_regs[i] < 0
13201 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13202 || unsorted_regs[i] > 14
13203 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13204 return 0;
13206 /* Don't allow SP to be loaded unless it is also the base
13207 	     register.  This guarantees that SP is reset correctly when
13208 an LDM instruction is interrupted. Otherwise, we might
13209 end up with a corrupt stack. */
13210 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13211 return 0;
13213 unsorted_offsets[i] = INTVAL (offset);
13214 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13215 order[0] = i;
13217 else
13218 /* Not a suitable memory address. */
13219 return 0;
13222 /* All the useful information has now been extracted from the
13223 operands into unsorted_regs and unsorted_offsets; additionally,
13224 order[0] has been set to the lowest offset in the list. Sort
13225 the offsets into order, verifying that they are adjacent, and
13226 check that the register numbers are ascending. */
13227 if (!compute_offset_order (nops, unsorted_offsets, order,
13228 check_regs ? unsorted_regs : NULL))
13229 return 0;
13231 if (saved_order)
13232 memcpy (saved_order, order, sizeof order);
13234 if (base)
13236 *base = base_reg;
13238 for (i = 0; i < nops; i++)
13239 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13241 *load_offset = unsorted_offsets[order[0]];
13244 if (TARGET_THUMB1
13245 && !peep2_reg_dead_p (nops, base_reg_rtx))
13246 return 0;
13248 if (unsorted_offsets[order[0]] == 0)
13249 ldm_case = 1; /* ldmia */
13250 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13251 ldm_case = 2; /* ldmib */
13252 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13253 ldm_case = 3; /* ldmda */
13254 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13255 ldm_case = 4; /* ldmdb */
13256 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13257 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13258 ldm_case = 5;
13259 else
13260 return 0;
13262 if (!multiple_operation_profitable_p (false, nops,
13263 ldm_case == 5
13264 ? unsorted_offsets[order[0]] : 0))
13265 return 0;
13267 return ldm_case;
13270 /* Used to determine in a peephole whether a sequence of store instructions can
13271 be changed into a store-multiple instruction.
13272 NOPS is the number of separate store instructions we are examining.
13273 NOPS_TOTAL is the total number of instructions recognized by the peephole
13274 pattern.
13275 The first NOPS entries in OPERANDS are the source registers, the next
13276 NOPS entries are memory operands. If this function is successful, *BASE is
13277 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13278 to the first memory location's offset from that base register. REGS is an
13279 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13280 likewise filled with the corresponding rtx's.
13281    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13282 numbers to an ascending order of stores.
13283 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13284 from ascending memory locations, and the function verifies that the register
13285 numbers are themselves ascending. If CHECK_REGS is false, the register
13286 numbers are stored in the order they are found in the operands. */
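/* For example (illustrative choices), the stores
       str r1, [r4, #-8]
       str r2, [r4, #-4]
   share the base r4, have consecutive offsets ending at -4 and ascending
   source registers, so they match the stmdb case:
       stmdb r4, {r1, r2}  */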
13287 static int
13288 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13289 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13290 HOST_WIDE_INT *load_offset, bool check_regs)
13292 int unsorted_regs[MAX_LDM_STM_OPS];
13293 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13294 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13295 int order[MAX_LDM_STM_OPS];
13296 int base_reg = -1;
13297 rtx base_reg_rtx = NULL;
13298 int i, stm_case;
13300 /* Write back of base register is currently only supported for Thumb 1. */
13301 int base_writeback = TARGET_THUMB1;
13303 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13304 easily extended if required. */
13305 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13307 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13309 /* Loop over the operands and check that the memory references are
13310 suitable (i.e. immediate offsets from the same base register). At
13311 the same time, extract the target register, and the memory
13312 offsets. */
13313 for (i = 0; i < nops; i++)
13315 rtx reg;
13316 rtx offset;
13318 /* Convert a subreg of a mem into the mem itself. */
13319 if (GET_CODE (operands[nops + i]) == SUBREG)
13320 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13322 gcc_assert (MEM_P (operands[nops + i]));
13324 /* Don't reorder volatile memory references; it doesn't seem worth
13325 looking for the case where the order is ok anyway. */
13326 if (MEM_VOLATILE_P (operands[nops + i]))
13327 return 0;
13329 offset = const0_rtx;
13331 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13332 || (GET_CODE (reg) == SUBREG
13333 && REG_P (reg = SUBREG_REG (reg))))
13334 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13335 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13336 || (GET_CODE (reg) == SUBREG
13337 && REG_P (reg = SUBREG_REG (reg))))
13338 && (CONST_INT_P (offset
13339 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13341 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13342 ? operands[i] : SUBREG_REG (operands[i]));
13343 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13345 if (i == 0)
13347 base_reg = REGNO (reg);
13348 base_reg_rtx = reg;
13349 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13350 return 0;
13352 else if (base_reg != (int) REGNO (reg))
13353 /* Not addressed from the same base register. */
13354 return 0;
13356 /* If it isn't an integer register, then we can't do this. */
13357 if (unsorted_regs[i] < 0
13358 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13359 /* The effects are unpredictable if the base register is
13360 both updated and stored. */
13361 || (base_writeback && unsorted_regs[i] == base_reg)
13362 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13363 || unsorted_regs[i] > 14)
13364 return 0;
13366 unsorted_offsets[i] = INTVAL (offset);
13367 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13368 order[0] = i;
13370 else
13371 /* Not a suitable memory address. */
13372 return 0;
13375 /* All the useful information has now been extracted from the
13376 operands into unsorted_regs and unsorted_offsets; additionally,
13377 order[0] has been set to the lowest offset in the list. Sort
13378 the offsets into order, verifying that they are adjacent, and
13379 check that the register numbers are ascending. */
13380 if (!compute_offset_order (nops, unsorted_offsets, order,
13381 check_regs ? unsorted_regs : NULL))
13382 return 0;
13384 if (saved_order)
13385 memcpy (saved_order, order, sizeof order);
13387 if (base)
13389 *base = base_reg;
13391 for (i = 0; i < nops; i++)
13393 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13394 if (reg_rtxs)
13395 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13398 *load_offset = unsorted_offsets[order[0]];
13401 if (TARGET_THUMB1
13402 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13403 return 0;
13405 if (unsorted_offsets[order[0]] == 0)
13406 stm_case = 1; /* stmia */
13407 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13408 stm_case = 2; /* stmib */
13409 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13410 stm_case = 3; /* stmda */
13411 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13412 stm_case = 4; /* stmdb */
13413 else
13414 return 0;
13416 if (!multiple_operation_profitable_p (false, nops, 0))
13417 return 0;
13419 return stm_case;
13422 /* Routines for use in generating RTL. */
13424 /* Generate a load-multiple instruction. COUNT is the number of loads in
13425 the instruction; REGS and MEMS are arrays containing the operands.
13426 BASEREG is the base register to be used in addressing the memory operands.
13427 WBACK_OFFSET is nonzero if the instruction should update the base
13428 register. */
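/* Sketch of the RTL shape produced below for COUNT == 2 with a nonzero
   WBACK_OFFSET (register numbers and mems are illustrative only):
     (parallel [(set (reg base) (plus (reg base) (const_int 8)))
                (set (reg r1) (mem ...))
                (set (reg r2) (mem ...))])
   When the multiple operation is judged unprofitable, an equivalent
   sequence of individual moves is emitted instead.  */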
13430 static rtx
13431 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13432 HOST_WIDE_INT wback_offset)
13434 int i = 0, j;
13435 rtx result;
13437 if (!multiple_operation_profitable_p (false, count, 0))
13439 rtx seq;
13441 start_sequence ();
13443 for (i = 0; i < count; i++)
13444 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13446 if (wback_offset != 0)
13447 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13449 seq = get_insns ();
13450 end_sequence ();
13452 return seq;
13455 result = gen_rtx_PARALLEL (VOIDmode,
13456 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13457 if (wback_offset != 0)
13459 XVECEXP (result, 0, 0)
13460 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13461 i = 1;
13462 count++;
13465 for (j = 0; i < count; i++, j++)
13466 XVECEXP (result, 0, i)
13467 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13469 return result;
13472 /* Generate a store-multiple instruction. COUNT is the number of stores in
13473 the instruction; REGS and MEMS are arrays containing the operands.
13474 BASEREG is the base register to be used in addressing the memory operands.
13475 WBACK_OFFSET is nonzero if the instruction should update the base
13476 register. */
13478 static rtx
13479 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13480 HOST_WIDE_INT wback_offset)
13482 int i = 0, j;
13483 rtx result;
13485 if (GET_CODE (basereg) == PLUS)
13486 basereg = XEXP (basereg, 0);
13488 if (!multiple_operation_profitable_p (false, count, 0))
13490 rtx seq;
13492 start_sequence ();
13494 for (i = 0; i < count; i++)
13495 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13497 if (wback_offset != 0)
13498 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13500 seq = get_insns ();
13501 end_sequence ();
13503 return seq;
13506 result = gen_rtx_PARALLEL (VOIDmode,
13507 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13508 if (wback_offset != 0)
13510 XVECEXP (result, 0, 0)
13511 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13512 i = 1;
13513 count++;
13516 for (j = 0; i < count; i++, j++)
13517 XVECEXP (result, 0, i)
13518 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13520 return result;
13523 /* Generate either a load-multiple or a store-multiple instruction. This
13524 function can be used in situations where we can start with a single MEM
13525 rtx and adjust its address upwards.
13526 COUNT is the number of operations in the instruction, not counting a
13527 possible update of the base register. REGS is an array containing the
13528 register operands.
13529 BASEREG is the base register to be used in addressing the memory operands,
13530 which are constructed from BASEMEM.
13531 WRITE_BACK specifies whether the generated instruction should include an
13532 update of the base register.
13533 OFFSETP is used to pass an offset to and from this function; this offset
13534 is not used when constructing the address (instead BASEMEM should have an
13535 appropriate offset in its address), it is used only for setting
13536    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13538 static rtx
13539 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13540 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13542 rtx mems[MAX_LDM_STM_OPS];
13543 HOST_WIDE_INT offset = *offsetp;
13544 int i;
13546 gcc_assert (count <= MAX_LDM_STM_OPS);
13548 if (GET_CODE (basereg) == PLUS)
13549 basereg = XEXP (basereg, 0);
13551 for (i = 0; i < count; i++)
13553 rtx addr = plus_constant (Pmode, basereg, i * 4);
13554 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13555 offset += 4;
13558 if (write_back)
13559 *offsetp = offset;
13561 if (is_load)
13562 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13563 write_back ? 4 * count : 0);
13564 else
13565 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13566 write_back ? 4 * count : 0);
13569 rtx
13570 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13571 rtx basemem, HOST_WIDE_INT *offsetp)
13573 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13574 offsetp);
13577 rtx
13578 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13579 rtx basemem, HOST_WIDE_INT *offsetp)
13581 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13582 offsetp);
13585 /* Called from a peephole2 expander to turn a sequence of loads into an
13586 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13587 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13588 is true if we can reorder the registers because they are used commutatively
13589 subsequently.
13590 Returns true iff we could generate a new instruction. */
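/* Illustration (assumed values): when the lowest offset cannot appear
   directly in an LDM addressing mode (ldm_case 5), an explicit add is
   emitted first, e.g.
       add   r0, r4, #256
       ldmia r0, {r0, r1}
   where, outside Thumb-1, the first destination register is reused as the
   new base.  */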
13592 bool
13593 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13595 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13596 rtx mems[MAX_LDM_STM_OPS];
13597 int i, j, base_reg;
13598 rtx base_reg_rtx;
13599 HOST_WIDE_INT offset;
13600 int write_back = FALSE;
13601 int ldm_case;
13602 rtx addr;
13604 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13605 &base_reg, &offset, !sort_regs);
13607 if (ldm_case == 0)
13608 return false;
13610 if (sort_regs)
13611 for (i = 0; i < nops - 1; i++)
13612 for (j = i + 1; j < nops; j++)
13613 if (regs[i] > regs[j])
13615 int t = regs[i];
13616 regs[i] = regs[j];
13617 regs[j] = t;
13619 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13621 if (TARGET_THUMB1)
13623 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13624 gcc_assert (ldm_case == 1 || ldm_case == 5);
13625 write_back = TRUE;
13628 if (ldm_case == 5)
13630 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13631 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13632 offset = 0;
13633 if (!TARGET_THUMB1)
13634 base_reg_rtx = newbase;
13637 for (i = 0; i < nops; i++)
13639 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13640 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13641 SImode, addr, 0);
13643 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13644 write_back ? offset + i * 4 : 0));
13645 return true;
13648 /* Called from a peephole2 expander to turn a sequence of stores into an
13649 STM instruction. OPERANDS are the operands found by the peephole matcher;
13650 NOPS indicates how many separate stores we are trying to combine.
13651 Returns true iff we could generate a new instruction. */
13653 bool
13654 gen_stm_seq (rtx *operands, int nops)
13656 int i;
13657 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13658 rtx mems[MAX_LDM_STM_OPS];
13659 int base_reg;
13660 rtx base_reg_rtx;
13661 HOST_WIDE_INT offset;
13662 int write_back = FALSE;
13663 int stm_case;
13664 rtx addr;
13665 bool base_reg_dies;
13667 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13668 mem_order, &base_reg, &offset, true);
13670 if (stm_case == 0)
13671 return false;
13673 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13675 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13676 if (TARGET_THUMB1)
13678 gcc_assert (base_reg_dies);
13679 write_back = TRUE;
13682 if (stm_case == 5)
13684 gcc_assert (base_reg_dies);
13685 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13686 offset = 0;
13689 addr = plus_constant (Pmode, base_reg_rtx, offset);
13691 for (i = 0; i < nops; i++)
13693 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13694 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13695 SImode, addr, 0);
13697 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13698 write_back ? offset + i * 4 : 0));
13699 return true;
13702 /* Called from a peephole2 expander to turn a sequence of stores that are
13703 preceded by constant loads into an STM instruction. OPERANDS are the
13704 operands found by the peephole matcher; NOPS indicates how many
13705 separate stores we are trying to combine; there are 2 * NOPS
13706 instructions in the peephole.
13707 Returns true iff we could generate a new instruction. */
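/* For example (illustrative values), the peephole input
       mov r1, #1
       mov r2, #2
       str r1, [r4]
       str r2, [r4, #4]
   is re-emitted as the two constant loads followed by
       stmia r4, {r1, r2}  */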
13709 bool
13710 gen_const_stm_seq (rtx *operands, int nops)
13712 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13713 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13714 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13715 rtx mems[MAX_LDM_STM_OPS];
13716 int base_reg;
13717 rtx base_reg_rtx;
13718 HOST_WIDE_INT offset;
13719 int write_back = FALSE;
13720 int stm_case;
13721 rtx addr;
13722 bool base_reg_dies;
13723 int i, j;
13724 HARD_REG_SET allocated;
13726 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13727 mem_order, &base_reg, &offset, false);
13729 if (stm_case == 0)
13730 return false;
13732 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13734 /* If the same register is used more than once, try to find a free
13735 register. */
13736 CLEAR_HARD_REG_SET (allocated);
13737 for (i = 0; i < nops; i++)
13739 for (j = i + 1; j < nops; j++)
13740 if (regs[i] == regs[j])
13742 rtx t = peep2_find_free_register (0, nops * 2,
13743 TARGET_THUMB1 ? "l" : "r",
13744 SImode, &allocated);
13745 if (t == NULL_RTX)
13746 return false;
13747 reg_rtxs[i] = t;
13748 regs[i] = REGNO (t);
13752 /* Compute an ordering that maps the register numbers to an ascending
13753 sequence. */
13754 reg_order[0] = 0;
13755 for (i = 0; i < nops; i++)
13756 if (regs[i] < regs[reg_order[0]])
13757 reg_order[0] = i;
13759 for (i = 1; i < nops; i++)
13761 int this_order = reg_order[i - 1];
13762 for (j = 0; j < nops; j++)
13763 if (regs[j] > regs[reg_order[i - 1]]
13764 && (this_order == reg_order[i - 1]
13765 || regs[j] < regs[this_order]))
13766 this_order = j;
13767 reg_order[i] = this_order;
13770 /* Ensure that registers that must be live after the instruction end
13771 up with the correct value. */
13772 for (i = 0; i < nops; i++)
13774 int this_order = reg_order[i];
13775 if ((this_order != mem_order[i]
13776 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13777 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13778 return false;
13781 /* Load the constants. */
13782 for (i = 0; i < nops; i++)
13784 rtx op = operands[2 * nops + mem_order[i]];
13785 sorted_regs[i] = regs[reg_order[i]];
13786 emit_move_insn (reg_rtxs[reg_order[i]], op);
13789 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13791 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13792 if (TARGET_THUMB1)
13794 gcc_assert (base_reg_dies);
13795 write_back = TRUE;
13798 if (stm_case == 5)
13800 gcc_assert (base_reg_dies);
13801 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13802 offset = 0;
13805 addr = plus_constant (Pmode, base_reg_rtx, offset);
13807 for (i = 0; i < nops; i++)
13809 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13810 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13811 SImode, addr, 0);
13813 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13814 write_back ? offset + i * 4 : 0));
13815 return true;
13818 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13819 unaligned copies on processors which support unaligned semantics for those
13820 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13821 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13822 An interleave factor of 1 (the minimum) will perform no interleaving.
13823 Load/store multiple are used for aligned addresses where possible. */
13825 static void
13826 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13827 HOST_WIDE_INT length,
13828 unsigned int interleave_factor)
13830 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13831 int *regnos = XALLOCAVEC (int, interleave_factor);
13832 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13833 HOST_WIDE_INT i, j;
13834 HOST_WIDE_INT remaining = length, words;
13835 rtx halfword_tmp = NULL, byte_tmp = NULL;
13836 rtx dst, src;
13837 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13838 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13839 HOST_WIDE_INT srcoffset, dstoffset;
13840 HOST_WIDE_INT src_autoinc, dst_autoinc;
13841 rtx mem, addr;
13843 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13845 /* Use hard registers if we have aligned source or destination so we can use
13846 load/store multiple with contiguous registers. */
13847 if (dst_aligned || src_aligned)
13848 for (i = 0; i < interleave_factor; i++)
13849 regs[i] = gen_rtx_REG (SImode, i);
13850 else
13851 for (i = 0; i < interleave_factor; i++)
13852 regs[i] = gen_reg_rtx (SImode);
13854 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13855 src = copy_addr_to_reg (XEXP (srcbase, 0));
13857 srcoffset = dstoffset = 0;
13859 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13860 For copying the last bytes we want to subtract this offset again. */
13861 src_autoinc = dst_autoinc = 0;
13863 for (i = 0; i < interleave_factor; i++)
13864 regnos[i] = i;
13866 /* Copy BLOCK_SIZE_BYTES chunks. */
13868 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13870 /* Load words. */
13871 if (src_aligned && interleave_factor > 1)
13873 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13874 TRUE, srcbase, &srcoffset));
13875 src_autoinc += UNITS_PER_WORD * interleave_factor;
13877 else
13879 for (j = 0; j < interleave_factor; j++)
13881 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13882 - src_autoinc));
13883 mem = adjust_automodify_address (srcbase, SImode, addr,
13884 srcoffset + j * UNITS_PER_WORD);
13885 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13887 srcoffset += block_size_bytes;
13890 /* Store words. */
13891 if (dst_aligned && interleave_factor > 1)
13893 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13894 TRUE, dstbase, &dstoffset));
13895 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13897 else
13899 for (j = 0; j < interleave_factor; j++)
13901 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13902 - dst_autoinc));
13903 mem = adjust_automodify_address (dstbase, SImode, addr,
13904 dstoffset + j * UNITS_PER_WORD);
13905 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13907 dstoffset += block_size_bytes;
13910 remaining -= block_size_bytes;
13913 /* Copy any whole words left (note these aren't interleaved with any
13914 subsequent halfword/byte load/stores in the interests of simplicity). */
13916 words = remaining / UNITS_PER_WORD;
13918 gcc_assert (words < interleave_factor);
13920 if (src_aligned && words > 1)
13922 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13923 &srcoffset));
13924 src_autoinc += UNITS_PER_WORD * words;
13926 else
13928 for (j = 0; j < words; j++)
13930 addr = plus_constant (Pmode, src,
13931 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13932 mem = adjust_automodify_address (srcbase, SImode, addr,
13933 srcoffset + j * UNITS_PER_WORD);
13934 if (src_aligned)
13935 emit_move_insn (regs[j], mem);
13936 else
13937 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13939 srcoffset += words * UNITS_PER_WORD;
13942 if (dst_aligned && words > 1)
13944 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13945 &dstoffset));
13946 dst_autoinc += words * UNITS_PER_WORD;
13948 else
13950 for (j = 0; j < words; j++)
13952 addr = plus_constant (Pmode, dst,
13953 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13954 mem = adjust_automodify_address (dstbase, SImode, addr,
13955 dstoffset + j * UNITS_PER_WORD);
13956 if (dst_aligned)
13957 emit_move_insn (mem, regs[j]);
13958 else
13959 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13961 dstoffset += words * UNITS_PER_WORD;
13964 remaining -= words * UNITS_PER_WORD;
13966 gcc_assert (remaining < 4);
13968 /* Copy a halfword if necessary. */
13970 if (remaining >= 2)
13972 halfword_tmp = gen_reg_rtx (SImode);
13974 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13975 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13976 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13978 /* Either write out immediately, or delay until we've loaded the last
13979 byte, depending on interleave factor. */
13980 if (interleave_factor == 1)
13982 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13983 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13984 emit_insn (gen_unaligned_storehi (mem,
13985 gen_lowpart (HImode, halfword_tmp)));
13986 halfword_tmp = NULL;
13987 dstoffset += 2;
13990 remaining -= 2;
13991 srcoffset += 2;
13994 gcc_assert (remaining < 2);
13996 /* Copy last byte. */
13998 if ((remaining & 1) != 0)
14000 byte_tmp = gen_reg_rtx (SImode);
14002 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14003 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14004 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14006 if (interleave_factor == 1)
14008 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14009 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14010 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14011 byte_tmp = NULL;
14012 dstoffset++;
14015 remaining--;
14016 srcoffset++;
14019 /* Store last halfword if we haven't done so already. */
14021 if (halfword_tmp)
14023 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14024 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14025 emit_insn (gen_unaligned_storehi (mem,
14026 gen_lowpart (HImode, halfword_tmp)));
14027 dstoffset += 2;
14030 /* Likewise for last byte. */
14032 if (byte_tmp)
14034 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14035 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14036 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14037 dstoffset++;
14040 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14043 /* From mips_adjust_block_mem:
14045 Helper function for doing a loop-based block operation on memory
14046 reference MEM. Each iteration of the loop will operate on LENGTH
14047 bytes of MEM.
14049 Create a new base register for use within the loop and point it to
14050 the start of MEM. Create a new memory reference that uses this
14051 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14053 static void
14054 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14055 rtx *loop_mem)
14057 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14059 /* Although the new mem does not refer to a known location,
14060 it does keep up to LENGTH bytes of alignment. */
14061 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14062 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14065 /* From mips_block_move_loop:
14067 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14068 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14069 the memory regions do not overlap. */
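/* The code emitted below is roughly (labels and registers are
   illustrative):
     loop:
       <copy BYTES_PER_ITER bytes with arm_block_move_unaligned_straight>
       add  src_reg,  src_reg,  #BYTES_PER_ITER
       add  dest_reg, dest_reg, #BYTES_PER_ITER
       cmp  src_reg, final_src
       bne  loop
     <copy the remaining LENGTH % BYTES_PER_ITER bytes>  */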
14071 static void
14072 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14073 unsigned int interleave_factor,
14074 HOST_WIDE_INT bytes_per_iter)
14076 rtx src_reg, dest_reg, final_src, test;
14077 HOST_WIDE_INT leftover;
14079 leftover = length % bytes_per_iter;
14080 length -= leftover;
14082 /* Create registers and memory references for use within the loop. */
14083 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14084 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14086 /* Calculate the value that SRC_REG should have after the last iteration of
14087 the loop. */
14088 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14089 0, 0, OPTAB_WIDEN);
14091 /* Emit the start of the loop. */
14092 rtx_code_label *label = gen_label_rtx ();
14093 emit_label (label);
14095 /* Emit the loop body. */
14096 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14097 interleave_factor);
14099 /* Move on to the next block. */
14100 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14101 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14103 /* Emit the loop condition. */
14104 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14105 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14107 /* Mop up any left-over bytes. */
14108 if (leftover)
14109 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14112 /* Emit a block move when either the source or destination is unaligned (not
14113 aligned to a four-byte boundary). This may need further tuning depending on
14114 core type, optimize_size setting, etc. */
14116 static int
14117 arm_movmemqi_unaligned (rtx *operands)
14119 HOST_WIDE_INT length = INTVAL (operands[2]);
14121 if (optimize_size)
14123 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14124 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14125 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14126 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14127 or dst_aligned though: allow more interleaving in those cases since the
14128 resulting code can be smaller. */
14129 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14130 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14132 if (length > 12)
14133 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14134 interleave_factor, bytes_per_iter);
14135 else
14136 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14137 interleave_factor);
14139 else
14141 /* Note that the loop created by arm_block_move_unaligned_loop may be
14142 subject to loop unrolling, which makes tuning this condition a little
14143 redundant. */
14144 if (length > 32)
14145 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14146 else
14147 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14150 return 1;
14153 int
14154 arm_gen_movmemqi (rtx *operands)
14156 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14157 HOST_WIDE_INT srcoffset, dstoffset;
14158 rtx src, dst, srcbase, dstbase;
14159 rtx part_bytes_reg = NULL;
14160 rtx mem;
14162 if (!CONST_INT_P (operands[2])
14163 || !CONST_INT_P (operands[3])
14164 || INTVAL (operands[2]) > 64)
14165 return 0;
14167 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14168 return arm_movmemqi_unaligned (operands);
14170 if (INTVAL (operands[3]) & 3)
14171 return 0;
14173 dstbase = operands[0];
14174 srcbase = operands[1];
14176 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14177 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14179 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14180 out_words_to_go = INTVAL (operands[2]) / 4;
14181 last_bytes = INTVAL (operands[2]) & 3;
14182 dstoffset = srcoffset = 0;
14184 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14185 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14187 while (in_words_to_go >= 2)
14189 if (in_words_to_go > 4)
14190 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14191 TRUE, srcbase, &srcoffset));
14192 else
14193 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14194 src, FALSE, srcbase,
14195 &srcoffset));
14197 if (out_words_to_go)
14199 if (out_words_to_go > 4)
14200 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14201 TRUE, dstbase, &dstoffset));
14202 else if (out_words_to_go != 1)
14203 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14204 out_words_to_go, dst,
14205 (last_bytes == 0
14206 ? FALSE : TRUE),
14207 dstbase, &dstoffset));
14208 else
14210 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14211 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14212 if (last_bytes != 0)
14214 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14215 dstoffset += 4;
14220 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14221 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14224 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14225 if (out_words_to_go)
14227 rtx sreg;
14229 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14230 sreg = copy_to_reg (mem);
14232 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14233 emit_move_insn (mem, sreg);
14234 in_words_to_go--;
14236 gcc_assert (!in_words_to_go); /* Sanity check */
14239 if (in_words_to_go)
14241 gcc_assert (in_words_to_go > 0);
14243 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14244 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14247 gcc_assert (!last_bytes || part_bytes_reg);
14249 if (BYTES_BIG_ENDIAN && last_bytes)
14251 rtx tmp = gen_reg_rtx (SImode);
14253 /* The bytes we want are in the top end of the word. */
14254 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14255 GEN_INT (8 * (4 - last_bytes))));
14256 part_bytes_reg = tmp;
14258 while (last_bytes)
14260 mem = adjust_automodify_address (dstbase, QImode,
14261 plus_constant (Pmode, dst,
14262 last_bytes - 1),
14263 dstoffset + last_bytes - 1);
14264 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14266 if (--last_bytes)
14268 tmp = gen_reg_rtx (SImode);
14269 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14270 part_bytes_reg = tmp;
14275 else
14277 if (last_bytes > 1)
14279 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14280 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14281 last_bytes -= 2;
14282 if (last_bytes)
14284 rtx tmp = gen_reg_rtx (SImode);
14285 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14286 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14287 part_bytes_reg = tmp;
14288 dstoffset += 2;
14292 if (last_bytes)
14294 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14295 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14299 return 1;
14302 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14303 by mode size. */
14304 inline static rtx
14305 next_consecutive_mem (rtx mem)
14307 machine_mode mode = GET_MODE (mem);
14308 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14309 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14311 return adjust_automodify_address (mem, mode, addr, offset);
14314 /* Copy using LDRD/STRD instructions whenever possible.
14315 Returns true upon success. */
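/* For instance (a sketch, not an exhaustive description): a 16-byte copy
   with both SRC and DST word-aligned is expanded as two DImode moves
   through fresh registers, which normally become ldrd/strd pairs, while a
   misaligned side is accessed as pairs of unaligned SImode loads or
   stores.  */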
14316 bool
14317 gen_movmem_ldrd_strd (rtx *operands)
14319 unsigned HOST_WIDE_INT len;
14320 HOST_WIDE_INT align;
14321 rtx src, dst, base;
14322 rtx reg0;
14323 bool src_aligned, dst_aligned;
14324 bool src_volatile, dst_volatile;
14326 gcc_assert (CONST_INT_P (operands[2]));
14327 gcc_assert (CONST_INT_P (operands[3]));
14329 len = UINTVAL (operands[2]);
14330 if (len > 64)
14331 return false;
14333 /* Maximum alignment we can assume for both src and dst buffers. */
14334 align = INTVAL (operands[3]);
14336 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14337 return false;
14339 /* Place src and dst addresses in registers
14340 and update the corresponding mem rtx. */
14341 dst = operands[0];
14342 dst_volatile = MEM_VOLATILE_P (dst);
14343 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14344 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14345 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14347 src = operands[1];
14348 src_volatile = MEM_VOLATILE_P (src);
14349 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14350 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14351 src = adjust_automodify_address (src, VOIDmode, base, 0);
14353 if (!unaligned_access && !(src_aligned && dst_aligned))
14354 return false;
14356 if (src_volatile || dst_volatile)
14357 return false;
14359 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14360 if (!(dst_aligned || src_aligned))
14361 return arm_gen_movmemqi (operands);
14363   /* If either src or dst is unaligned we'll be accessing it as pairs
14364 of unaligned SImode accesses. Otherwise we can generate DImode
14365 ldrd/strd instructions. */
14366 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14367 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14369 while (len >= 8)
14371 len -= 8;
14372 reg0 = gen_reg_rtx (DImode);
14373 rtx low_reg = NULL_RTX;
14374 rtx hi_reg = NULL_RTX;
14376 if (!src_aligned || !dst_aligned)
14378 low_reg = gen_lowpart (SImode, reg0);
14379 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14381 if (src_aligned)
14382 emit_move_insn (reg0, src);
14383 else
14385 emit_insn (gen_unaligned_loadsi (low_reg, src));
14386 src = next_consecutive_mem (src);
14387 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14390 if (dst_aligned)
14391 emit_move_insn (dst, reg0);
14392 else
14394 emit_insn (gen_unaligned_storesi (dst, low_reg));
14395 dst = next_consecutive_mem (dst);
14396 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14399 src = next_consecutive_mem (src);
14400 dst = next_consecutive_mem (dst);
14403 gcc_assert (len < 8);
14404 if (len >= 4)
14406 /* More than a word but less than a double-word to copy. Copy a word. */
14407 reg0 = gen_reg_rtx (SImode);
14408 src = adjust_address (src, SImode, 0);
14409 dst = adjust_address (dst, SImode, 0);
14410 if (src_aligned)
14411 emit_move_insn (reg0, src);
14412 else
14413 emit_insn (gen_unaligned_loadsi (reg0, src));
14415 if (dst_aligned)
14416 emit_move_insn (dst, reg0);
14417 else
14418 emit_insn (gen_unaligned_storesi (dst, reg0));
14420 src = next_consecutive_mem (src);
14421 dst = next_consecutive_mem (dst);
14422 len -= 4;
14425 if (len == 0)
14426 return true;
14428 /* Copy the remaining bytes. */
14429 if (len >= 2)
14431 dst = adjust_address (dst, HImode, 0);
14432 src = adjust_address (src, HImode, 0);
14433 reg0 = gen_reg_rtx (SImode);
14434 if (src_aligned)
14435 emit_insn (gen_zero_extendhisi2 (reg0, src));
14436 else
14437 emit_insn (gen_unaligned_loadhiu (reg0, src));
14439 if (dst_aligned)
14440 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14441 else
14442 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14444 src = next_consecutive_mem (src);
14445 dst = next_consecutive_mem (dst);
14446 if (len == 2)
14447 return true;
14450 dst = adjust_address (dst, QImode, 0);
14451 src = adjust_address (src, QImode, 0);
14452 reg0 = gen_reg_rtx (QImode);
14453 emit_move_insn (reg0, src);
14454 emit_move_insn (dst, reg0);
14455 return true;
14458 /* Select a dominance comparison mode if possible for a test of the general
14459 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14460 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14461 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14462 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14463 In all cases OP will be either EQ or NE, but we don't need to know which
14464 here. If we are unable to support a dominance comparison we return
14465    CCmode.  This will then fail to match for the RTL expressions that
14466 generate this call. */
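/* For example, a test such as (x == 0 && y == 0), i.e.
   COND_OR == DOM_CC_X_AND_Y with both sub-comparisons EQ, yields
   CC_DEQmode, corresponding to a conditional-compare sequence like
       cmp   r0, #0
       cmpeq r1, #0
   (registers chosen for illustration).  */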
14467 machine_mode
14468 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14470 enum rtx_code cond1, cond2;
14471 int swapped = 0;
14473 /* Currently we will probably get the wrong result if the individual
14474 comparisons are not simple. This also ensures that it is safe to
14475 reverse a comparison if necessary. */
14476 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14477 != CCmode)
14478 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14479 != CCmode))
14480 return CCmode;
14482 /* The if_then_else variant of this tests the second condition if the
14483 first passes, but is true if the first fails. Reverse the first
14484 condition to get a true "inclusive-or" expression. */
14485 if (cond_or == DOM_CC_NX_OR_Y)
14486 cond1 = reverse_condition (cond1);
14488 /* If the comparisons are not equal, and one doesn't dominate the other,
14489 then we can't do this. */
14490 if (cond1 != cond2
14491 && !comparison_dominates_p (cond1, cond2)
14492 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14493 return CCmode;
14495 if (swapped)
14496 std::swap (cond1, cond2);
14498 switch (cond1)
14500 case EQ:
14501 if (cond_or == DOM_CC_X_AND_Y)
14502 return CC_DEQmode;
14504 switch (cond2)
14506 case EQ: return CC_DEQmode;
14507 case LE: return CC_DLEmode;
14508 case LEU: return CC_DLEUmode;
14509 case GE: return CC_DGEmode;
14510 case GEU: return CC_DGEUmode;
14511 default: gcc_unreachable ();
14514 case LT:
14515 if (cond_or == DOM_CC_X_AND_Y)
14516 return CC_DLTmode;
14518 switch (cond2)
14520 case LT:
14521 return CC_DLTmode;
14522 case LE:
14523 return CC_DLEmode;
14524 case NE:
14525 return CC_DNEmode;
14526 default:
14527 gcc_unreachable ();
14530 case GT:
14531 if (cond_or == DOM_CC_X_AND_Y)
14532 return CC_DGTmode;
14534 switch (cond2)
14536 case GT:
14537 return CC_DGTmode;
14538 case GE:
14539 return CC_DGEmode;
14540 case NE:
14541 return CC_DNEmode;
14542 default:
14543 gcc_unreachable ();
14546 case LTU:
14547 if (cond_or == DOM_CC_X_AND_Y)
14548 return CC_DLTUmode;
14550 switch (cond2)
14552 case LTU:
14553 return CC_DLTUmode;
14554 case LEU:
14555 return CC_DLEUmode;
14556 case NE:
14557 return CC_DNEmode;
14558 default:
14559 gcc_unreachable ();
14562 case GTU:
14563 if (cond_or == DOM_CC_X_AND_Y)
14564 return CC_DGTUmode;
14566 switch (cond2)
14568 case GTU:
14569 return CC_DGTUmode;
14570 case GEU:
14571 return CC_DGEUmode;
14572 case NE:
14573 return CC_DNEmode;
14574 default:
14575 gcc_unreachable ();
14578 /* The remaining cases only occur when both comparisons are the
14579 same. */
14580 case NE:
14581 gcc_assert (cond1 == cond2);
14582 return CC_DNEmode;
14584 case LE:
14585 gcc_assert (cond1 == cond2);
14586 return CC_DLEmode;
14588 case GE:
14589 gcc_assert (cond1 == cond2);
14590 return CC_DGEmode;
14592 case LEU:
14593 gcc_assert (cond1 == cond2);
14594 return CC_DLEUmode;
14596 case GEU:
14597 gcc_assert (cond1 == cond2);
14598 return CC_DGEUmode;
14600 default:
14601 gcc_unreachable ();
14605 machine_mode
14606 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14608 /* All floating point compares return CCFP if it is an equality
14609 comparison, and CCFPE otherwise. */
14610 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14612 switch (op)
14614 case EQ:
14615 case NE:
14616 case UNORDERED:
14617 case ORDERED:
14618 case UNLT:
14619 case UNLE:
14620 case UNGT:
14621 case UNGE:
14622 case UNEQ:
14623 case LTGT:
14624 return CCFPmode;
14626 case LT:
14627 case LE:
14628 case GT:
14629 case GE:
14630 return CCFPEmode;
14632 default:
14633 gcc_unreachable ();
14637 /* A compare with a shifted operand. Because of canonicalization, the
14638 comparison will have to be swapped when we emit the assembler. */
14639 if (GET_MODE (y) == SImode
14640 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14641 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14642 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14643 || GET_CODE (x) == ROTATERT))
14644 return CC_SWPmode;
14646 /* This operation is performed swapped, but since we only rely on the Z
14647 flag we don't need an additional mode. */
14648 if (GET_MODE (y) == SImode
14649 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14650 && GET_CODE (x) == NEG
14651 && (op == EQ || op == NE))
14652 return CC_Zmode;
14654 /* This is a special case that is used by combine to allow a
14655 comparison of a shifted byte load to be split into a zero-extend
14656 followed by a comparison of the shifted integer (only valid for
14657 equalities and unsigned inequalities). */
14658 if (GET_MODE (x) == SImode
14659 && GET_CODE (x) == ASHIFT
14660 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14661 && GET_CODE (XEXP (x, 0)) == SUBREG
14662 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14663 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14664 && (op == EQ || op == NE
14665 || op == GEU || op == GTU || op == LTU || op == LEU)
14666 && CONST_INT_P (y))
14667 return CC_Zmode;
14669   /* A construct for a conditional compare: if the false arm contains
14670      0, then both conditions must be true; otherwise either condition
14671 must be true. Not all conditions are possible, so CCmode is
14672 returned if it can't be done. */
14673 if (GET_CODE (x) == IF_THEN_ELSE
14674 && (XEXP (x, 2) == const0_rtx
14675 || XEXP (x, 2) == const1_rtx)
14676 && COMPARISON_P (XEXP (x, 0))
14677 && COMPARISON_P (XEXP (x, 1)))
14678 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14679 INTVAL (XEXP (x, 2)));
14681 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14682 if (GET_CODE (x) == AND
14683 && (op == EQ || op == NE)
14684 && COMPARISON_P (XEXP (x, 0))
14685 && COMPARISON_P (XEXP (x, 1)))
14686 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14687 DOM_CC_X_AND_Y);
14689 if (GET_CODE (x) == IOR
14690 && (op == EQ || op == NE)
14691 && COMPARISON_P (XEXP (x, 0))
14692 && COMPARISON_P (XEXP (x, 1)))
14693 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14694 DOM_CC_X_OR_Y);
14696 /* An operation (on Thumb) where we want to test for a single bit.
14697 This is done by shifting that bit up into the top bit of a
14698 scratch register; we can then branch on the sign bit. */
14699 if (TARGET_THUMB1
14700 && GET_MODE (x) == SImode
14701 && (op == EQ || op == NE)
14702 && GET_CODE (x) == ZERO_EXTRACT
14703 && XEXP (x, 1) == const1_rtx)
14704 return CC_Nmode;
14706   /* An operation that sets the condition codes as a side-effect; the
14707 V flag is not set correctly, so we can only use comparisons where
14708 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14709 instead.) */
14710 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14711 if (GET_MODE (x) == SImode
14712 && y == const0_rtx
14713 && (op == EQ || op == NE || op == LT || op == GE)
14714 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14715 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14716 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14717 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14718 || GET_CODE (x) == LSHIFTRT
14719 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14720 || GET_CODE (x) == ROTATERT
14721 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14722 return CC_NOOVmode;
14724 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14725 return CC_Zmode;
14727 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14728 && GET_CODE (x) == PLUS
14729 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14730 return CC_Cmode;
14732 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14734 switch (op)
14736 case EQ:
14737 case NE:
14738 /* A DImode comparison against zero can be implemented by
14739 or'ing the two halves together. */
14740 if (y == const0_rtx)
14741 return CC_Zmode;
14743 /* We can do an equality test in three Thumb instructions. */
14744 if (!TARGET_32BIT)
14745 return CC_Zmode;
14747 /* FALLTHROUGH */
14749 case LTU:
14750 case LEU:
14751 case GTU:
14752 case GEU:
14753 /* DImode unsigned comparisons can be implemented by cmp +
14754 cmpeq without a scratch register. Not worth doing in
14755 Thumb-2. */
14756 if (TARGET_32BIT)
14757 return CC_CZmode;
14759 /* FALLTHROUGH */
14761 case LT:
14762 case LE:
14763 case GT:
14764 case GE:
14765 /* DImode signed and unsigned comparisons can be implemented
14766 by cmp + sbcs with a scratch register, but that does not
14767 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14768 gcc_assert (op != EQ && op != NE);
14769 return CC_NCVmode;
14771 default:
14772 gcc_unreachable ();
14776 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14777 return GET_MODE (x);
14779 return CCmode;
14782 /* X and Y are two things to compare using CODE. Emit the compare insn and
14783    return the rtx for register 0 in the proper mode.  SCRATCH, if nonnull, is
14784    a scratch register that may be needed for some DImode comparisons.  */
14785 rtx
14786 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14788 machine_mode mode;
14789 rtx cc_reg;
14790 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14792 /* We might have X as a constant, Y as a register because of the predicates
14793 used for cmpdi. If so, force X to a register here. */
14794 if (dimode_comparison && !REG_P (x))
14795 x = force_reg (DImode, x);
14797 mode = SELECT_CC_MODE (code, x, y);
14798 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14800 if (dimode_comparison
14801 && mode != CC_CZmode)
14803 rtx clobber, set;
14805 /* To compare two non-zero values for equality, XOR them and
14806 then compare against zero. Not used for ARM mode; there
14807 CC_CZmode is cheaper. */
14808 if (mode == CC_Zmode && y != const0_rtx)
14810 gcc_assert (!reload_completed);
14811 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14812 y = const0_rtx;
14815 /* A scratch register is required. */
14816 if (reload_completed)
14817 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14818 else
14819 scratch = gen_rtx_SCRATCH (SImode);
14821 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14822 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14823 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14825 else
14826 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14828 return cc_reg;
14831 /* Generate a sequence of insns that will generate the correct return
14832 address mask depending on the physical architecture that the program
14833 is running on. */
14834 rtx
14835 arm_gen_return_addr_mask (void)
14837 rtx reg = gen_reg_rtx (Pmode);
14839 emit_insn (gen_return_addr_mask (reg));
14840 return reg;
14843 void
14844 arm_reload_in_hi (rtx *operands)
14846 rtx ref = operands[1];
14847 rtx base, scratch;
14848 HOST_WIDE_INT offset = 0;
14850 if (GET_CODE (ref) == SUBREG)
14852 offset = SUBREG_BYTE (ref);
14853 ref = SUBREG_REG (ref);
14856 if (REG_P (ref))
14858 /* We have a pseudo which has been spilt onto the stack; there
14859 are two cases here: the first where there is a simple
14860 stack-slot replacement and a second where the stack-slot is
14861 out of range, or is used as a subreg. */
14862 if (reg_equiv_mem (REGNO (ref)))
14864 ref = reg_equiv_mem (REGNO (ref));
14865 base = find_replacement (&XEXP (ref, 0));
14867 else
14868 /* The slot is out of range, or was dressed up in a SUBREG. */
14869 base = reg_equiv_address (REGNO (ref));
14871 /* PR 62554: If there is no equivalent memory location then just move
14872 the value as an SImode register move. This happens when the target
14873 architecture variant does not have an HImode register move. */
14874 if (base == NULL)
14876 gcc_assert (REG_P (operands[0]));
14877 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14878 gen_rtx_SUBREG (SImode, ref, 0)));
14879 return;
14882 else
14883 base = find_replacement (&XEXP (ref, 0));
14885 /* Handle the case where the address is too complex to be offset by 1. */
14886 if (GET_CODE (base) == MINUS
14887 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14889 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14891 emit_set_insn (base_plus, base);
14892 base = base_plus;
14894 else if (GET_CODE (base) == PLUS)
14896 /* The addend must be CONST_INT, or we would have dealt with it above. */
14897 HOST_WIDE_INT hi, lo;
14899 offset += INTVAL (XEXP (base, 1));
14900 base = XEXP (base, 0);
14902 /* Rework the address into a legal sequence of insns. */
14903 /* Valid range for lo is -4095 -> 4095 */
14904 lo = (offset >= 0
14905 ? (offset & 0xfff)
14906 : -((-offset) & 0xfff));
14908       /* Corner case: if lo is the max offset, then we would be out of range
14909 once we have added the additional 1 below, so bump the msb into the
14910 pre-loading insn(s). */
14911 if (lo == 4095)
14912 lo &= 0x7ff;
14914 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14915 ^ (HOST_WIDE_INT) 0x80000000)
14916 - (HOST_WIDE_INT) 0x80000000);
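      /* The mask/xor/subtract above sign-extends the low 32 bits of
	 (offset - lo), so HI is the remaining part of the offset as a
	 signed 32-bit quantity.  */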
14918 gcc_assert (hi + lo == offset);
14920 if (hi != 0)
14922 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14924 /* Get the base address; addsi3 knows how to handle constants
14925 that require more than one insn. */
14926 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14927 base = base_plus;
14928 offset = lo;
14932 /* Operands[2] may overlap operands[0] (though it won't overlap
14933      operands[1]); that's why we asked for a DImode reg -- so we can
14934      use the half that does not overlap.  */
14935 if (REGNO (operands[2]) == REGNO (operands[0]))
14936 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14937 else
14938 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14940 emit_insn (gen_zero_extendqisi2 (scratch,
14941 gen_rtx_MEM (QImode,
14942 plus_constant (Pmode, base,
14943 offset))));
14944 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14945 gen_rtx_MEM (QImode,
14946 plus_constant (Pmode, base,
14947 offset + 1))));
14948 if (!BYTES_BIG_ENDIAN)
14949 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14950 gen_rtx_IOR (SImode,
14951 gen_rtx_ASHIFT
14952 (SImode,
14953 gen_rtx_SUBREG (SImode, operands[0], 0),
14954 GEN_INT (8)),
14955 scratch));
14956 else
14957 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14958 gen_rtx_IOR (SImode,
14959 gen_rtx_ASHIFT (SImode, scratch,
14960 GEN_INT (8)),
14961 gen_rtx_SUBREG (SImode, operands[0], 0)));
14964 /* Handle storing a half-word to memory during reload by synthesizing as two
14965 byte stores. Take care not to clobber the input values until after we
14966 have moved them somewhere safe. This code assumes that if the DImode
14967 scratch in operands[2] overlaps either the input value or output address
14968 in some way, then that value must die in this insn (we absolutely need
14969 two scratch registers for some corner cases). */
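/* For the common little-endian case the synthesized sequence is roughly
   (illustrative registers):
       strb  rV, [rB]          @ low byte of the value
       lsr   rS, rV, #8
       strb  rS, [rB, #1]      @ high byte
   with extra address arithmetic emitted first when the original address
   needs it.  */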
14970 void
14971 arm_reload_out_hi (rtx *operands)
14973 rtx ref = operands[0];
14974 rtx outval = operands[1];
14975 rtx base, scratch;
14976 HOST_WIDE_INT offset = 0;
14978 if (GET_CODE (ref) == SUBREG)
14980 offset = SUBREG_BYTE (ref);
14981 ref = SUBREG_REG (ref);
14984 if (REG_P (ref))
14986 /* We have a pseudo which has been spilt onto the stack; there
14987 are two cases here: the first where there is a simple
14988 stack-slot replacement and a second where the stack-slot is
14989 out of range, or is used as a subreg. */
14990 if (reg_equiv_mem (REGNO (ref)))
14992 ref = reg_equiv_mem (REGNO (ref));
14993 base = find_replacement (&XEXP (ref, 0));
14995 else
14996 /* The slot is out of range, or was dressed up in a SUBREG. */
14997 base = reg_equiv_address (REGNO (ref));
14999 /* PR 62254: If there is no equivalent memory location then just move
15000 the value as an SImode register move. This happens when the target
15001 architecture variant does not have an HImode register move. */
15002 if (base == NULL)
15004 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15006 if (REG_P (outval))
15008 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15009 gen_rtx_SUBREG (SImode, outval, 0)));
15011 else /* SUBREG_P (outval) */
15013 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15014 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15015 SUBREG_REG (outval)));
15016 else
15017 /* FIXME: Handle other cases ? */
15018 gcc_unreachable ();
15020 return;
15023 else
15024 base = find_replacement (&XEXP (ref, 0));
15026 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15028 /* Handle the case where the address is too complex to be offset by 1. */
15029 if (GET_CODE (base) == MINUS
15030 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15032 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15034 /* Be careful not to destroy OUTVAL. */
15035 if (reg_overlap_mentioned_p (base_plus, outval))
15037 /* Updating base_plus might destroy outval, see if we can
15038 swap the scratch and base_plus. */
15039 if (!reg_overlap_mentioned_p (scratch, outval))
15040 std::swap (scratch, base_plus);
15041 else
15043 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15045 /* Be conservative and copy OUTVAL into the scratch now,
15046 this should only be necessary if outval is a subreg
15047 of something larger than a word. */
15048 /* XXX Might this clobber base? I can't see how it can,
15049 since scratch is known to overlap with OUTVAL, and
15050 must be wider than a word. */
15051 emit_insn (gen_movhi (scratch_hi, outval));
15052 outval = scratch_hi;
15056 emit_set_insn (base_plus, base);
15057 base = base_plus;
15059 else if (GET_CODE (base) == PLUS)
15061 /* The addend must be CONST_INT, or we would have dealt with it above. */
15062 HOST_WIDE_INT hi, lo;
15064 offset += INTVAL (XEXP (base, 1));
15065 base = XEXP (base, 0);
15067 /* Rework the address into a legal sequence of insns. */
15068 /* Valid range for lo is -4095 -> 4095 */
15069 lo = (offset >= 0
15070 ? (offset & 0xfff)
15071 : -((-offset) & 0xfff));
15073 /* Corner case, if lo is the max offset then we would be out of range
15074 once we have added the additional 1 below, so bump the msb into the
15075 pre-loading insn(s). */
15076 if (lo == 4095)
15077 lo &= 0x7ff;
15079 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15080 ^ (HOST_WIDE_INT) 0x80000000)
15081 - (HOST_WIDE_INT) 0x80000000);
15083 gcc_assert (hi + lo == offset);
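/* Illustrative example of the split above: for offset = 4183 (0x1057) we
   get lo = 0x57 and hi = 0x1000, so the address is rebuilt as
   base_plus = base + 0x1000 with a residual offset of 0x57.  For the
   corner case offset = 4095 the masking of lo gives lo = 2047 and
   hi = 2048, keeping both offset and offset + 1 within the +/-4095
   range of the byte accesses emitted below.  */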
15085 if (hi != 0)
15087 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15089 /* Be careful not to destroy OUTVAL. */
15090 if (reg_overlap_mentioned_p (base_plus, outval))
15092 /* Updating base_plus might destroy outval, see if we
15093 can swap the scratch and base_plus. */
15094 if (!reg_overlap_mentioned_p (scratch, outval))
15095 std::swap (scratch, base_plus);
15096 else
15098 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15100 /* Be conservative and copy outval into scratch now,
15101 this should only be necessary if outval is a
15102 subreg of something larger than a word. */
15103 /* XXX Might this clobber base? I can't see how it
15104 can, since scratch is known to overlap with
15105 outval. */
15106 emit_insn (gen_movhi (scratch_hi, outval));
15107 outval = scratch_hi;
15111 /* Get the base address; addsi3 knows how to handle constants
15112 that require more than one insn. */
15113 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15114 base = base_plus;
15115 offset = lo;
15119 if (BYTES_BIG_ENDIAN)
15121 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15122 plus_constant (Pmode, base,
15123 offset + 1)),
15124 gen_lowpart (QImode, outval)));
15125 emit_insn (gen_lshrsi3 (scratch,
15126 gen_rtx_SUBREG (SImode, outval, 0),
15127 GEN_INT (8)));
15128 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15129 offset)),
15130 gen_lowpart (QImode, scratch)));
15132 else
15134 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15135 offset)),
15136 gen_lowpart (QImode, outval)));
15137 emit_insn (gen_lshrsi3 (scratch,
15138 gen_rtx_SUBREG (SImode, outval, 0),
15139 GEN_INT (8)));
15140 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15141 plus_constant (Pmode, base,
15142 offset + 1)),
15143 gen_lowpart (QImode, scratch)));
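/* For the little-endian path above, the emitted sequence is roughly
   (illustrative, register names assumed):
       strb  outval,  [base, #offset]        @ low byte
       lsr   scratch, outval, #8
       strb  scratch, [base, #offset + 1]    @ high byte
   with the two byte addresses swapped for the big-endian path.  */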
15147 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15148 (padded to the size of a word) should be passed in a register. */
15150 static bool
15151 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15153 if (TARGET_AAPCS_BASED)
15154 return must_pass_in_stack_var_size (mode, type);
15155 else
15156 return must_pass_in_stack_var_size_or_pad (mode, type);
15160 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15161 byte of a stack argument has useful data. For legacy APCS ABIs we use
15162 the default. For AAPCS based ABIs small aggregate types are placed
15163 in the lowest memory address. */
15165 static pad_direction
15166 arm_function_arg_padding (machine_mode mode, const_tree type)
15168 if (!TARGET_AAPCS_BASED)
15169 return default_function_arg_padding (mode, type);
15171 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15172 return PAD_DOWNWARD;
15174 return PAD_UPWARD;
15178 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15179 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15180 register has useful data, and return the opposite if the most
15181 significant byte does. */
15183 bool
15184 arm_pad_reg_upward (machine_mode mode,
15185 tree type, int first ATTRIBUTE_UNUSED)
15187 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15189 /* For AAPCS, small aggregates, small fixed-point types,
15190 and small complex types are always padded upwards. */
15191 if (type)
15193 if ((AGGREGATE_TYPE_P (type)
15194 || TREE_CODE (type) == COMPLEX_TYPE
15195 || FIXED_POINT_TYPE_P (type))
15196 && int_size_in_bytes (type) <= 4)
15197 return true;
15199 else
15201 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15202 && GET_MODE_SIZE (mode) <= 4)
15203 return true;
15207 /* Otherwise, use default padding. */
15208 return !BYTES_BIG_ENDIAN;
15211 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15212 assuming that the address in the base register is word aligned. */
15213 bool
15214 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15216 HOST_WIDE_INT max_offset;
15218 /* Offset must be a multiple of 4 in Thumb mode. */
15219 if (TARGET_THUMB2 && ((offset & 3) != 0))
15220 return false;
15222 if (TARGET_THUMB2)
15223 max_offset = 1020;
15224 else if (TARGET_ARM)
15225 max_offset = 255;
15226 else
15227 return false;
15229 return ((offset <= max_offset) && (offset >= -max_offset));
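/* As an illustration of the check above: in Thumb-2 state offsets such as
   -1020, 0, 4 or 1020 are accepted, while 2 is rejected (not a multiple
   of 4) and 1024 is rejected (out of range); in ARM state any offset in
   the range [-255, 255] is accepted.  */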
15232 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15233 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15234 Assumes that the address in the base register RN is word aligned. Pattern
15235 guarantees that both memory accesses use the same base register,
15236 that the offsets are constants within range, and that the gap between the offsets is 4.
15237 If reload is complete then check that the registers are legal. WBACK indicates whether
15238 the address is updated (writeback). LOAD indicates whether the memory access is a load or a store. */
15239 bool
15240 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15241 bool wback, bool load)
15243 unsigned int t, t2, n;
15245 if (!reload_completed)
15246 return true;
15248 if (!offset_ok_for_ldrd_strd (offset))
15249 return false;
15251 t = REGNO (rt);
15252 t2 = REGNO (rt2);
15253 n = REGNO (rn);
15255 if ((TARGET_THUMB2)
15256 && ((wback && (n == t || n == t2))
15257 || (t == SP_REGNUM)
15258 || (t == PC_REGNUM)
15259 || (t2 == SP_REGNUM)
15260 || (t2 == PC_REGNUM)
15261 || (!load && (n == PC_REGNUM))
15262 || (load && (t == t2))
15263 /* Triggers Cortex-M3 LDRD errata. */
15264 || (!wback && load && fix_cm3_ldrd && (n == t))))
15265 return false;
15267 if ((TARGET_ARM)
15268 && ((wback && (n == t || n == t2))
15269 || (t2 == PC_REGNUM)
15270 || (t % 2 != 0) /* First destination register is not even. */
15271 || (t2 != t + 1)
15272 /* PC can be used as base register (for offset addressing only),
15273 but it is deprecated. */
15274 || (n == PC_REGNUM)))
15275 return false;
15277 return true;
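/* For example (illustrative): in ARM state LDRD r0, r1, [r2] passes the
   checks above, while LDRD r1, r2, [r3] is rejected because the first
   destination register is odd, and LDRD r0, r2, [r3] is rejected because
   the destination registers are not consecutive.  In Thumb-2 state the
   destinations need not be consecutive, but SP and PC are still
   rejected.  */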
15280 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15281 operand MEM's address contains an immediate offset from the base
15282 register and has no side effects, in which case it sets BASE and
15283 OFFSET accordingly. */
15284 static bool
15285 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15287 rtx addr;
15289 gcc_assert (base != NULL && offset != NULL);
15291 /* TODO: Handle more general memory operand patterns, such as
15292 PRE_DEC and PRE_INC. */
15294 if (side_effects_p (mem))
15295 return false;
15297 /* Can't deal with subregs. */
15298 if (GET_CODE (mem) == SUBREG)
15299 return false;
15301 gcc_assert (MEM_P (mem));
15303 *offset = const0_rtx;
15305 addr = XEXP (mem, 0);
15307 /* If addr isn't valid for DImode, then we can't handle it. */
15308 if (!arm_legitimate_address_p (DImode, addr,
15309 reload_in_progress || reload_completed))
15310 return false;
15312 if (REG_P (addr))
15314 *base = addr;
15315 return true;
15317 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15319 *base = XEXP (addr, 0);
15320 *offset = XEXP (addr, 1);
15321 return (REG_P (*base) && CONST_INT_P (*offset));
15324 return false;
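/* A sketch of the accepted shapes (illustrative RTL): for
   (mem:SI (reg:SI r4)) this sets *base = r4 and *offset = 0; for
   (mem:SI (plus:SI (reg:SI r4) (const_int 8))) it sets *base = r4 and
   *offset = 8.  Auto-modify addresses and SUBREGs are rejected.  */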
15327 /* Called from a peephole2 to replace two word-size accesses with a
15328 single LDRD/STRD instruction. Returns true iff we can generate a
15329 new instruction sequence. That is, both accesses use the same base
15330 register and the gap between constant offsets is 4. This function
15331 may reorder its operands to match ldrd/strd RTL templates.
15332 OPERANDS are the operands found by the peephole matcher;
15333 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15334 corresponding memory operands. LOAD indicates whether the access
15335 is a load or a store. CONST_STORE indicates a store of constant
15336 integer values held in OPERANDS[4,5] and assumes that the pattern
15337 is 4 insns long, for the purpose of checking dead registers.
15338 COMMUTE indicates that register operands may be reordered. */
15339 bool
15340 gen_operands_ldrd_strd (rtx *operands, bool load,
15341 bool const_store, bool commute)
15343 int nops = 2;
15344 HOST_WIDE_INT offsets[2], offset;
15345 rtx base = NULL_RTX;
15346 rtx cur_base, cur_offset, tmp;
15347 int i, gap;
15348 HARD_REG_SET regset;
15350 gcc_assert (!const_store || !load);
15351 /* Check that the memory references are immediate offsets from the
15352 same base register. Extract the base register, the destination
15353 registers, and the corresponding memory offsets. */
15354 for (i = 0; i < nops; i++)
15356 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15357 return false;
15359 if (i == 0)
15360 base = cur_base;
15361 else if (REGNO (base) != REGNO (cur_base))
15362 return false;
15364 offsets[i] = INTVAL (cur_offset);
15365 if (GET_CODE (operands[i]) == SUBREG)
15367 tmp = SUBREG_REG (operands[i]);
15368 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15369 operands[i] = tmp;
15373 /* Make sure there is no dependency between the individual loads. */
15374 if (load && REGNO (operands[0]) == REGNO (base))
15375 return false; /* RAW */
15377 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15378 return false; /* WAW */
15380 /* If the same input register is used in both stores
15381 when storing different constants, try to find a free register.
15382 For example, the code
15383 mov r0, 0
15384 str r0, [r2]
15385 mov r0, 1
15386 str r0, [r2, #4]
15387 can be transformed into
15388 mov r1, 0
15389 mov r0, 1
15390 strd r1, r0, [r2]
15391 in Thumb mode assuming that r1 is free.
15392 For ARM mode do the same, but only if the starting register
15393 can be made even. */
15394 if (const_store
15395 && REGNO (operands[0]) == REGNO (operands[1])
15396 && INTVAL (operands[4]) != INTVAL (operands[5]))
15398 if (TARGET_THUMB2)
15400 CLEAR_HARD_REG_SET (regset);
15401 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15402 if (tmp == NULL_RTX)
15403 return false;
15405 /* Use the new register in the first load to ensure that
15406 if the original input register is not dead after peephole,
15407 then it will have the correct constant value. */
15408 operands[0] = tmp;
15410 else if (TARGET_ARM)
15412 int regno = REGNO (operands[0]);
15413 if (!peep2_reg_dead_p (4, operands[0]))
15415 /* When the input register is even and is not dead after the
15416 pattern, it has to hold the second constant but we cannot
15417 form a legal STRD in ARM mode with this register as the second
15418 register. */
15419 if (regno % 2 == 0)
15420 return false;
15422 /* Is regno-1 free? */
15423 SET_HARD_REG_SET (regset);
15424 CLEAR_HARD_REG_BIT (regset, regno - 1);
15425 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15426 if (tmp == NULL_RTX)
15427 return false;
15429 operands[0] = tmp;
15431 else
15433 /* Find a DImode register. */
15434 CLEAR_HARD_REG_SET (regset);
15435 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15436 if (tmp != NULL_RTX)
15438 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15439 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15441 else
15443 /* Can we use the input register to form a DI register? */
15444 SET_HARD_REG_SET (regset);
15445 CLEAR_HARD_REG_BIT (regset,
15446 regno % 2 == 0 ? regno + 1 : regno - 1);
15447 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15448 if (tmp == NULL_RTX)
15449 return false;
15450 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15454 gcc_assert (operands[0] != NULL_RTX);
15455 gcc_assert (operands[1] != NULL_RTX);
15456 gcc_assert (REGNO (operands[0]) % 2 == 0);
15457 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15461 /* Make sure the instructions are ordered with lower memory access first. */
15462 if (offsets[0] > offsets[1])
15464 gap = offsets[0] - offsets[1];
15465 offset = offsets[1];
15467 /* Swap the instructions such that lower memory is accessed first. */
15468 std::swap (operands[0], operands[1]);
15469 std::swap (operands[2], operands[3]);
15470 if (const_store)
15471 std::swap (operands[4], operands[5]);
15473 else
15475 gap = offsets[1] - offsets[0];
15476 offset = offsets[0];
15479 /* Make sure accesses are to consecutive memory locations. */
15480 if (gap != 4)
15481 return false;
15483 /* Make sure we generate legal instructions. */
15484 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15485 false, load))
15486 return true;
15488 /* In Thumb state the register constraints are minimal, so if the check
15489 above failed there is little hope of fixing things up by reordering. */
15490 if (TARGET_THUMB2)
15491 return false;
15493 if (load && commute)
15495 /* Try reordering registers. */
15496 std::swap (operands[0], operands[1]);
15497 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15498 false, load))
15499 return true;
15502 if (const_store)
15504 /* If input registers are dead after this pattern, they can be
15505 reordered or replaced by other registers that are free in the
15506 current pattern. */
15507 if (!peep2_reg_dead_p (4, operands[0])
15508 || !peep2_reg_dead_p (4, operands[1]))
15509 return false;
15511 /* Try to reorder the input registers. */
15512 /* For example, the code
15513 mov r0, 0
15514 mov r1, 1
15515 str r1, [r2]
15516 str r0, [r2, #4]
15517 can be transformed into
15518 mov r1, 0
15519 mov r0, 1
15520 strd r0, r1, [r2]
15522 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15523 false, false))
15525 std::swap (operands[0], operands[1]);
15526 return true;
15529 /* Try to find a free DI register. */
15530 CLEAR_HARD_REG_SET (regset);
15531 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15532 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15533 while (true)
15535 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15536 if (tmp == NULL_RTX)
15537 return false;
15539 /* DREG must be an even-numbered register in DImode.
15540 Split it into SI registers. */
15541 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15542 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15543 gcc_assert (operands[0] != NULL_RTX);
15544 gcc_assert (operands[1] != NULL_RTX);
15545 gcc_assert (REGNO (operands[0]) % 2 == 0);
15546 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15548 return (operands_ok_ldrd_strd (operands[0], operands[1],
15549 base, offset,
15550 false, load));
15554 return false;
15560 /* Print a symbolic form of X to the debug file, F. */
15561 static void
15562 arm_print_value (FILE *f, rtx x)
15564 switch (GET_CODE (x))
15566 case CONST_INT:
15567 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15568 return;
15570 case CONST_DOUBLE:
15571 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15572 return;
15574 case CONST_VECTOR:
15576 int i;
15578 fprintf (f, "<");
15579 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15581 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15582 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15583 fputc (',', f);
15585 fprintf (f, ">");
15587 return;
15589 case CONST_STRING:
15590 fprintf (f, "\"%s\"", XSTR (x, 0));
15591 return;
15593 case SYMBOL_REF:
15594 fprintf (f, "`%s'", XSTR (x, 0));
15595 return;
15597 case LABEL_REF:
15598 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15599 return;
15601 case CONST:
15602 arm_print_value (f, XEXP (x, 0));
15603 return;
15605 case PLUS:
15606 arm_print_value (f, XEXP (x, 0));
15607 fprintf (f, "+");
15608 arm_print_value (f, XEXP (x, 1));
15609 return;
15611 case PC:
15612 fprintf (f, "pc");
15613 return;
15615 default:
15616 fprintf (f, "????");
15617 return;
15621 /* Routines for manipulation of the constant pool. */
15623 /* Arm instructions cannot load a large constant directly into a
15624 register; they have to come from a pc relative load. The constant
15625 must therefore be placed in the addressable range of the pc
15626 relative load. Depending on the precise pc relative load
15627 instruction the range is somewhere between 256 bytes and 4k. This
15628 means that we often have to dump a constant inside a function, and
15629 generate code to branch around it.
15631 It is important to minimize this, since the branches will slow
15632 things down and make the code larger.
15634 Normally we can hide the table after an existing unconditional
15635 branch so that there is no interruption of the flow, but in the
15636 worst case the code looks like this:
15638 ldr rn, L1
15640 b L2
15641 align
15642 L1: .long value
15646 ldr rn, L3
15648 b L4
15649 align
15650 L3: .long value
15654 We fix this by performing a scan after scheduling, which notices
15655 which instructions need to have their operands fetched from the
15656 constant table and builds the table.
15658 The algorithm starts by building a table of all the constants that
15659 need fixing up and all the natural barriers in the function (places
15660 where a constant table can be dropped without breaking the flow).
15661 For each fixup we note how far the pc-relative replacement will be
15662 able to reach and the offset of the instruction into the function.
15664 Having built the table we then group the fixes together to form
15665 tables that are as large as possible (subject to addressing
15666 constraints) and emit each table of constants after the last
15667 barrier that is within range of all the instructions in the group.
15668 If a group does not contain a barrier, then we forcibly create one
15669 by inserting a jump instruction into the flow. Once the table has
15670 been inserted, the insns are then modified to reference the
15671 relevant entry in the pool.
15673 Possible enhancements to the algorithm (not implemented) are:
15675 1) For some processors and object formats, there may be benefit in
15676 aligning the pools to the start of cache lines; this alignment
15677 would need to be taken into account when calculating addressability
15678 of a pool. */
15680 /* These typedefs are located at the start of this file, so that
15681 they can be used in the prototypes there. This comment is to
15682 remind readers of that fact so that the following structures
15683 can be understood more easily.
15685 typedef struct minipool_node Mnode;
15686 typedef struct minipool_fixup Mfix; */
15688 struct minipool_node
15690 /* Doubly linked chain of entries. */
15691 Mnode * next;
15692 Mnode * prev;
15693 /* The maximum offset into the code at which this entry can be placed. While
15694 pushing fixes for forward references, all entries are sorted in order
15695 of increasing max_address. */
15696 HOST_WIDE_INT max_address;
15697 /* Similarly for an entry inserted for a backwards ref. */
15698 HOST_WIDE_INT min_address;
15699 /* The number of fixes referencing this entry. This can become zero
15700 if we "unpush" an entry. In this case we ignore the entry when we
15701 come to emit the code. */
15702 int refcount;
15703 /* The offset from the start of the minipool. */
15704 HOST_WIDE_INT offset;
15705 /* The value in the table. */
15706 rtx value;
15707 /* The mode of value. */
15708 machine_mode mode;
15709 /* The size of the value. With iWMMXt enabled
15710 sizes > 4 also imply an alignment of 8 bytes. */
15711 int fix_size;
15714 struct minipool_fixup
15716 Mfix * next;
15717 rtx_insn * insn;
15718 HOST_WIDE_INT address;
15719 rtx * loc;
15720 machine_mode mode;
15721 int fix_size;
15722 rtx value;
15723 Mnode * minipool;
15724 HOST_WIDE_INT forwards;
15725 HOST_WIDE_INT backwards;
15728 /* Fixes less than a word need padding out to a word boundary. */
15729 #define MINIPOOL_FIX_SIZE(mode) \
15730 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
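/* For instance, a QImode or HImode fix is padded out to 4 bytes here,
   while a DImode fix keeps its natural size of 8 bytes.  */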
15732 static Mnode * minipool_vector_head;
15733 static Mnode * minipool_vector_tail;
15734 static rtx_code_label *minipool_vector_label;
15735 static int minipool_pad;
15737 /* The linked list of all minipool fixes required for this function. */
15738 Mfix * minipool_fix_head;
15739 Mfix * minipool_fix_tail;
15740 /* The fix entry for the current minipool, once it has been placed. */
15741 Mfix * minipool_barrier;
15743 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15744 #define JUMP_TABLES_IN_TEXT_SECTION 0
15745 #endif
15747 static HOST_WIDE_INT
15748 get_jump_table_size (rtx_jump_table_data *insn)
15750 /* ADDR_VECs only take room if read-only data goes into the text
15751 section. */
15752 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15754 rtx body = PATTERN (insn);
15755 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15756 HOST_WIDE_INT size;
15757 HOST_WIDE_INT modesize;
15759 modesize = GET_MODE_SIZE (GET_MODE (body));
15760 size = modesize * XVECLEN (body, elt);
15761 switch (modesize)
15763 case 1:
15764 /* Round up size of TBB table to a halfword boundary. */
15765 size = (size + 1) & ~HOST_WIDE_INT_1;
15766 break;
15767 case 2:
15768 /* No padding necessary for TBH. */
15769 break;
15770 case 4:
15771 /* Add two bytes for alignment on Thumb. */
15772 if (TARGET_THUMB)
15773 size += 2;
15774 break;
15775 default:
15776 gcc_unreachable ();
15778 return size;
15781 return 0;
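/* Illustrative examples of the computation above: a TBB dispatch table
   (modesize 1) with 7 entries occupies 7 bytes, rounded up to 8 to keep
   the following code halfword aligned; a 5-entry table of SImode entries
   on Thumb takes 5 * 4 + 2 = 22 bytes, the extra 2 bytes allowing for
   alignment.  */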
15784 /* Return the maximum amount of padding that will be inserted before
15785 label LABEL. */
15787 static HOST_WIDE_INT
15788 get_label_padding (rtx label)
15790 HOST_WIDE_INT align, min_insn_size;
15792 align = 1 << label_to_alignment (label);
15793 min_insn_size = TARGET_THUMB ? 2 : 4;
15794 return align > min_insn_size ? align - min_insn_size : 0;
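/* For example (illustrative): with a label aligned to 8 bytes on a Thumb
   target (minimum insn size 2) up to 8 - 2 = 6 bytes of padding may be
   inserted before it, whereas with 4-byte alignment on an ARM target no
   padding is assumed, since the minimum insn size is already 4.  */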
15797 /* Move a minipool fix MP from its current location to before MAX_MP.
15798 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15799 constraints may need updating. */
15800 static Mnode *
15801 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15802 HOST_WIDE_INT max_address)
15804 /* The code below assumes these are different. */
15805 gcc_assert (mp != max_mp);
15807 if (max_mp == NULL)
15809 if (max_address < mp->max_address)
15810 mp->max_address = max_address;
15812 else
15814 if (max_address > max_mp->max_address - mp->fix_size)
15815 mp->max_address = max_mp->max_address - mp->fix_size;
15816 else
15817 mp->max_address = max_address;
15819 /* Unlink MP from its current position. Since max_mp is non-null,
15820 mp->prev must be non-null. */
15821 mp->prev->next = mp->next;
15822 if (mp->next != NULL)
15823 mp->next->prev = mp->prev;
15824 else
15825 minipool_vector_tail = mp->prev;
15827 /* Re-insert it before MAX_MP. */
15828 mp->next = max_mp;
15829 mp->prev = max_mp->prev;
15830 max_mp->prev = mp;
15832 if (mp->prev != NULL)
15833 mp->prev->next = mp;
15834 else
15835 minipool_vector_head = mp;
15838 /* Save the new entry. */
15839 max_mp = mp;
15841 /* Scan over the preceding entries and adjust their addresses as
15842 required. */
15843 while (mp->prev != NULL
15844 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15846 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15847 mp = mp->prev;
15850 return max_mp;
15853 /* Add a constant to the minipool for a forward reference. Returns the
15854 node added or NULL if the constant will not fit in this pool. */
15855 static Mnode *
15856 add_minipool_forward_ref (Mfix *fix)
15858 /* If set, max_mp is the first pool_entry that has a lower
15859 constraint than the one we are trying to add. */
15860 Mnode * max_mp = NULL;
15861 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15862 Mnode * mp;
15864 /* If the minipool starts before the end of FIX->INSN then this FIX
15865 cannot be placed into the current pool. Furthermore, adding the
15866 new constant pool entry may cause the pool to start FIX_SIZE bytes
15867 earlier. */
15868 if (minipool_vector_head &&
15869 (fix->address + get_attr_length (fix->insn)
15870 >= minipool_vector_head->max_address - fix->fix_size))
15871 return NULL;
15873 /* Scan the pool to see if a constant with the same value has
15874 already been added. While we are doing this, also note the
15875 location where we must insert the constant if it doesn't already
15876 exist. */
15877 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15879 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15880 && fix->mode == mp->mode
15881 && (!LABEL_P (fix->value)
15882 || (CODE_LABEL_NUMBER (fix->value)
15883 == CODE_LABEL_NUMBER (mp->value)))
15884 && rtx_equal_p (fix->value, mp->value))
15886 /* More than one fix references this entry. */
15887 mp->refcount++;
15888 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15891 /* Note the insertion point if necessary. */
15892 if (max_mp == NULL
15893 && mp->max_address > max_address)
15894 max_mp = mp;
15896 /* If we are inserting an 8-byte aligned quantity and
15897 we have not already found an insertion point, then
15898 make sure that all such 8-byte aligned quantities are
15899 placed at the start of the pool. */
15900 if (ARM_DOUBLEWORD_ALIGN
15901 && max_mp == NULL
15902 && fix->fix_size >= 8
15903 && mp->fix_size < 8)
15905 max_mp = mp;
15906 max_address = mp->max_address;
15910 /* The value is not currently in the minipool, so we need to create
15911 a new entry for it. If MAX_MP is NULL, the entry will be put on
15912 the end of the list since the placement is less constrained than
15913 any existing entry. Otherwise, we insert the new fix before
15914 MAX_MP and, if necessary, adjust the constraints on the other
15915 entries. */
15916 mp = XNEW (Mnode);
15917 mp->fix_size = fix->fix_size;
15918 mp->mode = fix->mode;
15919 mp->value = fix->value;
15920 mp->refcount = 1;
15921 /* Not yet required for a backwards ref. */
15922 mp->min_address = -65536;
15924 if (max_mp == NULL)
15926 mp->max_address = max_address;
15927 mp->next = NULL;
15928 mp->prev = minipool_vector_tail;
15930 if (mp->prev == NULL)
15932 minipool_vector_head = mp;
15933 minipool_vector_label = gen_label_rtx ();
15935 else
15936 mp->prev->next = mp;
15938 minipool_vector_tail = mp;
15940 else
15942 if (max_address > max_mp->max_address - mp->fix_size)
15943 mp->max_address = max_mp->max_address - mp->fix_size;
15944 else
15945 mp->max_address = max_address;
15947 mp->next = max_mp;
15948 mp->prev = max_mp->prev;
15949 max_mp->prev = mp;
15950 if (mp->prev != NULL)
15951 mp->prev->next = mp;
15952 else
15953 minipool_vector_head = mp;
15956 /* Save the new entry. */
15957 max_mp = mp;
15959 /* Scan over the preceding entries and adjust their addresses as
15960 required. */
15961 while (mp->prev != NULL
15962 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15964 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15965 mp = mp->prev;
15968 return max_mp;
15971 static Mnode *
15972 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15973 HOST_WIDE_INT min_address)
15975 HOST_WIDE_INT offset;
15977 /* The code below assumes these are different. */
15978 gcc_assert (mp != min_mp);
15980 if (min_mp == NULL)
15982 if (min_address > mp->min_address)
15983 mp->min_address = min_address;
15985 else
15987 /* We will adjust this below if it is too loose. */
15988 mp->min_address = min_address;
15990 /* Unlink MP from its current position. Since min_mp is non-null,
15991 mp->next must be non-null. */
15992 mp->next->prev = mp->prev;
15993 if (mp->prev != NULL)
15994 mp->prev->next = mp->next;
15995 else
15996 minipool_vector_head = mp->next;
15998 /* Reinsert it after MIN_MP. */
15999 mp->prev = min_mp;
16000 mp->next = min_mp->next;
16001 min_mp->next = mp;
16002 if (mp->next != NULL)
16003 mp->next->prev = mp;
16004 else
16005 minipool_vector_tail = mp;
16008 min_mp = mp;
16010 offset = 0;
16011 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16013 mp->offset = offset;
16014 if (mp->refcount > 0)
16015 offset += mp->fix_size;
16017 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16018 mp->next->min_address = mp->min_address + mp->fix_size;
16021 return min_mp;
16024 /* Add a constant to the minipool for a backward reference. Returns the
16025 node added or NULL if the constant will not fit in this pool.
16027 Note that the code for insertion for a backwards reference can be
16028 somewhat confusing because the calculated offsets for each fix do
16029 not take into account the size of the pool (which is still under
16030 construction). */
16031 static Mnode *
16032 add_minipool_backward_ref (Mfix *fix)
16034 /* If set, min_mp is the last pool_entry that has a lower constraint
16035 than the one we are trying to add. */
16036 Mnode *min_mp = NULL;
16037 /* This can be negative, since it is only a constraint. */
16038 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16039 Mnode *mp;
16041 /* If we can't reach the current pool from this insn, or if we can't
16042 insert this entry at the end of the pool without pushing other
16043 fixes out of range, then we don't try. This ensures that we
16044 can't fail later on. */
16045 if (min_address >= minipool_barrier->address
16046 || (minipool_vector_tail->min_address + fix->fix_size
16047 >= minipool_barrier->address))
16048 return NULL;
16050 /* Scan the pool to see if a constant with the same value has
16051 already been added. While we are doing this, also note the
16052 location where we must insert the constant if it doesn't already
16053 exist. */
16054 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16056 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16057 && fix->mode == mp->mode
16058 && (!LABEL_P (fix->value)
16059 || (CODE_LABEL_NUMBER (fix->value)
16060 == CODE_LABEL_NUMBER (mp->value)))
16061 && rtx_equal_p (fix->value, mp->value)
16062 /* Check that there is enough slack to move this entry to the
16063 end of the table (this is conservative). */
16064 && (mp->max_address
16065 > (minipool_barrier->address
16066 + minipool_vector_tail->offset
16067 + minipool_vector_tail->fix_size)))
16069 mp->refcount++;
16070 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16073 if (min_mp != NULL)
16074 mp->min_address += fix->fix_size;
16075 else
16077 /* Note the insertion point if necessary. */
16078 if (mp->min_address < min_address)
16080 /* For now, we do not allow the insertion of nodes requiring
16081 8-byte alignment anywhere but at the start of the pool. */
16082 if (ARM_DOUBLEWORD_ALIGN
16083 && fix->fix_size >= 8 && mp->fix_size < 8)
16084 return NULL;
16085 else
16086 min_mp = mp;
16088 else if (mp->max_address
16089 < minipool_barrier->address + mp->offset + fix->fix_size)
16091 /* Inserting before this entry would push the fix beyond
16092 its maximum address (which can happen if we have
16093 re-located a forwards fix); force the new fix to come
16094 after it. */
16095 if (ARM_DOUBLEWORD_ALIGN
16096 && fix->fix_size >= 8 && mp->fix_size < 8)
16097 return NULL;
16098 else
16100 min_mp = mp;
16101 min_address = mp->min_address + fix->fix_size;
16104 /* Do not insert a non-8-byte aligned quantity before 8-byte
16105 aligned quantities. */
16106 else if (ARM_DOUBLEWORD_ALIGN
16107 && fix->fix_size < 8
16108 && mp->fix_size >= 8)
16110 min_mp = mp;
16111 min_address = mp->min_address + fix->fix_size;
16116 /* We need to create a new entry. */
16117 mp = XNEW (Mnode);
16118 mp->fix_size = fix->fix_size;
16119 mp->mode = fix->mode;
16120 mp->value = fix->value;
16121 mp->refcount = 1;
16122 mp->max_address = minipool_barrier->address + 65536;
16124 mp->min_address = min_address;
16126 if (min_mp == NULL)
16128 mp->prev = NULL;
16129 mp->next = minipool_vector_head;
16131 if (mp->next == NULL)
16133 minipool_vector_tail = mp;
16134 minipool_vector_label = gen_label_rtx ();
16136 else
16137 mp->next->prev = mp;
16139 minipool_vector_head = mp;
16141 else
16143 mp->next = min_mp->next;
16144 mp->prev = min_mp;
16145 min_mp->next = mp;
16147 if (mp->next != NULL)
16148 mp->next->prev = mp;
16149 else
16150 minipool_vector_tail = mp;
16153 /* Save the new entry. */
16154 min_mp = mp;
16156 if (mp->prev)
16157 mp = mp->prev;
16158 else
16159 mp->offset = 0;
16161 /* Scan over the following entries and adjust their offsets. */
16162 while (mp->next != NULL)
16164 if (mp->next->min_address < mp->min_address + mp->fix_size)
16165 mp->next->min_address = mp->min_address + mp->fix_size;
16167 if (mp->refcount)
16168 mp->next->offset = mp->offset + mp->fix_size;
16169 else
16170 mp->next->offset = mp->offset;
16172 mp = mp->next;
16175 return min_mp;
16178 static void
16179 assign_minipool_offsets (Mfix *barrier)
16181 HOST_WIDE_INT offset = 0;
16182 Mnode *mp;
16184 minipool_barrier = barrier;
16186 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16188 mp->offset = offset;
16190 if (mp->refcount > 0)
16191 offset += mp->fix_size;
16195 /* Output the literal table */
16196 static void
16197 dump_minipool (rtx_insn *scan)
16199 Mnode * mp;
16200 Mnode * nmp;
16201 int align64 = 0;
16203 if (ARM_DOUBLEWORD_ALIGN)
16204 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16205 if (mp->refcount > 0 && mp->fix_size >= 8)
16207 align64 = 1;
16208 break;
16211 if (dump_file)
16212 fprintf (dump_file,
16213 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16214 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16216 scan = emit_label_after (gen_label_rtx (), scan);
16217 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16218 scan = emit_label_after (minipool_vector_label, scan);
16220 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16222 if (mp->refcount > 0)
16224 if (dump_file)
16226 fprintf (dump_file,
16227 ";; Offset %u, min %ld, max %ld ",
16228 (unsigned) mp->offset, (unsigned long) mp->min_address,
16229 (unsigned long) mp->max_address);
16230 arm_print_value (dump_file, mp->value);
16231 fputc ('\n', dump_file);
16234 rtx val = copy_rtx (mp->value);
16236 switch (GET_MODE_SIZE (mp->mode))
16238 #ifdef HAVE_consttable_1
16239 case 1:
16240 scan = emit_insn_after (gen_consttable_1 (val), scan);
16241 break;
16243 #endif
16244 #ifdef HAVE_consttable_2
16245 case 2:
16246 scan = emit_insn_after (gen_consttable_2 (val), scan);
16247 break;
16249 #endif
16250 #ifdef HAVE_consttable_4
16251 case 4:
16252 scan = emit_insn_after (gen_consttable_4 (val), scan);
16253 break;
16255 #endif
16256 #ifdef HAVE_consttable_8
16257 case 8:
16258 scan = emit_insn_after (gen_consttable_8 (val), scan);
16259 break;
16261 #endif
16262 #ifdef HAVE_consttable_16
16263 case 16:
16264 scan = emit_insn_after (gen_consttable_16 (val), scan);
16265 break;
16267 #endif
16268 default:
16269 gcc_unreachable ();
16273 nmp = mp->next;
16274 free (mp);
16277 minipool_vector_head = minipool_vector_tail = NULL;
16278 scan = emit_insn_after (gen_consttable_end (), scan);
16279 scan = emit_barrier_after (scan);
16282 /* Return the cost of forcibly inserting a barrier after INSN. */
16283 static int
16284 arm_barrier_cost (rtx_insn *insn)
16286 /* Basing the location of the pool on the loop depth is preferable,
16287 but at the moment, the basic block information seems to be
16288 corrupted by this stage of the compilation. */
16289 int base_cost = 50;
16290 rtx_insn *next = next_nonnote_insn (insn);
16292 if (next != NULL && LABEL_P (next))
16293 base_cost -= 20;
16295 switch (GET_CODE (insn))
16297 case CODE_LABEL:
16298 /* It will always be better to place the table before the label, rather
16299 than after it. */
16300 return 50;
16302 case INSN:
16303 case CALL_INSN:
16304 return base_cost;
16306 case JUMP_INSN:
16307 return base_cost - 10;
16309 default:
16310 return base_cost + 10;
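/* Illustrative costs from the above: a JUMP_INSN that is followed by a
   label costs 50 - 20 - 10 = 20 (the cheapest case), an ordinary INSN not
   followed by a label costs 50, and a CODE_LABEL always costs 50
   regardless of what follows it.  */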
16314 /* Find the best place in the insn stream in the range
16315 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16316 Create the barrier by inserting a jump and add a new fix entry for
16317 it. */
16318 static Mfix *
16319 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16321 HOST_WIDE_INT count = 0;
16322 rtx_barrier *barrier;
16323 rtx_insn *from = fix->insn;
16324 /* The instruction after which we will insert the jump. */
16325 rtx_insn *selected = NULL;
16326 int selected_cost;
16327 /* The address at which the jump instruction will be placed. */
16328 HOST_WIDE_INT selected_address;
16329 Mfix * new_fix;
16330 HOST_WIDE_INT max_count = max_address - fix->address;
16331 rtx_code_label *label = gen_label_rtx ();
16333 selected_cost = arm_barrier_cost (from);
16334 selected_address = fix->address;
16336 while (from && count < max_count)
16338 rtx_jump_table_data *tmp;
16339 int new_cost;
16341 /* This code shouldn't have been called if there was a natural barrier
16342 within range. */
16343 gcc_assert (!BARRIER_P (from));
16345 /* Count the length of this insn. This must stay in sync with the
16346 code that pushes minipool fixes. */
16347 if (LABEL_P (from))
16348 count += get_label_padding (from);
16349 else
16350 count += get_attr_length (from);
16352 /* If there is a jump table, add its length. */
16353 if (tablejump_p (from, NULL, &tmp))
16355 count += get_jump_table_size (tmp);
16357 /* Jump tables aren't in a basic block, so base the cost on
16358 the dispatch insn. If we select this location, we will
16359 still put the pool after the table. */
16360 new_cost = arm_barrier_cost (from);
16362 if (count < max_count
16363 && (!selected || new_cost <= selected_cost))
16365 selected = tmp;
16366 selected_cost = new_cost;
16367 selected_address = fix->address + count;
16370 /* Continue after the dispatch table. */
16371 from = NEXT_INSN (tmp);
16372 continue;
16375 new_cost = arm_barrier_cost (from);
16377 if (count < max_count
16378 && (!selected || new_cost <= selected_cost))
16380 selected = from;
16381 selected_cost = new_cost;
16382 selected_address = fix->address + count;
16385 from = NEXT_INSN (from);
16388 /* Make sure that we found a place to insert the jump. */
16389 gcc_assert (selected);
16391 /* Make sure we do not split a call and its corresponding
16392 CALL_ARG_LOCATION note. */
16393 if (CALL_P (selected))
16395 rtx_insn *next = NEXT_INSN (selected);
16396 if (next && NOTE_P (next)
16397 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16398 selected = next;
16401 /* Create a new JUMP_INSN that branches around a barrier. */
16402 from = emit_jump_insn_after (gen_jump (label), selected);
16403 JUMP_LABEL (from) = label;
16404 barrier = emit_barrier_after (from);
16405 emit_label_after (label, barrier);
16407 /* Create a minipool barrier entry for the new barrier. */
16408 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16409 new_fix->insn = barrier;
16410 new_fix->address = selected_address;
16411 new_fix->next = fix->next;
16412 fix->next = new_fix;
16414 return new_fix;
16417 /* Record that there is a natural barrier in the insn stream at
16418 ADDRESS. */
16419 static void
16420 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16422 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16424 fix->insn = insn;
16425 fix->address = address;
16427 fix->next = NULL;
16428 if (minipool_fix_head != NULL)
16429 minipool_fix_tail->next = fix;
16430 else
16431 minipool_fix_head = fix;
16433 minipool_fix_tail = fix;
16436 /* Record INSN, which will need fixing up to load a value from the
16437 minipool. ADDRESS is the offset of the insn since the start of the
16438 function; LOC is a pointer to the part of the insn which requires
16439 fixing; VALUE is the constant that must be loaded, which is of type
16440 MODE. */
16441 static void
16442 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16443 machine_mode mode, rtx value)
16445 gcc_assert (!arm_disable_literal_pool);
16446 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16448 fix->insn = insn;
16449 fix->address = address;
16450 fix->loc = loc;
16451 fix->mode = mode;
16452 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16453 fix->value = value;
16454 fix->forwards = get_attr_pool_range (insn);
16455 fix->backwards = get_attr_neg_pool_range (insn);
16456 fix->minipool = NULL;
16458 /* If an insn doesn't have a range defined for it, then it isn't
16459 expecting to be reworked by this code. Better to stop now than
16460 to generate duff assembly code. */
16461 gcc_assert (fix->forwards || fix->backwards);
16463 /* If an entry requires 8-byte alignment then assume all constant pools
16464 require 4 bytes of padding. Trying to do this later on a per-pool
16465 basis is awkward because existing pool entries have to be modified. */
16466 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16467 minipool_pad = 4;
16469 if (dump_file)
16471 fprintf (dump_file,
16472 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16473 GET_MODE_NAME (mode),
16474 INSN_UID (insn), (unsigned long) address,
16475 -1 * (long)fix->backwards, (long)fix->forwards);
16476 arm_print_value (dump_file, fix->value);
16477 fprintf (dump_file, "\n");
16480 /* Add it to the chain of fixes. */
16481 fix->next = NULL;
16483 if (minipool_fix_head != NULL)
16484 minipool_fix_tail->next = fix;
16485 else
16486 minipool_fix_head = fix;
16488 minipool_fix_tail = fix;
16491 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16492 Returns the number of insns needed, or 99 if we always want to synthesize
16493 the value. */
16495 arm_max_const_double_inline_cost ()
16497 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16500 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16501 Returns the number of insns needed, or 99 if we don't know how to
16502 do it. */
16504 arm_const_double_inline_cost (rtx val)
16506 rtx lowpart, highpart;
16507 machine_mode mode;
16509 mode = GET_MODE (val);
16511 if (mode == VOIDmode)
16512 mode = DImode;
16514 gcc_assert (GET_MODE_SIZE (mode) == 8);
16516 lowpart = gen_lowpart (SImode, val);
16517 highpart = gen_highpart_mode (SImode, mode, val);
16519 gcc_assert (CONST_INT_P (lowpart));
16520 gcc_assert (CONST_INT_P (highpart));
16522 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16523 NULL_RTX, NULL_RTX, 0, 0)
16524 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16525 NULL_RTX, NULL_RTX, 0, 0));
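/* For illustration, the constant 0x0000000100000001 needs one insn per
   32-bit half (a single MOV each), giving a cost of 2, which is within
   the limit returned by arm_max_const_double_inline_cost above.  */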
16528 /* Cost of loading a SImode constant. */
16529 static inline int
16530 arm_const_inline_cost (enum rtx_code code, rtx val)
16532 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16533 NULL_RTX, NULL_RTX, 1, 0);
16536 /* Return true if it is worthwhile to split a 64-bit constant into two
16537 32-bit operations. This is the case if optimizing for size, or
16538 if we have load delay slots, or if one 32-bit part can be done with
16539 a single data operation. */
16540 bool
16541 arm_const_double_by_parts (rtx val)
16543 machine_mode mode = GET_MODE (val);
16544 rtx part;
16546 if (optimize_size || arm_ld_sched)
16547 return true;
16549 if (mode == VOIDmode)
16550 mode = DImode;
16552 part = gen_highpart_mode (SImode, mode, val);
16554 gcc_assert (CONST_INT_P (part));
16556 if (const_ok_for_arm (INTVAL (part))
16557 || const_ok_for_arm (~INTVAL (part)))
16558 return true;
16560 part = gen_lowpart (SImode, val);
16562 gcc_assert (CONST_INT_P (part));
16564 if (const_ok_for_arm (INTVAL (part))
16565 || const_ok_for_arm (~INTVAL (part)))
16566 return true;
16568 return false;
16571 /* Return true if it is possible to inline both the high and low parts
16572 of a 64-bit constant into 32-bit data processing instructions. */
16573 bool
16574 arm_const_double_by_immediates (rtx val)
16576 machine_mode mode = GET_MODE (val);
16577 rtx part;
16579 if (mode == VOIDmode)
16580 mode = DImode;
16582 part = gen_highpart_mode (SImode, mode, val);
16584 gcc_assert (CONST_INT_P (part));
16586 if (!const_ok_for_arm (INTVAL (part)))
16587 return false;
16589 part = gen_lowpart (SImode, val);
16591 gcc_assert (CONST_INT_P (part));
16593 if (!const_ok_for_arm (INTVAL (part)))
16594 return false;
16596 return true;
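/* Illustrative example: the constant 0x0000000100ff0000 splits into a
   high part of 0x1 and a low part of 0x00ff0000, both valid ARM
   data-processing immediates, so this returns true; a low part such as
   0x12345 is not encodable as an immediate and would make it return
   false.  */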
16599 /* Scan INSN and note any of its operands that need fixing.
16600 If DO_PUSHES is false we do not actually push any of the fixups
16601 needed. */
16602 static void
16603 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16605 int opno;
16607 extract_constrain_insn (insn);
16609 if (recog_data.n_alternatives == 0)
16610 return;
16612 /* Fill in recog_op_alt with information about the constraints of
16613 this insn. */
16614 preprocess_constraints (insn);
16616 const operand_alternative *op_alt = which_op_alt ();
16617 for (opno = 0; opno < recog_data.n_operands; opno++)
16619 /* Things we need to fix can only occur in inputs. */
16620 if (recog_data.operand_type[opno] != OP_IN)
16621 continue;
16623 /* If this alternative is a memory reference, then any mention
16624 of constants in this alternative is really to fool reload
16625 into allowing us to accept one there. We need to fix them up
16626 now so that we output the right code. */
16627 if (op_alt[opno].memory_ok)
16629 rtx op = recog_data.operand[opno];
16631 if (CONSTANT_P (op))
16633 if (do_pushes)
16634 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16635 recog_data.operand_mode[opno], op);
16637 else if (MEM_P (op)
16638 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16639 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16641 if (do_pushes)
16643 rtx cop = avoid_constant_pool_reference (op);
16645 /* Casting the address of something to a mode narrower
16646 than a word can cause avoid_constant_pool_reference()
16647 to return the pool reference itself. That's no good to
16648 us here. Let's just hope that we can use the
16649 constant pool value directly. */
16650 if (op == cop)
16651 cop = get_pool_constant (XEXP (op, 0));
16653 push_minipool_fix (insn, address,
16654 recog_data.operand_loc[opno],
16655 recog_data.operand_mode[opno], cop);
16662 return;
16665 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16666 and unions in the context of ARMv8-M Security Extensions. It is used as a
16667 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16668 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16669 or four masks, depending on whether it is being computed for a
16670 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16671 respectively. The tree for the type of the argument or a field within an
16672 argument is passed in ARG_TYPE, the current register this argument or field
16673 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16674 argument or field starts at is passed in STARTING_BIT and the last used bit
16675 is kept in LAST_USED_BIT which is also updated accordingly. */
16677 static unsigned HOST_WIDE_INT
16678 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16679 uint32_t * padding_bits_to_clear,
16680 unsigned starting_bit, int * last_used_bit)
16683 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16685 if (TREE_CODE (arg_type) == RECORD_TYPE)
16687 unsigned current_bit = starting_bit;
16688 tree field;
16689 long int offset, size;
16692 field = TYPE_FIELDS (arg_type);
16693 while (field)
16695 /* The offset within a structure is always an offset from
16696 the start of that structure. Make sure we take that into account in the
16697 calculation of the register-based offset that we use here. */
16698 offset = starting_bit;
16699 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16700 offset %= 32;
16702 /* This is the actual size of the field, for bitfields this is the
16703 bitfield width and not the container size. */
16704 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16706 if (*last_used_bit != offset)
16708 if (offset < *last_used_bit)
16710 /* This field's offset is before the 'last_used_bit', that
16711 means this field goes on the next register. So we need to
16712 pad the rest of the current register and increase the
16713 register number. */
16714 uint32_t mask;
16715 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16716 mask++;
16718 padding_bits_to_clear[*regno] |= mask;
16719 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16720 (*regno)++;
16722 else
16724 /* Otherwise we pad the bits between the last field's end and
16725 the start of the new field. */
16726 uint32_t mask;
16728 mask = ((uint32_t)-1) >> (32 - offset);
16729 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16730 padding_bits_to_clear[*regno] |= mask;
16732 current_bit = offset;
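/* Worked example of the padding masks computed above (illustrative
   values): if the previous field ended at bit 24 and the new field starts
   in the next register, the first branch yields mask = 0xff000000,
   marking bits 24-31 of the current register as padding; if the previous
   field ended at bit 8 and the new field starts at bit 16 of the same
   register, the second branch yields mask = 0x0000ff00, marking
   bits 8-15.  */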
16735 /* Calculate further padding bits for inner structs/unions too. */
16736 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16738 *last_used_bit = current_bit;
16739 not_to_clear_reg_mask
16740 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16741 padding_bits_to_clear, offset,
16742 last_used_bit);
16744 else
16746 /* Update 'current_bit' with this field's size. If the
16747 'current_bit' lies in a subsequent register, update 'regno' and
16748 reset 'current_bit' to point to the current bit in that new
16749 register. */
16750 current_bit += size;
16751 while (current_bit >= 32)
16753 current_bit-=32;
16754 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16755 (*regno)++;
16757 *last_used_bit = current_bit;
16760 field = TREE_CHAIN (field);
16762 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16764 else if (TREE_CODE (arg_type) == UNION_TYPE)
16766 tree field, field_t;
16767 int i, regno_t, field_size;
16768 int max_reg = -1;
16769 int max_bit = -1;
16770 uint32_t mask;
16771 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16772 = {-1, -1, -1, -1};
16774 /* To compute the padding bits in a union we only consider bits as
16775 padding bits if they are always either a padding bit or fall outside a
16776 field's size for all fields in the union. */
16777 field = TYPE_FIELDS (arg_type);
16778 while (field)
16780 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16781 = {0U, 0U, 0U, 0U};
16782 int last_used_bit_t = *last_used_bit;
16783 regno_t = *regno;
16784 field_t = TREE_TYPE (field);
16786 /* If the field's type is either a record or a union make sure to
16787 compute their padding bits too. */
16788 if (RECORD_OR_UNION_TYPE_P (field_t))
16789 not_to_clear_reg_mask
16790 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16791 &padding_bits_to_clear_t[0],
16792 starting_bit, &last_used_bit_t);
16793 else
16795 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16796 regno_t = (field_size / 32) + *regno;
16797 last_used_bit_t = (starting_bit + field_size) % 32;
16800 for (i = *regno; i < regno_t; i++)
16802 /* For all but the last register used by this field only keep the
16803 padding bits that were padding bits in this field. */
16804 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16807 /* For the last register, keep all padding bits that were padding
16808 bits in this field and any padding bits that are still valid
16809 as padding bits but fall outside of this field's size. */
16810 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16811 padding_bits_to_clear_res[regno_t]
16812 &= padding_bits_to_clear_t[regno_t] | mask;
16814 /* Update the maximum size of the fields in terms of registers used
16815 ('max_reg') and the 'last_used_bit' in said register. */
16816 if (max_reg < regno_t)
16818 max_reg = regno_t;
16819 max_bit = last_used_bit_t;
16821 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16822 max_bit = last_used_bit_t;
16824 field = TREE_CHAIN (field);
16827 /* Update the current padding_bits_to_clear using the intersection of the
16828 padding bits of all the fields. */
16829 for (i = *regno; i < max_reg; i++)
16830 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16832 /* Do not keep trailing padding bits, we do not know yet whether this
16833 is the end of the argument. */
16834 mask = ((uint32_t) 1 << max_bit) - 1;
16835 padding_bits_to_clear[max_reg]
16836 |= padding_bits_to_clear_res[max_reg] & mask;
16838 *regno = max_reg;
16839 *last_used_bit = max_bit;
16841 else
16842 /* This function should only be used for structs and unions. */
16843 gcc_unreachable ();
16845 return not_to_clear_reg_mask;
16848 /* In the context of ARMv8-M Security Extensions, this function is used for both
16849 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16850 registers are used when returning or passing arguments, which is then
16851 returned as a mask. It will also compute a mask to indicate padding/unused
16852 bits for each of these registers, and passes this through the
16853 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16854 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16855 the starting register used to pass this argument or return value is passed
16856 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16857 for struct and union types. */
16859 static unsigned HOST_WIDE_INT
16860 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16861 uint32_t * padding_bits_to_clear)
16864 int last_used_bit = 0;
16865 unsigned HOST_WIDE_INT not_to_clear_mask;
16867 if (RECORD_OR_UNION_TYPE_P (arg_type))
16869 not_to_clear_mask
16870 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16871 padding_bits_to_clear, 0,
16872 &last_used_bit);
16875 /* If the 'last_used_bit' is not zero, that means we are still using a
16876 part of the last 'regno'. In such cases we must clear the trailing
16877 bits. Otherwise we are not using regno and we should mark it to be
16878 cleared. */
16879 if (last_used_bit != 0)
16880 padding_bits_to_clear[regno]
16881 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16882 else
16883 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
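/* Illustrative example: for a struct of two 32-bit words followed by a
   single byte, passed in r0-r2, the last field ends at bit 8 of r2; the
   expression above therefore flags bits 8-31 of r2 (mask 0xffffff00) as
   padding to be cleared, while r0-r2 themselves stay in
   not_to_clear_mask.  */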
16885 else
16887 not_to_clear_mask = 0;
16888 /* We are not dealing with structs or unions, so these arguments may be
16889 passed in floating-point registers too. In some cases BLKmode is
16890 used when returning or passing arguments in multiple VFP registers. */
16891 if (GET_MODE (arg_rtx) == BLKmode)
16893 int i, arg_regs;
16894 rtx reg;
16896 /* This should really only occur when dealing with the hard-float
16897 ABI. */
16898 gcc_assert (TARGET_HARD_FLOAT_ABI);
16900 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16902 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16903 gcc_assert (REG_P (reg));
16905 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16907 /* If we are dealing with DF mode, make sure we don't
16908 clear either of the registers it addresses. */
16909 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16910 if (arg_regs > 1)
16912 unsigned HOST_WIDE_INT mask;
16913 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16914 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16915 not_to_clear_mask |= mask;
16919 else
16921 /* Otherwise we can rely on the MODE to determine how many registers
16922 are being used by this argument. */
16923 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16924 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16925 if (arg_regs > 1)
16927 unsigned HOST_WIDE_INT
16928 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16929 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16930 not_to_clear_mask |= mask;
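/* For instance (illustrative), a DImode argument passed in r2 uses two
   core registers, so the mask above becomes (1 << 4) - (1 << 2) = 0xc,
   marking both r2 and r3 as argument registers that must not be
   cleared.  */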
16935 return not_to_clear_mask;
16938 /* Clear the caller-saved registers that are not used to pass arguments before a
16939 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16940 registers is done in the __gnu_cmse_nonsecure_call libcall.
16941 See libgcc/config/arm/cmse_nonsecure_call.S. */
16943 static void
16944 cmse_nonsecure_call_clear_caller_saved (void)
16946 basic_block bb;
16948 FOR_EACH_BB_FN (bb, cfun)
16950 rtx_insn *insn;
16952 FOR_BB_INSNS (bb, insn)
16954 uint64_t to_clear_mask, float_mask;
16955 rtx_insn *seq;
16956 rtx pat, call, unspec, reg, cleared_reg, tmp;
16957 unsigned int regno, maxregno;
16958 rtx address;
16959 CUMULATIVE_ARGS args_so_far_v;
16960 cumulative_args_t args_so_far;
16961 tree arg_type, fntype;
16962 bool using_r4, first_param = true;
16963 function_args_iterator args_iter;
16964 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16965 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16967 if (!NONDEBUG_INSN_P (insn))
16968 continue;
16970 if (!CALL_P (insn))
16971 continue;
16973 pat = PATTERN (insn);
16974 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16975 call = XVECEXP (pat, 0, 0);
16977 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16978 if (GET_CODE (call) == SET)
16979 call = SET_SRC (call);
16981 /* Check if it is a cmse_nonsecure_call. */
16982 unspec = XEXP (call, 0);
16983 if (GET_CODE (unspec) != UNSPEC
16984 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16985 continue;
16987 /* Determine the caller-saved registers we need to clear. */
16988 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16989 maxregno = NUM_ARG_REGS - 1;
16990 /* Only look at the caller-saved floating point registers in case of
16991 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16992 lazy stores and loads which clear both caller- and callee-saved
16993 registers. */
16994 if (TARGET_HARD_FLOAT_ABI)
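/* Build a mask covering the VFP registers FIRST_VFP_REGNUM
   .. D7_VFP_REGNUM, i.e. the floating-point argument registers.  */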
16996 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16997 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16998 to_clear_mask |= float_mask;
16999 maxregno = D7_VFP_REGNUM;
17002 /* Make sure the register used to hold the function address is not
17003 cleared. */
17004 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17005 gcc_assert (MEM_P (address));
17006 gcc_assert (REG_P (XEXP (address, 0)));
17007 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17009 /* Set basic block of call insn so that df rescan is performed on
17010 insns inserted here. */
17011 set_block_for_insn (insn, bb);
17012 df_set_flags (DF_DEFER_INSN_RESCAN);
17013 start_sequence ();
17015 /* Make sure the scheduler doesn't schedule other insns beyond
17016 here. */
17017 emit_insn (gen_blockage ());
17019 /* Walk through all arguments and clear registers appropriately. */
17021 fntype = TREE_TYPE (MEM_EXPR (address));
17022 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17023 NULL_TREE);
17024 args_so_far = pack_cumulative_args (&args_so_far_v);
17025 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17027 rtx arg_rtx;
17028 machine_mode arg_mode = TYPE_MODE (arg_type);
17030 if (VOID_TYPE_P (arg_type))
17031 continue;
17033 if (!first_param)
17034 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17035 true);
17037 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17038 true);
17039 gcc_assert (REG_P (arg_rtx));
17040 to_clear_mask
17041 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17042 REGNO (arg_rtx),
17043 padding_bits_to_clear_ptr);
17045 first_param = false;
17048 /* Clear padding bits where needed. */
17049 cleared_reg = XEXP (address, 0);
17050 reg = gen_rtx_REG (SImode, IP_REGNUM);
17051 using_r4 = false;
17052 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17054 if (padding_bits_to_clear[regno] == 0)
17055 continue;
17057 /* If this is a Thumb-1 target, copy the address of the function
17058 we are calling from 'r4' into 'ip' so that we can use r4 to
17059 clear the unused bits in the arguments. */
17060 if (TARGET_THUMB1 && !using_r4)
17062 using_r4 = true;
17063 reg = cleared_reg;
17064 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17065 reg);
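/* Load the low 16 bits of the inverted padding mask into the scratch
   register; the high 16 bits are filled in below if they are nonzero.  */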
17068 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17069 emit_move_insn (reg, tmp);
17070 /* Also fill the top half of the negated
17071 padding_bits_to_clear. */
17072 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17074 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17075 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17076 GEN_INT (16),
17077 GEN_INT (16)),
17078 tmp));
17081 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17082 gen_rtx_REG (SImode, regno),
17083 reg));
17086 if (using_r4)
17087 emit_move_insn (cleared_reg,
17088 gen_rtx_REG (SImode, IP_REGNUM));
17090 /* We use right shift and left shift to clear the LSB of the address
17091 we jump to instead of using bic, to avoid having to use an extra
17092 register on Thumb-1. */
17093 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17094 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17095 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17096 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17098 /* Clear all registers that could leak information before doing the
17099 non-secure call. */
17100 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17102 if (!(to_clear_mask & (1LL << regno)))
17103 continue;
17105 /* If regno is an even vfp register and its successor is also to
17106 be cleared, use vmov. */
17107 if (IS_VFP_REGNUM (regno))
17109 if (TARGET_VFP_DOUBLE
17110 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17111 && to_clear_mask & (1LL << (regno + 1)))
17112 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17113 CONST0_RTX (DFmode));
17114 else
17115 emit_move_insn (gen_rtx_REG (SFmode, regno),
17116 CONST0_RTX (SFmode));
17118 else
17119 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17122 seq = get_insns ();
17123 end_sequence ();
17124 emit_insn_before (seq, insn);
17130 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17131 be useful in the next conditional jump insn. */
17133 static void
17134 thumb1_reorg (void)
17136 basic_block bb;
17138 FOR_EACH_BB_FN (bb, cfun)
17140 rtx dest, src;
17141 rtx cmp, op0, op1, set = NULL;
17142 rtx_insn *prev, *insn = BB_END (bb);
17143 bool insn_clobbered = false;
17145 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17146 insn = PREV_INSN (insn);
17148 /* Find the last cbranchsi4_insn in basic block BB. */
17149 if (insn == BB_HEAD (bb)
17150 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17151 continue;
17153 /* Get the register with which we are comparing. */
17154 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17155 op0 = XEXP (cmp, 0);
17156 op1 = XEXP (cmp, 1);
17158 /* Check that comparison is against ZERO. */
17159 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17160 continue;
17162 /* Find the first flag setting insn before INSN in basic block BB. */
17163 gcc_assert (insn != BB_HEAD (bb));
17164 for (prev = PREV_INSN (insn);
17165 (!insn_clobbered
17166 && prev != BB_HEAD (bb)
17167 && (NOTE_P (prev)
17168 || DEBUG_INSN_P (prev)
17169 || ((set = single_set (prev)) != NULL
17170 && get_attr_conds (prev) == CONDS_NOCOND)));
17171 prev = PREV_INSN (prev))
17173 if (reg_set_p (op0, prev))
17174 insn_clobbered = true;
17177 /* Skip if op0 is clobbered by insn other than prev. */
17178 if (insn_clobbered)
17179 continue;
17181 if (!set)
17182 continue;
17184 dest = SET_DEST (set);
17185 src = SET_SRC (set);
17186 if (!low_register_operand (dest, SImode)
17187 || !low_register_operand (src, SImode))
17188 continue;
17190 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17191 in INSN. Both src and dest of the move insn are checked. */
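/* For example, a "mov rd, rs" whose source or destination is the register
   compared against zero in INSN becomes "subs rd, rs, #0", and INSN is
   updated to test rd.  */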
17192 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17194 dest = copy_rtx (dest);
17195 src = copy_rtx (src);
17196 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17197 PATTERN (prev) = gen_rtx_SET (dest, src);
17198 INSN_CODE (prev) = -1;
17199 /* Set test register in INSN to dest. */
17200 XEXP (cmp, 0) = copy_rtx (dest);
17201 INSN_CODE (insn) = -1;
17206 /* Convert instructions to their cc-clobbering variant if possible, since
17207 that allows us to use smaller encodings. */
17209 static void
17210 thumb2_reorg (void)
17212 basic_block bb;
17213 regset_head live;
17215 INIT_REG_SET (&live);
17217 /* We are freeing block_for_insn in the toplev to keep compatibility
17218 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17219 compute_bb_for_insn ();
17220 df_analyze ();
17222 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17224 FOR_EACH_BB_FN (bb, cfun)
17226 if ((current_tune->disparage_flag_setting_t16_encodings
17227 == tune_params::DISPARAGE_FLAGS_ALL)
17228 && optimize_bb_for_speed_p (bb))
17229 continue;
17231 rtx_insn *insn;
17232 Convert_Action action = SKIP;
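/* Decide up front whether instructions that only partially set the flags
   (logical ops, shifts, rotates, immediate moves and MULS) may be
   converted; some tunings disparage those 16-bit flag-setting encodings
   when optimizing for speed.  */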
17233 Convert_Action action_for_partial_flag_setting
17234 = ((current_tune->disparage_flag_setting_t16_encodings
17235 != tune_params::DISPARAGE_FLAGS_NEITHER)
17236 && optimize_bb_for_speed_p (bb))
17237 ? SKIP : CONV;
17239 COPY_REG_SET (&live, DF_LR_OUT (bb));
17240 df_simulate_initialize_backwards (bb, &live);
17241 FOR_BB_INSNS_REVERSE (bb, insn)
17243 if (NONJUMP_INSN_P (insn)
17244 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17245 && GET_CODE (PATTERN (insn)) == SET)
17247 action = SKIP;
17248 rtx pat = PATTERN (insn);
17249 rtx dst = XEXP (pat, 0);
17250 rtx src = XEXP (pat, 1);
17251 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17253 if (UNARY_P (src) || BINARY_P (src))
17254 op0 = XEXP (src, 0);
17256 if (BINARY_P (src))
17257 op1 = XEXP (src, 1);
17259 if (low_register_operand (dst, SImode))
17261 switch (GET_CODE (src))
17263 case PLUS:
17264 /* Adding two registers and storing the result
17265 in the first source is already a 16-bit
17266 operation. */
17267 if (rtx_equal_p (dst, op0)
17268 && register_operand (op1, SImode))
17269 break;
17271 if (low_register_operand (op0, SImode))
17273 /* ADDS <Rd>,<Rn>,<Rm> */
17274 if (low_register_operand (op1, SImode))
17275 action = CONV;
17276 /* ADDS <Rdn>,#<imm8> */
17277 /* SUBS <Rdn>,#<imm8> */
17278 else if (rtx_equal_p (dst, op0)
17279 && CONST_INT_P (op1)
17280 && IN_RANGE (INTVAL (op1), -255, 255))
17281 action = CONV;
17282 /* ADDS <Rd>,<Rn>,#<imm3> */
17283 /* SUBS <Rd>,<Rn>,#<imm3> */
17284 else if (CONST_INT_P (op1)
17285 && IN_RANGE (INTVAL (op1), -7, 7))
17286 action = CONV;
17288 /* ADCS <Rd>, <Rn> */
17289 else if (GET_CODE (XEXP (src, 0)) == PLUS
17290 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17291 && low_register_operand (XEXP (XEXP (src, 0), 1),
17292 SImode)
17293 && COMPARISON_P (op1)
17294 && cc_register (XEXP (op1, 0), VOIDmode)
17295 && maybe_get_arm_condition_code (op1) == ARM_CS
17296 && XEXP (op1, 1) == const0_rtx)
17297 action = CONV;
17298 break;
17300 case MINUS:
17301 /* RSBS <Rd>,<Rn>,#0
17302 Not handled here: see NEG below. */
17303 /* SUBS <Rd>,<Rn>,#<imm3>
17304 SUBS <Rdn>,#<imm8>
17305 Not handled here: see PLUS above. */
17306 /* SUBS <Rd>,<Rn>,<Rm> */
17307 if (low_register_operand (op0, SImode)
17308 && low_register_operand (op1, SImode))
17309 action = CONV;
17310 break;
17312 case MULT:
17313 /* MULS <Rdm>,<Rn>,<Rdm>
17314 As an exception to the rule, this is only used
17315 when optimizing for size since MULS is slow on all
17316 known implementations. We do not even want to use
17317 MULS in cold code, if optimizing for speed, so we
17318 test the global flag here. */
17319 if (!optimize_size)
17320 break;
17321 /* Fall through. */
17322 case AND:
17323 case IOR:
17324 case XOR:
17325 /* ANDS <Rdn>,<Rm> */
17326 if (rtx_equal_p (dst, op0)
17327 && low_register_operand (op1, SImode))
17328 action = action_for_partial_flag_setting;
17329 else if (rtx_equal_p (dst, op1)
17330 && low_register_operand (op0, SImode))
17331 action = action_for_partial_flag_setting == SKIP
17332 ? SKIP : SWAP_CONV;
17333 break;
17335 case ASHIFTRT:
17336 case ASHIFT:
17337 case LSHIFTRT:
17338 /* ASRS <Rdn>,<Rm> */
17339 /* LSRS <Rdn>,<Rm> */
17340 /* LSLS <Rdn>,<Rm> */
17341 if (rtx_equal_p (dst, op0)
17342 && low_register_operand (op1, SImode))
17343 action = action_for_partial_flag_setting;
17344 /* ASRS <Rd>,<Rm>,#<imm5> */
17345 /* LSRS <Rd>,<Rm>,#<imm5> */
17346 /* LSLS <Rd>,<Rm>,#<imm5> */
17347 else if (low_register_operand (op0, SImode)
17348 && CONST_INT_P (op1)
17349 && IN_RANGE (INTVAL (op1), 0, 31))
17350 action = action_for_partial_flag_setting;
17351 break;
17353 case ROTATERT:
17354 /* RORS <Rdn>,<Rm> */
17355 if (rtx_equal_p (dst, op0)
17356 && low_register_operand (op1, SImode))
17357 action = action_for_partial_flag_setting;
17358 break;
17360 case NOT:
17361 /* MVNS <Rd>,<Rm> */
17362 if (low_register_operand (op0, SImode))
17363 action = action_for_partial_flag_setting;
17364 break;
17366 case NEG:
17367 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17368 if (low_register_operand (op0, SImode))
17369 action = CONV;
17370 break;
17372 case CONST_INT:
17373 /* MOVS <Rd>,#<imm8> */
17374 if (CONST_INT_P (src)
17375 && IN_RANGE (INTVAL (src), 0, 255))
17376 action = action_for_partial_flag_setting;
17377 break;
17379 case REG:
17380 /* MOVS and MOV<c> with registers have different
17381 encodings, so are not relevant here. */
17382 break;
17384 default:
17385 break;
17389 if (action != SKIP)
17391 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17392 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17393 rtvec vec;
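/* For SWAP_CONV the operation is commutative but the destination matches
   the second operand, so swap the operands before adding the CC clobber.  */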
17395 if (action == SWAP_CONV)
17397 src = copy_rtx (src);
17398 XEXP (src, 0) = op1;
17399 XEXP (src, 1) = op0;
17400 pat = gen_rtx_SET (dst, src);
17401 vec = gen_rtvec (2, pat, clobber);
17403 else /* action == CONV */
17404 vec = gen_rtvec (2, pat, clobber);
17406 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17407 INSN_CODE (insn) = -1;
17411 if (NONDEBUG_INSN_P (insn))
17412 df_simulate_one_insn_backwards (bb, insn, &live);
17416 CLEAR_REG_SET (&live);
17419 /* GCC puts the pool in the wrong place for ARM, since we can only
17420 load addresses a limited distance around the pc. We do some
17421 special munging to move the constant pool values to the correct
17422 point in the code. */
17423 static void
17424 arm_reorg (void)
17426 rtx_insn *insn;
17427 HOST_WIDE_INT address = 0;
17428 Mfix * fix;
17430 if (use_cmse)
17431 cmse_nonsecure_call_clear_caller_saved ();
17432 if (TARGET_THUMB1)
17433 thumb1_reorg ();
17434 else if (TARGET_THUMB2)
17435 thumb2_reorg ();
17437 /* Ensure all insns that must be split have been split at this point.
17438 Otherwise, the pool placement code below may compute incorrect
17439 insn lengths. Note that when optimizing, all insns have already
17440 been split at this point. */
17441 if (!optimize)
17442 split_all_insns_noflow ();
17444 /* Make sure we do not attempt to create a literal pool even though it should
17445 no longer be necessary to create any. */
17446 if (arm_disable_literal_pool)
17447 return;
17449 minipool_fix_head = minipool_fix_tail = NULL;
17451 /* The first insn must always be a note, or the code below won't
17452 scan it properly. */
17453 insn = get_insns ();
17454 gcc_assert (NOTE_P (insn));
17455 minipool_pad = 0;
17457 /* Scan all the insns and record the operands that will need fixing. */
17458 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17460 if (BARRIER_P (insn))
17461 push_minipool_barrier (insn, address);
17462 else if (INSN_P (insn))
17464 rtx_jump_table_data *table;
17466 note_invalid_constants (insn, address, true);
17467 address += get_attr_length (insn);
17469 /* If the insn is a vector jump, add the size of the table
17470 and skip the table. */
17471 if (tablejump_p (insn, NULL, &table))
17473 address += get_jump_table_size (table);
17474 insn = table;
17477 else if (LABEL_P (insn))
17478 /* Add the worst-case padding due to alignment. We don't add
17479 the _current_ padding because the minipool insertions
17480 themselves might change it. */
17481 address += get_label_padding (insn);
17484 fix = minipool_fix_head;
17486 /* Now scan the fixups and perform the required changes. */
17487 while (fix)
17489 Mfix * ftmp;
17490 Mfix * fdel;
17491 Mfix * last_added_fix;
17492 Mfix * last_barrier = NULL;
17493 Mfix * this_fix;
17495 /* Skip any further barriers before the next fix. */
17496 while (fix && BARRIER_P (fix->insn))
17497 fix = fix->next;
17499 /* No more fixes. */
17500 if (fix == NULL)
17501 break;
17503 last_added_fix = NULL;
17505 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17507 if (BARRIER_P (ftmp->insn))
17509 if (ftmp->address >= minipool_vector_head->max_address)
17510 break;
17512 last_barrier = ftmp;
17514 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17515 break;
17517 last_added_fix = ftmp; /* Keep track of the last fix added. */
17520 /* If we found a barrier, drop back to that; any fixes that we
17521 could have reached but come after the barrier will now go in
17522 the next mini-pool. */
17523 if (last_barrier != NULL)
17525 /* Reduce the refcount for those fixes that won't go into this
17526 pool after all. */
17527 for (fdel = last_barrier->next;
17528 fdel && fdel != ftmp;
17529 fdel = fdel->next)
17531 fdel->minipool->refcount--;
17532 fdel->minipool = NULL;
17535 ftmp = last_barrier;
17537 else
17539 /* ftmp is the first fix that we can't fit into this pool and
17540 there are no natural barriers that we could use. Insert a
17541 new barrier in the code somewhere between the previous
17542 fix and this one, and arrange to jump around it. */
17543 HOST_WIDE_INT max_address;
17545 /* The last item on the list of fixes must be a barrier, so
17546 we can never run off the end of the list of fixes without
17547 last_barrier being set. */
17548 gcc_assert (ftmp);
17550 max_address = minipool_vector_head->max_address;
17551 /* Check that there isn't another fix that is in range that
17552 we couldn't fit into this pool because the pool was
17553 already too large: we need to put the pool before such an
17554 instruction. The pool itself may come just after the
17555 fix because create_fix_barrier also allows space for a
17556 jump instruction. */
17557 if (ftmp->address < max_address)
17558 max_address = ftmp->address + 1;
17560 last_barrier = create_fix_barrier (last_added_fix, max_address);
17563 assign_minipool_offsets (last_barrier);
17565 while (ftmp)
17567 if (!BARRIER_P (ftmp->insn)
17568 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17569 == NULL))
17570 break;
17572 ftmp = ftmp->next;
17575 /* Scan over the fixes we have identified for this pool, fixing them
17576 up and adding the constants to the pool itself. */
17577 for (this_fix = fix; this_fix && ftmp != this_fix;
17578 this_fix = this_fix->next)
17579 if (!BARRIER_P (this_fix->insn))
17581 rtx addr
17582 = plus_constant (Pmode,
17583 gen_rtx_LABEL_REF (VOIDmode,
17584 minipool_vector_label),
17585 this_fix->minipool->offset);
17586 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17589 dump_minipool (last_barrier->insn);
17590 fix = ftmp;
17593 /* From now on we must synthesize any constants that we can't handle
17594 directly. This can happen if the RTL gets split during final
17595 instruction generation. */
17596 cfun->machine->after_arm_reorg = 1;
17598 /* Free the minipool memory. */
17599 obstack_free (&minipool_obstack, minipool_startobj);
17602 /* Routines to output assembly language. */
17604 /* Return the string representation of the passed-in real value. */
17605 static const char *
17606 fp_const_from_val (REAL_VALUE_TYPE *r)
17608 if (!fp_consts_inited)
17609 init_fp_table ();
17611 gcc_assert (real_equal (r, &value_fp0));
17612 return "0";
17615 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17616 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17617 is in the list, and UPDATE is true iff the list contains an explicit
17618 update of the base register. */
17619 void
17620 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17621 bool update)
17623 int i;
17624 char pattern[100];
17625 int offset;
17626 const char *conditional;
17627 int num_saves = XVECLEN (operands[0], 0);
17628 unsigned int regno;
17629 unsigned int regno_base = REGNO (operands[1]);
17630 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17632 offset = 0;
17633 offset += update ? 1 : 0;
17634 offset += return_pc ? 1 : 0;
17636 /* Is the base register in the list? */
17637 for (i = offset; i < num_saves; i++)
17639 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17640 /* If SP is in the list, then the base register must be SP. */
17641 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17642 /* If base register is in the list, there must be no explicit update. */
17643 if (regno == regno_base)
17644 gcc_assert (!update);
17647 conditional = reverse ? "%?%D0" : "%?%d0";
17648 /* Can't use POP if returning from an interrupt. */
17649 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17650 sprintf (pattern, "pop%s\t{", conditional);
17651 else
17653 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17654 It's just a convention; their semantics are identical. */
17655 if (regno_base == SP_REGNUM)
17656 sprintf (pattern, "ldmfd%s\t", conditional);
17657 else if (update)
17658 sprintf (pattern, "ldmia%s\t", conditional);
17659 else
17660 sprintf (pattern, "ldm%s\t", conditional);
17662 strcat (pattern, reg_names[regno_base]);
17663 if (update)
17664 strcat (pattern, "!, {");
17665 else
17666 strcat (pattern, ", {");
17669 /* Output the first destination register. */
17670 strcat (pattern,
17671 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17673 /* Output the rest of the destination registers. */
17674 for (i = offset + 1; i < num_saves; i++)
17676 strcat (pattern, ", ");
17677 strcat (pattern,
17678 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17681 strcat (pattern, "}");
17683 if (interrupt_p && return_pc)
17684 strcat (pattern, "^");
17686 output_asm_insn (pattern, &cond);
17690 /* Output the assembly for a store multiple. */
17692 const char *
17693 vfp_output_vstmd (rtx * operands)
17695 char pattern[100];
17696 int p;
17697 int base;
17698 int i;
17699 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17700 ? XEXP (operands[0], 0)
17701 : XEXP (XEXP (operands[0], 0), 0);
17702 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17704 if (push_p)
17705 strcpy (pattern, "vpush%?.64\t{%P1");
17706 else
17707 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17709 p = strlen (pattern);
17711 gcc_assert (REG_P (operands[1]));
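/* Translate the first transfer register from its internal (S register
   based) number into a D register index for the remaining operands.  */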
17713 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17714 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17716 p += sprintf (&pattern[p], ", d%d", base + i);
17718 strcpy (&pattern[p], "}");
17720 output_asm_insn (pattern, operands);
17721 return "";
17725 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17726 number of bytes pushed. */
17728 static int
17729 vfp_emit_fstmd (int base_reg, int count)
17731 rtx par;
17732 rtx dwarf;
17733 rtx tmp, reg;
17734 int i;
17736 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17737 register pairs are stored by a store multiple insn. We avoid this
17738 by pushing an extra pair. */
17739 if (count == 2 && !arm_arch6)
17741 if (base_reg == LAST_VFP_REGNUM - 3)
17742 base_reg -= 2;
17743 count++;
17746 /* FSTMD may not store more than 16 doubleword registers at once. Split
17747 larger stores into multiple parts (up to a maximum of two, in
17748 practice). */
17749 if (count > 16)
17751 int saved;
17752 /* NOTE: base_reg is an internal register number, so each D register
17753 counts as 2. */
17754 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17755 saved += vfp_emit_fstmd (base_reg, 16);
17756 return saved;
17759 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17760 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
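/* PAR becomes the real PUSH_MULT insn; DWARF describes the same effect
   as COUNT individual stores plus the stack-pointer adjustment, and is
   attached below as a REG_FRAME_RELATED_EXPR note for the unwinder.  */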
17762 reg = gen_rtx_REG (DFmode, base_reg);
17763 base_reg += 2;
17765 XVECEXP (par, 0, 0)
17766 = gen_rtx_SET (gen_frame_mem
17767 (BLKmode,
17768 gen_rtx_PRE_MODIFY (Pmode,
17769 stack_pointer_rtx,
17770 plus_constant
17771 (Pmode, stack_pointer_rtx,
17772 - (count * 8)))
17774 gen_rtx_UNSPEC (BLKmode,
17775 gen_rtvec (1, reg),
17776 UNSPEC_PUSH_MULT));
17778 tmp = gen_rtx_SET (stack_pointer_rtx,
17779 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17780 RTX_FRAME_RELATED_P (tmp) = 1;
17781 XVECEXP (dwarf, 0, 0) = tmp;
17783 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17784 RTX_FRAME_RELATED_P (tmp) = 1;
17785 XVECEXP (dwarf, 0, 1) = tmp;
17787 for (i = 1; i < count; i++)
17789 reg = gen_rtx_REG (DFmode, base_reg);
17790 base_reg += 2;
17791 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17793 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17794 plus_constant (Pmode,
17795 stack_pointer_rtx,
17796 i * 8)),
17797 reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, i + 1) = tmp;
17802 par = emit_insn (par);
17803 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17804 RTX_FRAME_RELATED_P (par) = 1;
17806 return count * 8;
17809 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17810 has the cmse_nonsecure_call attribute; return false otherwise. */
17812 bool
17813 detect_cmse_nonsecure_call (tree addr)
17815 if (!addr)
17816 return FALSE;
17818 tree fntype = TREE_TYPE (addr);
17819 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17820 TYPE_ATTRIBUTES (fntype)))
17821 return TRUE;
17822 return FALSE;
17826 /* Emit a call instruction with pattern PAT. ADDR is the address of
17827 the call target. */
17829 void
17830 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17832 rtx insn;
17834 insn = emit_call_insn (pat);
17836 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17837 If the call might use such an entry, add a use of the PIC register
17838 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17839 if (TARGET_VXWORKS_RTP
17840 && flag_pic
17841 && !sibcall
17842 && GET_CODE (addr) == SYMBOL_REF
17843 && (SYMBOL_REF_DECL (addr)
17844 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17845 : !SYMBOL_REF_LOCAL_P (addr)))
17847 require_pic_register ();
17848 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17851 if (TARGET_AAPCS_BASED)
17853 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17854 linker. We need to add an IP clobber to allow setting
17855 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17856 is not needed since it's a fixed register. */
17857 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17858 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17862 /* Output a 'call' insn. */
17863 const char *
17864 output_call (rtx *operands)
17866 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17868 /* Handle calls to lr using ip (which may be clobbered by the subroutine anyway). */
17869 if (REGNO (operands[0]) == LR_REGNUM)
17871 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17872 output_asm_insn ("mov%?\t%0, %|lr", operands);
17875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17877 if (TARGET_INTERWORK || arm_arch4t)
17878 output_asm_insn ("bx%?\t%0", operands);
17879 else
17880 output_asm_insn ("mov%?\t%|pc, %0", operands);
17882 return "";
17885 /* Output a move from arm registers to arm registers of a long double
17886 OPERANDS[0] is the destination.
17887 OPERANDS[1] is the source. */
17888 const char *
17889 output_mov_long_double_arm_from_arm (rtx *operands)
17891 /* We have to be careful here because the two might overlap. */
17892 int dest_start = REGNO (operands[0]);
17893 int src_start = REGNO (operands[1]);
17894 rtx ops[2];
17895 int i;
17897 if (dest_start < src_start)
17899 for (i = 0; i < 3; i++)
17901 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17902 ops[1] = gen_rtx_REG (SImode, src_start + i);
17903 output_asm_insn ("mov%?\t%0, %1", ops);
17906 else
17908 for (i = 2; i >= 0; i--)
17910 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17911 ops[1] = gen_rtx_REG (SImode, src_start + i);
17912 output_asm_insn ("mov%?\t%0, %1", ops);
17916 return "";
17919 void
17920 arm_emit_movpair (rtx dest, rtx src)
17922 /* If the src is an immediate, emit its low 16 bits first, then insert the high 16 bits if they are nonzero. */
17923 if (CONST_INT_P (src))
17925 HOST_WIDE_INT val = INTVAL (src);
17926 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17927 if ((val >> 16) & 0x0000ffff)
17929 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17930 GEN_INT (16)),
17931 GEN_INT ((val >> 16) & 0x0000ffff));
17932 rtx_insn *insn = get_last_insn ();
17933 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17935 return;
17937 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17938 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17939 rtx_insn *insn = get_last_insn ();
17940 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17943 /* Output a move between double words. It must be REG<-MEM
17944 or MEM<-REG. */
17945 const char *
17946 output_move_double (rtx *operands, bool emit, int *count)
17948 enum rtx_code code0 = GET_CODE (operands[0]);
17949 enum rtx_code code1 = GET_CODE (operands[1]);
17950 rtx otherops[3];
17951 if (count)
17952 *count = 1;
17954 /* The only case when this might happen is when
17955 you are looking at the length of a DImode instruction
17956 that has an invalid constant in it. */
17957 if (code0 == REG && code1 != MEM)
17959 gcc_assert (!emit);
17960 *count = 2;
17961 return "";
17964 if (code0 == REG)
17966 unsigned int reg0 = REGNO (operands[0]);
17968 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17970 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17972 switch (GET_CODE (XEXP (operands[1], 0)))
17974 case REG:
17976 if (emit)
17978 if (TARGET_LDRD
17979 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17980 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17981 else
17982 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17984 break;
17986 case PRE_INC:
17987 gcc_assert (TARGET_LDRD);
17988 if (emit)
17989 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17990 break;
17992 case PRE_DEC:
17993 if (emit)
17995 if (TARGET_LDRD)
17996 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17997 else
17998 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18000 break;
18002 case POST_INC:
18003 if (emit)
18005 if (TARGET_LDRD)
18006 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18007 else
18008 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18010 break;
18012 case POST_DEC:
18013 gcc_assert (TARGET_LDRD);
18014 if (emit)
18015 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18016 break;
18018 case PRE_MODIFY:
18019 case POST_MODIFY:
18020 /* Autoincrement addressing modes should never have overlapping
18021 base and destination registers, and overlapping index registers
18022 are already prohibited, so this doesn't need to worry about
18023 fix_cm3_ldrd. */
18024 otherops[0] = operands[0];
18025 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18026 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18028 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18030 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18032 /* Registers overlap so split out the increment. */
18033 if (emit)
18035 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18036 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18038 if (count)
18039 *count = 2;
18041 else
18043 /* Use a single insn if we can.
18044 FIXME: IWMMXT allows offsets larger than ldrd can
18045 handle, fix these up with a pair of ldr. */
18046 if (TARGET_THUMB2
18047 || !CONST_INT_P (otherops[2])
18048 || (INTVAL (otherops[2]) > -256
18049 && INTVAL (otherops[2]) < 256))
18051 if (emit)
18052 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18054 else
18056 if (emit)
18058 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18059 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18061 if (count)
18062 *count = 2;
18067 else
18069 /* Use a single insn if we can.
18070 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18071 fix these up with a pair of ldr. */
18072 if (TARGET_THUMB2
18073 || !CONST_INT_P (otherops[2])
18074 || (INTVAL (otherops[2]) > -256
18075 && INTVAL (otherops[2]) < 256))
18077 if (emit)
18078 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18080 else
18082 if (emit)
18084 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18085 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18087 if (count)
18088 *count = 2;
18091 break;
18093 case LABEL_REF:
18094 case CONST:
18095 /* We might be able to use ldrd %0, %1 here. However the range is
18096 different to ldr/adr, and it is broken on some ARMv7-M
18097 implementations. */
18098 /* Use the second register of the pair to avoid problematic
18099 overlap. */
18100 otherops[1] = operands[1];
18101 if (emit)
18102 output_asm_insn ("adr%?\t%0, %1", otherops);
18103 operands[1] = otherops[0];
18104 if (emit)
18106 if (TARGET_LDRD)
18107 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18108 else
18109 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18112 if (count)
18113 *count = 2;
18114 break;
18116 /* ??? This needs checking for thumb2. */
18117 default:
18118 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18119 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18121 otherops[0] = operands[0];
18122 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18123 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18125 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18127 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
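/* Without LDRD, constant offsets of -8, -4 and +4 can still be handled
   with a single load-multiple, using the ldmdb, ldmda and ldmib
   addressing variants (the latter two are not available in Thumb-2).  */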
18129 switch ((int) INTVAL (otherops[2]))
18131 case -8:
18132 if (emit)
18133 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18134 return "";
18135 case -4:
18136 if (TARGET_THUMB2)
18137 break;
18138 if (emit)
18139 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18140 return "";
18141 case 4:
18142 if (TARGET_THUMB2)
18143 break;
18144 if (emit)
18145 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18146 return "";
18149 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18150 operands[1] = otherops[0];
18151 if (TARGET_LDRD
18152 && (REG_P (otherops[2])
18153 || TARGET_THUMB2
18154 || (CONST_INT_P (otherops[2])
18155 && INTVAL (otherops[2]) > -256
18156 && INTVAL (otherops[2]) < 256)))
18158 if (reg_overlap_mentioned_p (operands[0],
18159 otherops[2]))
18161 /* Swap base and index registers over to
18162 avoid a conflict. */
18163 std::swap (otherops[1], otherops[2]);
18165 /* If both registers conflict, it will usually
18166 have been fixed by a splitter. */
18167 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18168 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18170 if (emit)
18172 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18173 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18175 if (count)
18176 *count = 2;
18178 else
18180 otherops[0] = operands[0];
18181 if (emit)
18182 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18184 return "";
18187 if (CONST_INT_P (otherops[2]))
18189 if (emit)
18191 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18192 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18193 else
18194 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18197 else
18199 if (emit)
18200 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18203 else
18205 if (emit)
18206 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18209 if (count)
18210 *count = 2;
18212 if (TARGET_LDRD)
18213 return "ldrd%?\t%0, [%1]";
18215 return "ldmia%?\t%1, %M0";
18217 else
18219 otherops[1] = adjust_address (operands[1], SImode, 4);
18220 /* Take care of overlapping base/data reg. */
18221 if (reg_mentioned_p (operands[0], operands[1]))
18223 if (emit)
18225 output_asm_insn ("ldr%?\t%0, %1", otherops);
18226 output_asm_insn ("ldr%?\t%0, %1", operands);
18228 if (count)
18229 *count = 2;
18232 else
18234 if (emit)
18236 output_asm_insn ("ldr%?\t%0, %1", operands);
18237 output_asm_insn ("ldr%?\t%0, %1", otherops);
18239 if (count)
18240 *count = 2;
18245 else
18247 /* Constraints should ensure this. */
18248 gcc_assert (code0 == MEM && code1 == REG);
18249 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18250 || (TARGET_ARM && TARGET_LDRD));
18252 switch (GET_CODE (XEXP (operands[0], 0)))
18254 case REG:
18255 if (emit)
18257 if (TARGET_LDRD)
18258 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18259 else
18260 output_asm_insn ("stm%?\t%m0, %M1", operands);
18262 break;
18264 case PRE_INC:
18265 gcc_assert (TARGET_LDRD);
18266 if (emit)
18267 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18268 break;
18270 case PRE_DEC:
18271 if (emit)
18273 if (TARGET_LDRD)
18274 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18275 else
18276 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18278 break;
18280 case POST_INC:
18281 if (emit)
18283 if (TARGET_LDRD)
18284 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18285 else
18286 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18288 break;
18290 case POST_DEC:
18291 gcc_assert (TARGET_LDRD);
18292 if (emit)
18293 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18294 break;
18296 case PRE_MODIFY:
18297 case POST_MODIFY:
18298 otherops[0] = operands[1];
18299 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18300 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18302 /* IWMMXT allows offsets larger than strd can handle;
18303 fix these up with a pair of str. */
18304 if (!TARGET_THUMB2
18305 && CONST_INT_P (otherops[2])
18306 && (INTVAL(otherops[2]) <= -256
18307 || INTVAL(otherops[2]) >= 256))
18309 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18311 if (emit)
18313 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18314 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18316 if (count)
18317 *count = 2;
18319 else
18321 if (emit)
18323 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18324 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18326 if (count)
18327 *count = 2;
18330 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18332 if (emit)
18333 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18335 else
18337 if (emit)
18338 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18340 break;
18342 case PLUS:
18343 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18344 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18346 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18348 case -8:
18349 if (emit)
18350 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18351 return "";
18353 case -4:
18354 if (TARGET_THUMB2)
18355 break;
18356 if (emit)
18357 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18358 return "";
18360 case 4:
18361 if (TARGET_THUMB2)
18362 break;
18363 if (emit)
18364 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18365 return "";
18368 if (TARGET_LDRD
18369 && (REG_P (otherops[2])
18370 || TARGET_THUMB2
18371 || (CONST_INT_P (otherops[2])
18372 && INTVAL (otherops[2]) > -256
18373 && INTVAL (otherops[2]) < 256)))
18375 otherops[0] = operands[1];
18376 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18377 if (emit)
18378 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18379 return "";
18381 /* Fall through */
18383 default:
18384 otherops[0] = adjust_address (operands[0], SImode, 4);
18385 otherops[1] = operands[1];
18386 if (emit)
18388 output_asm_insn ("str%?\t%1, %0", operands);
18389 output_asm_insn ("str%?\t%H1, %0", otherops);
18391 if (count)
18392 *count = 2;
18396 return "";
18399 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18400 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18402 const char *
18403 output_move_quad (rtx *operands)
18405 if (REG_P (operands[0]))
18407 /* Load, or reg->reg move. */
18409 if (MEM_P (operands[1]))
18411 switch (GET_CODE (XEXP (operands[1], 0)))
18413 case REG:
18414 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18415 break;
18417 case LABEL_REF:
18418 case CONST:
18419 output_asm_insn ("adr%?\t%0, %1", operands);
18420 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18421 break;
18423 default:
18424 gcc_unreachable ();
18427 else
18429 rtx ops[2];
18430 int dest, src, i;
18432 gcc_assert (REG_P (operands[1]));
18434 dest = REGNO (operands[0]);
18435 src = REGNO (operands[1]);
18437 /* This seems pretty dumb, but hopefully GCC won't try to do it
18438 very often. */
18439 if (dest < src)
18440 for (i = 0; i < 4; i++)
18442 ops[0] = gen_rtx_REG (SImode, dest + i);
18443 ops[1] = gen_rtx_REG (SImode, src + i);
18444 output_asm_insn ("mov%?\t%0, %1", ops);
18446 else
18447 for (i = 3; i >= 0; i--)
18449 ops[0] = gen_rtx_REG (SImode, dest + i);
18450 ops[1] = gen_rtx_REG (SImode, src + i);
18451 output_asm_insn ("mov%?\t%0, %1", ops);
18455 else
18457 gcc_assert (MEM_P (operands[0]));
18458 gcc_assert (REG_P (operands[1]));
18459 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18461 switch (GET_CODE (XEXP (operands[0], 0)))
18463 case REG:
18464 output_asm_insn ("stm%?\t%m0, %M1", operands);
18465 break;
18467 default:
18468 gcc_unreachable ();
18472 return "";
18475 /* Output a VFP load or store instruction. */
18477 const char *
18478 output_move_vfp (rtx *operands)
18480 rtx reg, mem, addr, ops[2];
18481 int load = REG_P (operands[0]);
18482 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18483 int sp = (!TARGET_VFP_FP16INST
18484 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18485 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18486 const char *templ;
18487 char buff[50];
18488 machine_mode mode;
18490 reg = operands[!load];
18491 mem = operands[load];
18493 mode = GET_MODE (reg);
18495 gcc_assert (REG_P (reg));
18496 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18497 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18498 || mode == SFmode
18499 || mode == DFmode
18500 || mode == HImode
18501 || mode == SImode
18502 || mode == DImode
18503 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18504 gcc_assert (MEM_P (mem));
18506 addr = XEXP (mem, 0);
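/* Select the output template: vldm/vstm style for pre-decrement and
   post-increment addresses, a plain vldr/vstr otherwise; the size
   suffix and operand prefix are filled in by the sprintf below.  */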
18508 switch (GET_CODE (addr))
18510 case PRE_DEC:
18511 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18512 ops[0] = XEXP (addr, 0);
18513 ops[1] = reg;
18514 break;
18516 case POST_INC:
18517 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18518 ops[0] = XEXP (addr, 0);
18519 ops[1] = reg;
18520 break;
18522 default:
18523 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18524 ops[0] = reg;
18525 ops[1] = mem;
18526 break;
18529 sprintf (buff, templ,
18530 load ? "ld" : "st",
18531 dp ? "64" : sp ? "32" : "16",
18532 dp ? "P" : "",
18533 integer_p ? "\t%@ int" : "");
18534 output_asm_insn (buff, ops);
18536 return "";
18539 /* Output a Neon double-word or quad-word load or store, or a load
18540 or store for larger structure modes.
18542 WARNING: The ordering of elements is weird in big-endian mode,
18543 because the EABI requires that vectors stored in memory appear
18544 as though they were stored by a VSTM instruction.
18545 GCC RTL defines element ordering based on in-memory order.
18546 This can be different from the architectural ordering of elements
18547 within a NEON register. The intrinsics defined in arm_neon.h use the
18548 NEON register element ordering, not the GCC RTL element ordering.
18550 For example, the in-memory ordering of a big-endian quadword
18551 vector with 16-bit elements when stored from register pair {d0,d1}
18552 will be (lowest address first, d0[N] is NEON register element N):
18554 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18556 When necessary, quadword registers (dN, dN+1) are moved to ARM
18557 registers starting at rN, in the order:
18559 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18561 So that STM/LDM can be used on vectors in ARM registers, and the
18562 same memory layout will result as if VSTM/VLDM were used.
18564 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18565 possible, which allows use of appropriate alignment tags.
18566 Note that the choice of "64" is independent of the actual vector
18567 element size; this size simply ensures that the behavior is
18568 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18570 Due to limitations of those instructions, use of VST1.64/VLD1.64
18571 is not possible if:
18572 - the address contains PRE_DEC, or
18573 - the mode refers to more than 4 double-word registers
18575 In those cases, it would be possible to replace VSTM/VLDM by a
18576 sequence of instructions; this is not currently implemented since
18577 this is not certain to actually improve performance. */
18579 const char *
18580 output_move_neon (rtx *operands)
18582 rtx reg, mem, addr, ops[2];
18583 int regno, nregs, load = REG_P (operands[0]);
18584 const char *templ;
18585 char buff[50];
18586 machine_mode mode;
18588 reg = operands[!load];
18589 mem = operands[load];
18591 mode = GET_MODE (reg);
18593 gcc_assert (REG_P (reg));
18594 regno = REGNO (reg);
18595 nregs = REG_NREGS (reg) / 2;
18596 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18597 || NEON_REGNO_OK_FOR_QUAD (regno));
18598 gcc_assert (VALID_NEON_DREG_MODE (mode)
18599 || VALID_NEON_QREG_MODE (mode)
18600 || VALID_NEON_STRUCT_MODE (mode));
18601 gcc_assert (MEM_P (mem));
18603 addr = XEXP (mem, 0);
18605 /* Strip off const from addresses like (const (plus (...))). */
18606 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18607 addr = XEXP (addr, 0);
18609 switch (GET_CODE (addr))
18611 case POST_INC:
18612 /* We have to use vldm / vstm for too-large modes. */
18613 if (nregs > 4)
18615 templ = "v%smia%%?\t%%0!, %%h1";
18616 ops[0] = XEXP (addr, 0);
18618 else
18620 templ = "v%s1.64\t%%h1, %%A0";
18621 ops[0] = mem;
18623 ops[1] = reg;
18624 break;
18626 case PRE_DEC:
18627 /* We have to use vldm / vstm in this case, since there is no
18628 pre-decrement form of the vld1 / vst1 instructions. */
18629 templ = "v%smdb%%?\t%%0!, %%h1";
18630 ops[0] = XEXP (addr, 0);
18631 ops[1] = reg;
18632 break;
18634 case POST_MODIFY:
18635 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18636 gcc_unreachable ();
18638 case REG:
18639 /* We have to use vldm / vstm for too-large modes. */
18640 if (nregs > 1)
18642 if (nregs > 4)
18643 templ = "v%smia%%?\t%%m0, %%h1";
18644 else
18645 templ = "v%s1.64\t%%h1, %%A0";
18647 ops[0] = mem;
18648 ops[1] = reg;
18649 break;
18651 /* Fall through. */
18652 case LABEL_REF:
18653 case PLUS:
18655 int i;
18656 int overlap = -1;
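/* Emit one vldr/vstr per D register.  If the transfer register happens
   to overlap the address, remember it and handle it last so the address
   is still valid for the other transfers.  */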
18657 for (i = 0; i < nregs; i++)
18659 /* We're only using DImode here because it's a convenient size. */
18660 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18661 ops[1] = adjust_address (mem, DImode, 8 * i);
18662 if (reg_overlap_mentioned_p (ops[0], mem))
18664 gcc_assert (overlap == -1);
18665 overlap = i;
18667 else
18669 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18670 output_asm_insn (buff, ops);
18673 if (overlap != -1)
18675 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18676 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18677 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18678 output_asm_insn (buff, ops);
18681 return "";
18684 default:
18685 gcc_unreachable ();
18688 sprintf (buff, templ, load ? "ld" : "st");
18689 output_asm_insn (buff, ops);
18691 return "";
18694 /* Compute and return the length of neon_mov<mode>, where <mode> is
18695 one of the VSTRUCT modes: EI, OI, CI or XI. */
18697 arm_attr_length_move_neon (rtx_insn *insn)
18699 rtx reg, mem, addr;
18700 int load;
18701 machine_mode mode;
18703 extract_insn_cached (insn);
18705 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18707 mode = GET_MODE (recog_data.operand[0]);
18708 switch (mode)
18710 case E_EImode:
18711 case E_OImode:
18712 return 8;
18713 case E_CImode:
18714 return 12;
18715 case E_XImode:
18716 return 16;
18717 default:
18718 gcc_unreachable ();
18722 load = REG_P (recog_data.operand[0]);
18723 reg = recog_data.operand[!load];
18724 mem = recog_data.operand[load];
18726 gcc_assert (MEM_P (mem));
18728 addr = XEXP (mem, 0);
18730 /* Strip off const from addresses like (const (plus (...))). */
18731 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18732 addr = XEXP (addr, 0);
18734 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18736 int insns = REG_NREGS (reg) / 2;
18737 return insns * 4;
18739 else
18740 return 4;
18743 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18744 return zero. */
18747 arm_address_offset_is_imm (rtx_insn *insn)
18749 rtx mem, addr;
18751 extract_insn_cached (insn);
18753 if (REG_P (recog_data.operand[0]))
18754 return 0;
18756 mem = recog_data.operand[0];
18758 gcc_assert (MEM_P (mem));
18760 addr = XEXP (mem, 0);
18762 if (REG_P (addr)
18763 || (GET_CODE (addr) == PLUS
18764 && REG_P (XEXP (addr, 0))
18765 && CONST_INT_P (XEXP (addr, 1))))
18766 return 1;
18767 else
18768 return 0;
18771 /* Output an ADD r, s, #n where n may be too big for one instruction.
18772 If n is zero and r and s are the same register, output nothing. */
18773 const char *
18774 output_add_immediate (rtx *operands)
18776 HOST_WIDE_INT n = INTVAL (operands[2]);
18778 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18780 if (n < 0)
18781 output_multi_immediate (operands,
18782 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18783 -n);
18784 else
18785 output_multi_immediate (operands,
18786 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2, n);
18790 return "";
18793 /* Output a multiple immediate operation.
18794 OPERANDS is the vector of operands referred to in the output patterns.
18795 INSTR1 is the output pattern to use for the first constant.
18796 INSTR2 is the output pattern to use for subsequent constants.
18797 IMMED_OP is the index of the constant slot in OPERANDS.
18798 N is the constant value. */
18799 static const char *
18800 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18801 int immed_op, HOST_WIDE_INT n)
18803 #if HOST_BITS_PER_WIDE_INT > 32
18804 n &= 0xffffffff;
18805 #endif
18807 if (n == 0)
18809 /* Quick and easy output. */
18810 operands[immed_op] = const0_rtx;
18811 output_asm_insn (instr1, operands);
18813 else
18815 int i;
18816 const char * instr = instr1;
18818 /* Note that n is never zero here (which would give no output). */
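/* Each iteration peels off an 8-bit chunk that starts at an even bit
   position, which is exactly the form an ARM data-processing immediate
   (an 8-bit value rotated by an even amount) can encode.  */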
18819 for (i = 0; i < 32; i += 2)
18821 if (n & (3 << i))
18823 operands[immed_op] = GEN_INT (n & (255 << i));
18824 output_asm_insn (instr, operands);
18825 instr = instr2;
18826 i += 6;
18831 return "";
18834 /* Return the name of a shifter operation. */
18835 static const char *
18836 arm_shift_nmem(enum rtx_code code)
18838 switch (code)
18840 case ASHIFT:
18841 return ARM_LSL_NAME;
18843 case ASHIFTRT:
18844 return "asr";
18846 case LSHIFTRT:
18847 return "lsr";
18849 case ROTATERT:
18850 return "ror";
18852 default:
18853 abort();
18857 /* Return the appropriate ARM instruction for the operation code.
18858 The returned result should not be overwritten. OP is the rtx of the
18859 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18860 was shifted. */
18861 const char *
18862 arithmetic_instr (rtx op, int shift_first_arg)
18864 switch (GET_CODE (op))
18866 case PLUS:
18867 return "add";
18869 case MINUS:
18870 return shift_first_arg ? "rsb" : "sub";
18872 case IOR:
18873 return "orr";
18875 case XOR:
18876 return "eor";
18878 case AND:
18879 return "and";
18881 case ASHIFT:
18882 case ASHIFTRT:
18883 case LSHIFTRT:
18884 case ROTATERT:
18885 return arm_shift_nmem(GET_CODE(op));
18887 default:
18888 gcc_unreachable ();
18892 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18893 for the operation code. The returned result should not be overwritten.
18894 OP is the rtx code of the shift.
18895 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18896 will be the constant shift amount. */
18897 static const char *
18898 shift_op (rtx op, HOST_WIDE_INT *amountp)
18900 const char * mnem;
18901 enum rtx_code code = GET_CODE (op);
18903 switch (code)
18905 case ROTATE:
18906 if (!CONST_INT_P (XEXP (op, 1)))
18908 output_operand_lossage ("invalid shift operand");
18909 return NULL;
18912 code = ROTATERT;
18913 *amountp = 32 - INTVAL (XEXP (op, 1));
18914 mnem = "ror";
18915 break;
18917 case ASHIFT:
18918 case ASHIFTRT:
18919 case LSHIFTRT:
18920 case ROTATERT:
18921 mnem = arm_shift_nmem(code);
18922 if (CONST_INT_P (XEXP (op, 1)))
18924 *amountp = INTVAL (XEXP (op, 1));
18926 else if (REG_P (XEXP (op, 1)))
18928 *amountp = -1;
18929 return mnem;
18931 else
18933 output_operand_lossage ("invalid shift operand");
18934 return NULL;
18936 break;
18938 case MULT:
18939 /* We never have to worry about the amount being other than a
18940 power of 2, since this case can never be reloaded from a reg. */
18941 if (!CONST_INT_P (XEXP (op, 1)))
18943 output_operand_lossage ("invalid shift operand");
18944 return NULL;
18947 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18949 /* Amount must be a power of two. */
18950 if (*amountp & (*amountp - 1))
18952 output_operand_lossage ("invalid shift operand");
18953 return NULL;
18956 *amountp = exact_log2 (*amountp);
18957 gcc_assert (IN_RANGE (*amountp, 0, 31));
18958 return ARM_LSL_NAME;
18960 default:
18961 output_operand_lossage ("invalid shift operand");
18962 return NULL;
18965 /* This is not 100% correct, but follows from the desire to merge
18966 multiplication by a power of 2 with the recognizer for a
18967 shift. >=32 is not a valid shift for "lsl", so we must try and
18968 output a shift that produces the correct arithmetical result.
18969 Using lsr #32 is identical except for the fact that the carry bit
18970 is not set correctly if we set the flags; but we never use the
18971 carry bit from such an operation, so we can ignore that. */
18972 if (code == ROTATERT)
18973 /* Rotate is just modulo 32. */
18974 *amountp &= 31;
18975 else if (*amountp != (*amountp & 31))
18977 if (code == ASHIFT)
18978 mnem = "lsr";
18979 *amountp = 32;
18982 /* Shifts of 0 are no-ops. */
18983 if (*amountp == 0)
18984 return NULL;
18986 return mnem;
18989 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18990 because /bin/as is horribly restrictive. The judgement about
18991 whether or not each character is 'printable' (and can be output as
18992 is) or not (and must be printed with an octal escape) must be made
18993 with reference to the *host* character set -- the situation is
18994 similar to that discussed in the comments above pp_c_char in
18995 c-pretty-print.c. */
18997 #define MAX_ASCII_LEN 51
18999 void
19000 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19002 int i;
19003 int len_so_far = 0;
19005 fputs ("\t.ascii\t\"", stream);
19007 for (i = 0; i < len; i++)
19009 int c = p[i];
19011 if (len_so_far >= MAX_ASCII_LEN)
19013 fputs ("\"\n\t.ascii\t\"", stream);
19014 len_so_far = 0;
19017 if (ISPRINT (c))
19019 if (c == '\\' || c == '\"')
19021 putc ('\\', stream);
19022 len_so_far++;
19024 putc (c, stream);
19025 len_so_far++;
19027 else
19029 fprintf (stream, "\\%03o", c);
19030 len_so_far += 4;
19034 fputs ("\"\n", stream);
19037 /* Whether a register is callee saved or not. This is necessary because high
19038 registers are marked as caller saved when optimizing for size on Thumb-1
19039 targets, despite being callee saved, in order to avoid using them. */
19040 #define callee_saved_reg_p(reg) \
19041 (!call_used_regs[reg] \
19042 || (TARGET_THUMB1 && optimize_size \
19043 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19045 /* Compute the register save mask for registers 0 through 12
19046 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19048 static unsigned long
19049 arm_compute_save_reg0_reg12_mask (void)
19051 unsigned long func_type = arm_current_func_type ();
19052 unsigned long save_reg_mask = 0;
19053 unsigned int reg;
19055 if (IS_INTERRUPT (func_type))
19057 unsigned int max_reg;
19058 /* Interrupt functions must not corrupt any registers,
19059 even call clobbered ones. If this is a leaf function
19060 we can just examine the registers used by the RTL, but
19061 otherwise we have to assume that whatever function is
19062 called might clobber anything, and so we have to save
19063 all the call-clobbered registers as well. */
19064 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19065 /* FIQ handlers have registers r8 - r12 banked, so
19066 we only need to check r0 - r7. Normal ISRs only
19067 bank r14 and r15, so we must check up to r12.
19068 r13 is the stack pointer which is always preserved,
19069 so we do not need to consider it here. */
19070 max_reg = 7;
19071 else
19072 max_reg = 12;
19074 for (reg = 0; reg <= max_reg; reg++)
19075 if (df_regs_ever_live_p (reg)
19076 || (! crtl->is_leaf && call_used_regs[reg]))
19077 save_reg_mask |= (1 << reg);
19079 /* Also save the pic base register if necessary. */
19080 if (flag_pic
19081 && !TARGET_SINGLE_PIC_BASE
19082 && arm_pic_register != INVALID_REGNUM
19083 && crtl->uses_pic_offset_table)
19084 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19086 else if (IS_VOLATILE(func_type))
19088 /* For noreturn functions we historically omitted register saves
19089 altogether. However, this really messes up debugging. As a
19090 compromise, save just the frame pointers. Combined with the link
19091 register saved elsewhere this should be sufficient to get
19092 a backtrace. */
19093 if (frame_pointer_needed)
19094 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19095 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19096 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19097 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19098 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19100 else
19102 /* In the normal case we only need to save those registers
19103 which are call saved and which are used by this function. */
19104 for (reg = 0; reg <= 11; reg++)
19105 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19106 save_reg_mask |= (1 << reg);
19108 /* Handle the frame pointer as a special case. */
19109 if (frame_pointer_needed)
19110 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19112 /* If we aren't loading the PIC register,
19113 don't stack it even though it may be live. */
19114 if (flag_pic
19115 && !TARGET_SINGLE_PIC_BASE
19116 && arm_pic_register != INVALID_REGNUM
19117 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19118 || crtl->uses_pic_offset_table))
19119 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19121 /* The prologue will copy SP into R0, so save it. */
19122 if (IS_STACKALIGN (func_type))
19123 save_reg_mask |= 1;
19126 /* Save registers so the exception handler can modify them. */
19127 if (crtl->calls_eh_return)
19129 unsigned int i;
19131 for (i = 0; ; i++)
19133 reg = EH_RETURN_DATA_REGNO (i);
19134 if (reg == INVALID_REGNUM)
19135 break;
19136 save_reg_mask |= 1 << reg;
19140 return save_reg_mask;
19143 /* Return true if r3 is live at the start of the function. */
19145 static bool
19146 arm_r3_live_at_start_p (void)
19148 /* Just look at cfg info, which is still close enough to correct at this
19149 point. This gives false positives for broken functions that might use
19150 uninitialized data that happens to be allocated in r3, but who cares? */
19151 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19154 /* Compute the number of bytes used to store the static chain register on the
19155 stack, above the stack frame. We need to know this accurately to get the
19156 alignment of the rest of the stack frame correct. */
19158 static int
19159 arm_compute_static_chain_stack_bytes (void)
19161 /* See the defining assertion in arm_expand_prologue. */
19162 if (IS_NESTED (arm_current_func_type ())
19163 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19164 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19165 && !df_regs_ever_live_p (LR_REGNUM)))
19166 && arm_r3_live_at_start_p ()
19167 && crtl->args.pretend_args_size == 0)
19168 return 4;
19170 return 0;
19173 /* Compute a bit mask of which core registers need to be
19174 saved on the stack for the current function.
19175 This is used by arm_compute_frame_layout, which may add extra registers. */
19177 static unsigned long
19178 arm_compute_save_core_reg_mask (void)
19180 unsigned int save_reg_mask = 0;
19181 unsigned long func_type = arm_current_func_type ();
19182 unsigned int reg;
19184 if (IS_NAKED (func_type))
19185 /* This should never really happen. */
19186 return 0;
19188 /* If we are creating a stack frame, then we must save the frame pointer,
19189 IP (which will hold the old stack pointer), LR and the PC. */
19190 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19191 save_reg_mask |=
19192 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19193 | (1 << IP_REGNUM)
19194 | (1 << LR_REGNUM)
19195 | (1 << PC_REGNUM);
19197 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19199 /* Decide if we need to save the link register.
19200 Interrupt routines have their own banked link register,
19201 so they never need to save it.
19202 Otherwise if we do not use the link register we do not need to save
19203 it. If we are pushing other registers onto the stack however, we
19204 can save an instruction in the epilogue by pushing the link register
19205 now and then popping it back into the PC. This incurs extra memory
19206 accesses though, so we only do it when optimizing for size, and only
19207 if we know that we will not need a fancy return sequence. */
19208 if (df_regs_ever_live_p (LR_REGNUM)
19209 || (save_reg_mask
19210 && optimize_size
19211 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19212 && !crtl->tail_call_emit
19213 && !crtl->calls_eh_return))
19214 save_reg_mask |= 1 << LR_REGNUM;
19216 if (cfun->machine->lr_save_eliminated)
19217 save_reg_mask &= ~ (1 << LR_REGNUM);
19219 if (TARGET_REALLY_IWMMXT
19220 && ((bit_count (save_reg_mask)
19221 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19222 arm_compute_static_chain_stack_bytes())
19223 ) % 2) != 0)
19225 /* The total number of registers that are going to be pushed
19226 onto the stack is odd. We need to ensure that the stack
19227 is 64-bit aligned before we start to save iWMMXt registers,
19228 and also before we start to create locals. (A local variable
19229 might be a double or long long which we will load/store using
19230 an iWMMXt instruction). Therefore we need to push another
19231 ARM register, so that the stack will be 64-bit aligned. We
19232 try to avoid using the arg registers (r0 - r3) as they might be
19233 used to pass values in a tail call. */
19234 for (reg = 4; reg <= 12; reg++)
19235 if ((save_reg_mask & (1 << reg)) == 0)
19236 break;
19238 if (reg <= 12)
19239 save_reg_mask |= (1 << reg);
19240 else
19242 cfun->machine->sibcall_blocked = 1;
19243 save_reg_mask |= (1 << 3);
19247 /* We may need to push an additional register for use initializing the
19248 PIC base register. */
19249 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19250 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19252 reg = thumb_find_work_register (1 << 4);
19253 if (!call_used_regs[reg])
19254 save_reg_mask |= (1 << reg);
19257 return save_reg_mask;
19260 /* Compute a bit mask of which core registers need to be
19261 saved on the stack for the current function. */
19262 static unsigned long
19263 thumb1_compute_save_core_reg_mask (void)
19265 unsigned long mask;
19266 unsigned reg;
19268 mask = 0;
19269 for (reg = 0; reg < 12; reg ++)
19270 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19271 mask |= 1 << reg;
19273 /* Handle the frame pointer as a special case. */
19274 if (frame_pointer_needed)
19275 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19277 if (flag_pic
19278 && !TARGET_SINGLE_PIC_BASE
19279 && arm_pic_register != INVALID_REGNUM
19280 && crtl->uses_pic_offset_table)
19281 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19283 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19284 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19285 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19287 /* LR will also be pushed if any lo regs are pushed. */
19288 if (mask & 0xff || thumb_force_lr_save ())
19289 mask |= (1 << LR_REGNUM);
19291 /* Make sure we have a low work register if we need one.
19292 We will need one if we are going to push a high register,
19293 but we are not currently intending to push a low register. */
19294 if ((mask & 0xff) == 0
19295 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19297 /* Use thumb_find_work_register to choose which register
19298 we will use. If the register is live then we will
19299 have to push it. Use LAST_LO_REGNUM as our fallback
19300 choice for the register to select. */
19301 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19302 /* Make sure the register returned by thumb_find_work_register is
19303 not part of the return value. */
19304 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19305 reg = LAST_LO_REGNUM;
19307 if (callee_saved_reg_p (reg))
19308 mask |= 1 << reg;
19311 /* The 504 below is 8 bytes less than 512 because there are two possible
19312 alignment words. We can't tell here if they will be present or not so we
19313 have to play it safe and assume that they are. */
19314 if ((CALLER_INTERWORKING_SLOT_SIZE +
19315 ROUND_UP_WORD (get_frame_size ()) +
19316 crtl->outgoing_args_size) >= 504)
19318 /* This is the same as the code in thumb1_expand_prologue() which
19319 determines which register to use for stack decrement. */
19320 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19321 if (mask & (1 << reg))
19322 break;
19324 if (reg > LAST_LO_REGNUM)
19326 /* Make sure we have a register available for stack decrement. */
19327 mask |= 1 << LAST_LO_REGNUM;
19331 return mask;
19335 /* Return the number of bytes required to save VFP registers. */
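/* Editorial sketch of the calculation below: if, say, only d8 and d9 are
   live across the function and not call-clobbered, the loop counts two
   D registers (16 bytes); on pre-ARMv6 cores the VFPr1 workaround bumps
   a run of exactly two up to three, so 24 bytes are reserved instead.  */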
19336 static int
19337 arm_get_vfp_saved_size (void)
19339 unsigned int regno;
19340 int count;
19341 int saved;
19343 saved = 0;
19344 /* Space for saved VFP registers. */
19345 if (TARGET_HARD_FLOAT)
19347 count = 0;
19348 for (regno = FIRST_VFP_REGNUM;
19349 regno < LAST_VFP_REGNUM;
19350 regno += 2)
19352 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19353 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19355 if (count > 0)
19357 /* Workaround ARM10 VFPr1 bug. */
19358 if (count == 2 && !arm_arch6)
19359 count++;
19360 saved += count * 8;
19362 count = 0;
19364 else
19365 count++;
19367 if (count > 0)
19369 if (count == 2 && !arm_arch6)
19370 count++;
19371 saved += count * 8;
19374 return saved;
19378 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19379 everything bar the final return instruction. If simple_return is true,
19380 then do not output epilogue, because it has already been emitted in RTL. */
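/* Editorial note: for a typical ARM-mode function that saved only r4 and
   LR and has no special exit requirements, the code below would print
   something like

	pop	{r4, pc}

   and, because the return address went straight into the PC, no separate
   return instruction is emitted afterwards.  */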
19381 const char *
19382 output_return_instruction (rtx operand, bool really_return, bool reverse,
19383 bool simple_return)
19385 char conditional[10];
19386 char instr[100];
19387 unsigned reg;
19388 unsigned long live_regs_mask;
19389 unsigned long func_type;
19390 arm_stack_offsets *offsets;
19392 func_type = arm_current_func_type ();
19394 if (IS_NAKED (func_type))
19395 return "";
19397 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19399 /* If this function was declared non-returning, and we have
19400 found a tail call, then we have to trust that the called
19401 function won't return. */
19402 if (really_return)
19404 rtx ops[2];
19406 /* Otherwise, trap an attempted return by aborting. */
19407 ops[0] = operand;
19408 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19409 : "abort");
19410 assemble_external_libcall (ops[1]);
19411 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19414 return "";
19417 gcc_assert (!cfun->calls_alloca || really_return);
19419 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19421 cfun->machine->return_used_this_function = 1;
19423 offsets = arm_get_frame_offsets ();
19424 live_regs_mask = offsets->saved_regs_mask;
19426 if (!simple_return && live_regs_mask)
19428 const char * return_reg;
19430 /* If we do not have any special requirements for function exit
19431 (e.g. interworking) then we can load the return address
19432 directly into the PC. Otherwise we must load it into LR. */
19433 if (really_return
19434 && !IS_CMSE_ENTRY (func_type)
19435 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19436 return_reg = reg_names[PC_REGNUM];
19437 else
19438 return_reg = reg_names[LR_REGNUM];
19440 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19442 /* There are three possible reasons for the IP register
19443 being saved. 1) a stack frame was created, in which case
19444 IP contains the old stack pointer, or 2) an ISR routine
19445 corrupted it, or 3) it was saved to align the stack on
19446 iWMMXt. In case 1, restore IP into SP, otherwise just
19447 restore IP. */
19448 if (frame_pointer_needed)
19450 live_regs_mask &= ~ (1 << IP_REGNUM);
19451 live_regs_mask |= (1 << SP_REGNUM);
19453 else
19454 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19457 /* On some ARM architectures it is faster to use LDR rather than
19458 LDM to load a single register. On other architectures, the
19459 cost is the same. In 26 bit mode, or for exception handlers,
19460 we have to use LDM to load the PC so that the CPSR is also
19461 restored. */
19462 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19463 if (live_regs_mask == (1U << reg))
19464 break;
19466 if (reg <= LAST_ARM_REGNUM
19467 && (reg != LR_REGNUM
19468 || ! really_return
19469 || ! IS_INTERRUPT (func_type)))
19471 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19472 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19474 else
19476 char *p;
19477 int first = 1;
19479 /* Generate the load multiple instruction to restore the
19480 registers. Note we can get here, even if
19481 frame_pointer_needed is true, but only if sp already
19482 points to the base of the saved core registers. */
19483 if (live_regs_mask & (1 << SP_REGNUM))
19485 unsigned HOST_WIDE_INT stack_adjust;
19487 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19488 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19490 if (stack_adjust && arm_arch5 && TARGET_ARM)
19491 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19492 else
19494 /* If we can't use ldmib (SA110 bug),
19495 then try to pop r3 instead. */
19496 if (stack_adjust)
19497 live_regs_mask |= 1 << 3;
19499 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19502 /* For interrupt returns we have to use an LDM rather than
19503 a POP so that we can use the exception return variant. */
19504 else if (IS_INTERRUPT (func_type))
19505 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19506 else
19507 sprintf (instr, "pop%s\t{", conditional);
19509 p = instr + strlen (instr);
19511 for (reg = 0; reg <= SP_REGNUM; reg++)
19512 if (live_regs_mask & (1 << reg))
19514 int l = strlen (reg_names[reg]);
19516 if (first)
19517 first = 0;
19518 else
19520 memcpy (p, ", ", 2);
19521 p += 2;
19524 memcpy (p, "%|", 2);
19525 memcpy (p + 2, reg_names[reg], l);
19526 p += l + 2;
19529 if (live_regs_mask & (1 << LR_REGNUM))
19531 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19532 /* If returning from an interrupt, restore the CPSR. */
19533 if (IS_INTERRUPT (func_type))
19534 strcat (p, "^");
19536 else
19537 strcpy (p, "}");
19540 output_asm_insn (instr, & operand);
19542 /* See if we need to generate an extra instruction to
19543 perform the actual function return. */
19544 if (really_return
19545 && func_type != ARM_FT_INTERWORKED
19546 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19548 /* The return has already been handled
19549 by loading the LR into the PC. */
19550 return "";
19554 if (really_return)
19556 switch ((int) ARM_FUNC_TYPE (func_type))
19558 case ARM_FT_ISR:
19559 case ARM_FT_FIQ:
19560 /* ??? This is wrong for unified assembly syntax. */
19561 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19562 break;
19564 case ARM_FT_INTERWORKED:
19565 gcc_assert (arm_arch5 || arm_arch4t);
19566 sprintf (instr, "bx%s\t%%|lr", conditional);
19567 break;
19569 case ARM_FT_EXCEPTION:
19570 /* ??? This is wrong for unified assembly syntax. */
19571 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19572 break;
19574 default:
19575 if (IS_CMSE_ENTRY (func_type))
19577 /* Check if we have to clear the 'GE bits', which are only used if
19578 parallel add and subtraction instructions are available. */
19579 if (TARGET_INT_SIMD)
19580 snprintf (instr, sizeof (instr),
19581 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19582 else
19583 snprintf (instr, sizeof (instr),
19584 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19586 output_asm_insn (instr, & operand);
19587 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19589 /* Clear the cumulative exception-status bits (0-4,7) and the
19590 condition code bits (28-31) of the FPSCR. We need to
19591 remember to clear the first scratch register used (IP) and
19592 save and restore the second (r4). */
19593 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19594 output_asm_insn (instr, & operand);
19595 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19604 output_asm_insn (instr, & operand);
19605 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19606 output_asm_insn (instr, & operand);
19607 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19608 output_asm_insn (instr, & operand);
19610 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19612 /* Use bx if it's available. */
19613 else if (arm_arch5 || arm_arch4t)
19614 sprintf (instr, "bx%s\t%%|lr", conditional);
19615 else
19616 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19617 break;
19620 output_asm_insn (instr, & operand);
19623 return "";
19626 /* Output in FILE asm statements needed to declare the NAME of the function
19627 defined by its DECL node. */
19629 void
19630 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19632 size_t cmse_name_len;
19633 char *cmse_name = 0;
19634 char cmse_prefix[] = "__acle_se_";
19636 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19637 extra function label for each function with the 'cmse_nonsecure_entry'
19638 attribute. This extra function label should be prepended with
19639 '__acle_se_', telling the linker that it needs to create secure gateway
19640 veneers for this function. */
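/* Editorial example: for a cmse_nonsecure_entry function named "foo" this
   roughly produces

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	...
	foo:
	__acle_se_foo:

   so that both labels name the same address and the linker can create the
   secure gateway veneer.  */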
19641 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19642 DECL_ATTRIBUTES (decl)))
19644 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19645 cmse_name = XALLOCAVEC (char, cmse_name_len);
19646 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19647 targetm.asm_out.globalize_label (file, cmse_name);
19649 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19650 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19653 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19654 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19655 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19656 ASM_OUTPUT_LABEL (file, name);
19658 if (cmse_name)
19659 ASM_OUTPUT_LABEL (file, cmse_name);
19661 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19664 /* Write the function name into the code section, directly preceding
19665 the function prologue.
19667 Code will be output similar to this:
19669 .ascii "arm_poke_function_name", 0
19670 .align
19672 .word 0xff000000 + (t1 - t0)
19673 arm_poke_function_name
19674 mov ip, sp
19675 stmfd sp!, {fp, ip, lr, pc}
19676 sub fp, ip, #4
19678 When performing a stack backtrace, code can inspect the value
19679 of 'pc' stored at 'fp' + 0. If the trace function then looks
19680 at location pc - 12 and the top 8 bits are set, then we know
19681 that there is a function name embedded immediately preceding this
19682 location and that its length is ((pc[-3]) & ~0xff000000).
19684 We assume that pc is declared as a pointer to an unsigned long.
19686 It is of no benefit to output the function name if we are assembling
19687 a leaf function. These function types will not contain a stack
19688 backtrace structure, so it is not possible to determine the
19689 function name. */
19690 void
19691 arm_poke_function_name (FILE *stream, const char *name)
19693 unsigned long alignlength;
19694 unsigned long length;
19695 rtx x;
19697 length = strlen (name) + 1;
19698 alignlength = ROUND_UP_WORD (length);
19700 ASM_OUTPUT_ASCII (stream, name, length);
19701 ASM_OUTPUT_ALIGN (stream, 2);
19702 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19703 assemble_aligned_integer (UNITS_PER_WORD, x);
19706 /* Place some comments into the assembler stream
19707 describing the current function. */
19708 static void
19709 arm_output_function_prologue (FILE *f)
19711 unsigned long func_type;
19713 /* Sanity check. */
19714 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19716 func_type = arm_current_func_type ();
19718 switch ((int) ARM_FUNC_TYPE (func_type))
19720 default:
19721 case ARM_FT_NORMAL:
19722 break;
19723 case ARM_FT_INTERWORKED:
19724 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19725 break;
19726 case ARM_FT_ISR:
19727 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19728 break;
19729 case ARM_FT_FIQ:
19730 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19731 break;
19732 case ARM_FT_EXCEPTION:
19733 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19734 break;
19737 if (IS_NAKED (func_type))
19738 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19740 if (IS_VOLATILE (func_type))
19741 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19743 if (IS_NESTED (func_type))
19744 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19745 if (IS_STACKALIGN (func_type))
19746 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19747 if (IS_CMSE_ENTRY (func_type))
19748 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19750 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19751 crtl->args.size,
19752 crtl->args.pretend_args_size,
19753 (HOST_WIDE_INT) get_frame_size ());
19755 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19756 frame_pointer_needed,
19757 cfun->machine->uses_anonymous_args);
19759 if (cfun->machine->lr_save_eliminated)
19760 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19762 if (crtl->calls_eh_return)
19763 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19767 static void
19768 arm_output_function_epilogue (FILE *)
19770 arm_stack_offsets *offsets;
19772 if (TARGET_THUMB1)
19774 int regno;
19776 /* Emit any call-via-reg trampolines that are needed for v4t support
19777 of call_reg and call_value_reg type insns. */
19778 for (regno = 0; regno < LR_REGNUM; regno++)
19780 rtx label = cfun->machine->call_via[regno];
19782 if (label != NULL)
19784 switch_to_section (function_section (current_function_decl));
19785 targetm.asm_out.internal_label (asm_out_file, "L",
19786 CODE_LABEL_NUMBER (label));
19787 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19791 /* ??? Probably not safe to set this here, since it assumes that a
19792 function will be emitted as assembly immediately after we generate
19793 RTL for it. This does not happen for inline functions. */
19794 cfun->machine->return_used_this_function = 0;
19796 else /* TARGET_32BIT */
19798 /* We need to take into account any stack-frame rounding. */
19799 offsets = arm_get_frame_offsets ();
19801 gcc_assert (!use_return_insn (FALSE, NULL)
19802 || (cfun->machine->return_used_this_function != 0)
19803 || offsets->saved_regs == offsets->outgoing_args
19804 || frame_pointer_needed);
19808 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19809 STR and STRD. If an even number of registers are being pushed, one
19810 or more STRD patterns are created for each register pair. If an
19811 odd number of registers are pushed, emit an initial STR followed by
19812 as many STRD instructions as are needed. This works best when the
19813 stack is initially 64-bit aligned (the normal case), since it
19814 ensures that each STRD is also 64-bit aligned. */
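/* Editorial sketch: pushing {r4, r5, r6} (an odd count of three) would
   emit RTL corresponding roughly to

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the single STR performs the whole stack allocation and the STRD
   then stores at a doubleword-aligned offset.  */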
19815 static void
19816 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19818 int num_regs = 0;
19819 int i;
19820 int regno;
19821 rtx par = NULL_RTX;
19822 rtx dwarf = NULL_RTX;
19823 rtx tmp;
19824 bool first = true;
19826 num_regs = bit_count (saved_regs_mask);
19828 /* Must be at least one register to save, and can't save SP or PC. */
19829 gcc_assert (num_regs > 0 && num_regs <= 14);
19830 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19831 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19833 /* Create sequence for DWARF info. All the frame-related data for
19834 debugging is held in this wrapper. */
19835 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19837 /* Describe the stack adjustment. */
19838 tmp = gen_rtx_SET (stack_pointer_rtx,
19839 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19840 RTX_FRAME_RELATED_P (tmp) = 1;
19841 XVECEXP (dwarf, 0, 0) = tmp;
19843 /* Find the first register. */
19844 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19847 i = 0;
19849 /* If there's an odd number of registers to push, start off by
19850 pushing a single register. This ensures that subsequent strd
19851 operations are dword aligned (assuming that SP was originally
19852 64-bit aligned). */
19853 if ((num_regs & 1) != 0)
19855 rtx reg, mem, insn;
19857 reg = gen_rtx_REG (SImode, regno);
19858 if (num_regs == 1)
19859 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19860 stack_pointer_rtx));
19861 else
19862 mem = gen_frame_mem (Pmode,
19863 gen_rtx_PRE_MODIFY
19864 (Pmode, stack_pointer_rtx,
19865 plus_constant (Pmode, stack_pointer_rtx,
19866 -4 * num_regs)));
19868 tmp = gen_rtx_SET (mem, reg);
19869 RTX_FRAME_RELATED_P (tmp) = 1;
19870 insn = emit_insn (tmp);
19871 RTX_FRAME_RELATED_P (insn) = 1;
19872 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19873 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19874 RTX_FRAME_RELATED_P (tmp) = 1;
19875 i++;
19876 regno++;
19877 XVECEXP (dwarf, 0, i) = tmp;
19878 first = false;
19881 while (i < num_regs)
19882 if (saved_regs_mask & (1 << regno))
19884 rtx reg1, reg2, mem1, mem2;
19885 rtx tmp0, tmp1, tmp2;
19886 int regno2;
19888 /* Find the register to pair with this one. */
19889 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19890 regno2++)
19893 reg1 = gen_rtx_REG (SImode, regno);
19894 reg2 = gen_rtx_REG (SImode, regno2);
19896 if (first)
19898 rtx insn;
19900 first = false;
19901 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 -4 * num_regs));
19904 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19905 stack_pointer_rtx,
19906 -4 * (num_regs - 1)));
19907 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19908 plus_constant (Pmode, stack_pointer_rtx,
19909 -4 * (num_regs)));
19910 tmp1 = gen_rtx_SET (mem1, reg1);
19911 tmp2 = gen_rtx_SET (mem2, reg2);
19912 RTX_FRAME_RELATED_P (tmp0) = 1;
19913 RTX_FRAME_RELATED_P (tmp1) = 1;
19914 RTX_FRAME_RELATED_P (tmp2) = 1;
19915 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19916 XVECEXP (par, 0, 0) = tmp0;
19917 XVECEXP (par, 0, 1) = tmp1;
19918 XVECEXP (par, 0, 2) = tmp2;
19919 insn = emit_insn (par);
19920 RTX_FRAME_RELATED_P (insn) = 1;
19921 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19923 else
19925 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19926 stack_pointer_rtx,
19927 4 * i));
19928 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19929 stack_pointer_rtx,
19930 4 * (i + 1)));
19931 tmp1 = gen_rtx_SET (mem1, reg1);
19932 tmp2 = gen_rtx_SET (mem2, reg2);
19933 RTX_FRAME_RELATED_P (tmp1) = 1;
19934 RTX_FRAME_RELATED_P (tmp2) = 1;
19935 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19936 XVECEXP (par, 0, 0) = tmp1;
19937 XVECEXP (par, 0, 1) = tmp2;
19938 emit_insn (par);
19941 /* Create unwind information. This is an approximation. */
19942 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19943 plus_constant (Pmode,
19944 stack_pointer_rtx,
19945 4 * i)),
19946 reg1);
19947 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19948 plus_constant (Pmode,
19949 stack_pointer_rtx,
19950 4 * (i + 1))),
19951 reg2);
19953 RTX_FRAME_RELATED_P (tmp1) = 1;
19954 RTX_FRAME_RELATED_P (tmp2) = 1;
19955 XVECEXP (dwarf, 0, i + 1) = tmp1;
19956 XVECEXP (dwarf, 0, i + 2) = tmp2;
19957 i += 2;
19958 regno = regno2 + 1;
19960 else
19961 regno++;
19963 return;
19966 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19967 whenever possible, otherwise it emits single-word stores. The first store
19968 also allocates stack space for all saved registers, using writeback with
19969 pre-indexed addressing. All other stores use offset addressing. If no STRD
19970 can be emitted, this function emits a sequence of single-word stores,
19971 and not an STM as before, because single-word stores provide more
19972 scheduling freedom and can be turned into an STM by peephole optimizations. */
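/* Editorial sketch: with saved_regs_mask covering {r4, r5, r7} the loop
   below would emit roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store both allocates the 12 bytes and stores the pair,
   and the remaining register uses plain offset addressing.  */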
19973 static void
19974 arm_emit_strd_push (unsigned long saved_regs_mask)
19976 int num_regs = 0;
19977 int i, j, dwarf_index = 0;
19978 int offset = 0;
19979 rtx dwarf = NULL_RTX;
19980 rtx insn = NULL_RTX;
19981 rtx tmp, mem;
19983 /* TODO: More efficient code can be emitted by changing the
19984 layout, e.g., first push all pairs that can use STRD to keep the
19985 stack aligned, and then push all other registers. */
19986 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19987 if (saved_regs_mask & (1 << i))
19988 num_regs++;
19990 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19991 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19992 gcc_assert (num_regs > 0);
19994 /* Create sequence for DWARF info. */
19995 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19997 /* For dwarf info, we generate explicit stack update. */
19998 tmp = gen_rtx_SET (stack_pointer_rtx,
19999 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20000 RTX_FRAME_RELATED_P (tmp) = 1;
20001 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20003 /* Save registers. */
20004 offset = - 4 * num_regs;
20005 j = 0;
20006 while (j <= LAST_ARM_REGNUM)
20007 if (saved_regs_mask & (1 << j))
20009 if ((j % 2 == 0)
20010 && (saved_regs_mask & (1 << (j + 1))))
20012 /* Current register and the next register form a register pair for
20013 which STRD can be generated. */
20014 if (offset < 0)
20016 /* Allocate stack space for all saved registers. */
20017 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20018 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20019 mem = gen_frame_mem (DImode, tmp);
20020 offset = 0;
20022 else if (offset > 0)
20023 mem = gen_frame_mem (DImode,
20024 plus_constant (Pmode,
20025 stack_pointer_rtx,
20026 offset));
20027 else
20028 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20030 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20031 RTX_FRAME_RELATED_P (tmp) = 1;
20032 tmp = emit_insn (tmp);
20034 /* Record the first store insn. */
20035 if (dwarf_index == 1)
20036 insn = tmp;
20038 /* Generate dwarf info. */
20039 mem = gen_frame_mem (SImode,
20040 plus_constant (Pmode,
20041 stack_pointer_rtx,
20042 offset));
20043 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20047 mem = gen_frame_mem (SImode,
20048 plus_constant (Pmode,
20049 stack_pointer_rtx,
20050 offset + 4));
20051 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20052 RTX_FRAME_RELATED_P (tmp) = 1;
20053 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20055 offset += 8;
20056 j += 2;
20058 else
20060 /* Emit a single word store. */
20061 if (offset < 0)
20063 /* Allocate stack space for all saved registers. */
20064 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20065 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20066 mem = gen_frame_mem (SImode, tmp);
20067 offset = 0;
20069 else if (offset > 0)
20070 mem = gen_frame_mem (SImode,
20071 plus_constant (Pmode,
20072 stack_pointer_rtx,
20073 offset));
20074 else
20075 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20077 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20078 RTX_FRAME_RELATED_P (tmp) = 1;
20079 tmp = emit_insn (tmp);
20081 /* Record the first store insn. */
20082 if (dwarf_index == 1)
20083 insn = tmp;
20085 /* Generate dwarf info. */
20086 mem = gen_frame_mem (SImode,
20087 plus_constant(Pmode,
20088 stack_pointer_rtx,
20089 offset));
20090 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20091 RTX_FRAME_RELATED_P (tmp) = 1;
20092 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20094 offset += 4;
20095 j += 1;
20098 else
20099 j++;
20101 /* Attach dwarf info to the first insn we generate. */
20102 gcc_assert (insn != NULL_RTX);
20103 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20104 RTX_FRAME_RELATED_P (insn) = 1;
20107 /* Generate and emit an insn that we will recognize as a push_multi.
20108 Unfortunately, since this insn does not reflect very well the actual
20109 semantics of the operation, we need to annotate the insn for the benefit
20110 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20111 MASK for registers that should be annotated for DWARF2 frame unwind
20112 information. */
20113 static rtx
20114 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20116 int num_regs = 0;
20117 int num_dwarf_regs = 0;
20118 int i, j;
20119 rtx par;
20120 rtx dwarf;
20121 int dwarf_par_index;
20122 rtx tmp, reg;
20124 /* We don't record the PC in the dwarf frame information. */
20125 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20127 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20129 if (mask & (1 << i))
20130 num_regs++;
20131 if (dwarf_regs_mask & (1 << i))
20132 num_dwarf_regs++;
20135 gcc_assert (num_regs && num_regs <= 16);
20136 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20138 /* For the body of the insn we are going to generate an UNSPEC in
20139 parallel with several USEs. This allows the insn to be recognized
20140 by the push_multi pattern in the arm.md file.
20142 The body of the insn looks something like this:
20144 (parallel [
20145 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20146 (const_int:SI <num>)))
20147 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20148 (use (reg:SI XX))
20149 (use (reg:SI YY))
20153 For the frame note however, we try to be more explicit and actually
20154 show each register being stored into the stack frame, plus a (single)
20155 decrement of the stack pointer. We do it this way in order to be
20156 friendly to the stack unwinding code, which only wants to see a single
20157 stack decrement per instruction. The RTL we generate for the note looks
20158 something like this:
20160 (sequence [
20161 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20162 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20163 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20164 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20168 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20169 instead we'd have a parallel expression detailing all
20170 the stores to the various memory addresses so that debug
20171 information is more up-to-date. Remember however while writing
20172 this to take care of the constraints with the push instruction.
20174 Note also that this has to be taken care of for the VFP registers.
20176 For more see PR43399. */
20178 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20179 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20180 dwarf_par_index = 1;
20182 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (mask & (1 << i))
20186 reg = gen_rtx_REG (SImode, i);
20188 XVECEXP (par, 0, 0)
20189 = gen_rtx_SET (gen_frame_mem
20190 (BLKmode,
20191 gen_rtx_PRE_MODIFY (Pmode,
20192 stack_pointer_rtx,
20193 plus_constant
20194 (Pmode, stack_pointer_rtx,
20195 -4 * num_regs))
20197 gen_rtx_UNSPEC (BLKmode,
20198 gen_rtvec (1, reg),
20199 UNSPEC_PUSH_MULT));
20201 if (dwarf_regs_mask & (1 << i))
20203 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20204 reg);
20205 RTX_FRAME_RELATED_P (tmp) = 1;
20206 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20209 break;
20213 for (j = 1, i++; j < num_regs; i++)
20215 if (mask & (1 << i))
20217 reg = gen_rtx_REG (SImode, i);
20219 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20221 if (dwarf_regs_mask & (1 << i))
20224 = gen_rtx_SET (gen_frame_mem
20225 (SImode,
20226 plus_constant (Pmode, stack_pointer_rtx,
20227 4 * j)),
20228 reg);
20229 RTX_FRAME_RELATED_P (tmp) = 1;
20230 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20233 j++;
20237 par = emit_insn (par);
20239 tmp = gen_rtx_SET (stack_pointer_rtx,
20240 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20241 RTX_FRAME_RELATED_P (tmp) = 1;
20242 XVECEXP (dwarf, 0, 0) = tmp;
20244 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20246 return par;
20249 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20250 SIZE is the offset to be adjusted.
20251 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20252 static void
20253 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20255 rtx dwarf;
20257 RTX_FRAME_RELATED_P (insn) = 1;
20258 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20259 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20262 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20263 SAVED_REGS_MASK shows which registers need to be restored.
20265 Unfortunately, since this insn does not reflect very well the actual
20266 semantics of the operation, we need to annotate the insn for the benefit
20267 of DWARF2 frame unwind information. */
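/* Editorial note: for SAVED_REGS_MASK covering {r4, r5, lr} this builds a
   PARALLEL of the stack update (sp := sp + 12) plus one load per register
   from sp, sp + 4 and sp + 8, which the pop_multi pattern can then print
   as a single pop/LDM instruction.  */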
20268 static void
20269 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20271 int num_regs = 0;
20272 int i, j;
20273 rtx par;
20274 rtx dwarf = NULL_RTX;
20275 rtx tmp, reg;
20276 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20277 int offset_adj;
20278 int emit_update;
20280 offset_adj = return_in_pc ? 1 : 0;
20281 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20282 if (saved_regs_mask & (1 << i))
20283 num_regs++;
20285 gcc_assert (num_regs && num_regs <= 16);
20287 /* If SP is in reglist, then we don't emit SP update insn. */
20288 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20290 /* The parallel needs to hold num_regs SETs
20291 and one SET for the stack update. */
20292 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20294 if (return_in_pc)
20295 XVECEXP (par, 0, 0) = ret_rtx;
20297 if (emit_update)
20299 /* Increment the stack pointer, based on there being
20300 num_regs 4-byte registers to restore. */
20301 tmp = gen_rtx_SET (stack_pointer_rtx,
20302 plus_constant (Pmode,
20303 stack_pointer_rtx,
20304 4 * num_regs));
20305 RTX_FRAME_RELATED_P (tmp) = 1;
20306 XVECEXP (par, 0, offset_adj) = tmp;
20309 /* Now restore every reg, which may include PC. */
20310 for (j = 0, i = 0; j < num_regs; i++)
20311 if (saved_regs_mask & (1 << i))
20313 reg = gen_rtx_REG (SImode, i);
20314 if ((num_regs == 1) && emit_update && !return_in_pc)
20316 /* Emit single load with writeback. */
20317 tmp = gen_frame_mem (SImode,
20318 gen_rtx_POST_INC (Pmode,
20319 stack_pointer_rtx));
20320 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20321 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20322 return;
20325 tmp = gen_rtx_SET (reg,
20326 gen_frame_mem
20327 (SImode,
20328 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20329 RTX_FRAME_RELATED_P (tmp) = 1;
20330 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20332 /* We need to maintain a sequence for DWARF info too. As the dwarf
20333 info should not include the PC, skip it. */
20334 if (i != PC_REGNUM)
20335 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20337 j++;
20340 if (return_in_pc)
20341 par = emit_jump_insn (par);
20342 else
20343 par = emit_insn (par);
20345 REG_NOTES (par) = dwarf;
20346 if (!return_in_pc)
20347 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20348 stack_pointer_rtx, stack_pointer_rtx);
20351 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20352 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. */
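/* Editorial sketch: for a block of four D registers starting at d8 with
   BASE_REG being sp, this builds a PARALLEL of sp := sp + 32 followed by
   four DFmode loads at sp, sp + 8, sp + 16 and sp + 24, i.e. roughly a
   single vldm sp!, {d8-d11}.  */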
20357 static void
20358 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20360 int i, j;
20361 rtx par;
20362 rtx dwarf = NULL_RTX;
20363 rtx tmp, reg;
20365 gcc_assert (num_regs && num_regs <= 32);
20367 /* Workaround ARM10 VFPr1 bug. */
20368 if (num_regs == 2 && !arm_arch6)
20370 if (first_reg == 15)
20371 first_reg--;
20373 num_regs++;
20376 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20377 there could be up to 32 D-registers to restore.
20378 If there are more than 16 D-registers, make two recursive calls,
20379 each of which emits one pop_multi instruction. */
20380 if (num_regs > 16)
20382 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20383 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20384 return;
20387 /* The parallel needs to hold num_regs SETs
20388 and one SET for the stack update. */
20389 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20391 /* Increment the stack pointer, based on there being
20392 num_regs 8-byte registers to restore. */
20393 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20394 RTX_FRAME_RELATED_P (tmp) = 1;
20395 XVECEXP (par, 0, 0) = tmp;
20397 /* Now show every reg that will be restored, using a SET for each. */
20398 for (j = 0, i=first_reg; j < num_regs; i += 2)
20400 reg = gen_rtx_REG (DFmode, i);
20402 tmp = gen_rtx_SET (reg,
20403 gen_frame_mem
20404 (DFmode,
20405 plus_constant (Pmode, base_reg, 8 * j)));
20406 RTX_FRAME_RELATED_P (tmp) = 1;
20407 XVECEXP (par, 0, j + 1) = tmp;
20409 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20411 j++;
20414 par = emit_insn (par);
20415 REG_NOTES (par) = dwarf;
20417 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20418 if (REGNO (base_reg) == IP_REGNUM)
20420 RTX_FRAME_RELATED_P (par) = 1;
20421 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20423 else
20424 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20425 base_reg, base_reg);
20428 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20429 even number of registers is being popped, multiple LDRD patterns are created for
20430 all register pairs. If an odd number of registers is popped, the last register is
20431 loaded using an LDR pattern. */
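/* Editorial sketch: popping {r4, r5, r6} (no PC) would produce roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the trailing odd register handled by a post-incremented LDR.  */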
20432 static void
20433 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20435 int num_regs = 0;
20436 int i, j;
20437 rtx par = NULL_RTX;
20438 rtx dwarf = NULL_RTX;
20439 rtx tmp, reg, tmp1;
20440 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20442 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20443 if (saved_regs_mask & (1 << i))
20444 num_regs++;
20446 gcc_assert (num_regs && num_regs <= 16);
20448 /* We cannot generate ldrd for PC, so reduce the count if PC is
20449 to be popped. If num_regs was even it now becomes odd, and a
20450 pop with PC can be generated; if num_regs was odd it becomes even,
20451 and an ldr with return can be generated for PC. */
20452 if (return_in_pc)
20453 num_regs--;
20455 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20457 /* Var j iterates over all the registers in saved_regs_mask. Var i
20458 gives the index of a saved register within the stack frame.
20459 A PARALLEL RTX for each register pair is created here, so that the
20460 pattern for LDRD can be matched. As PC is always the last register
20461 to be popped, and num_regs has already been decremented if PC is in
20462 the set, we don't have to worry about PC in this loop. */
20463 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20464 if (saved_regs_mask & (1 << j))
20466 /* Create RTX for memory load. */
20467 reg = gen_rtx_REG (SImode, j);
20468 tmp = gen_rtx_SET (reg,
20469 gen_frame_mem (SImode,
20470 plus_constant (Pmode,
20471 stack_pointer_rtx, 4 * i)));
20472 RTX_FRAME_RELATED_P (tmp) = 1;
20474 if (i % 2 == 0)
20476 /* When saved-register index (i) is even, the RTX to be emitted is
20477 yet to be created. Hence create it first. The LDRD pattern we
20478 are generating is :
20479 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20480 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20481 where target registers need not be consecutive. */
20482 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20483 dwarf = NULL_RTX;
20486 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20487 added as the 0th element; if i is odd, it is added as the 1st element
20488 of the LDRD pattern shown above. */
20489 XVECEXP (par, 0, (i % 2)) = tmp;
20490 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20492 if ((i % 2) == 1)
20494 /* When the saved-register index (i) is odd, RTXs for both of the registers
20495 to be loaded have been generated in the LDRD pattern given above, and the
20496 pattern can be emitted now. */
20497 par = emit_insn (par);
20498 REG_NOTES (par) = dwarf;
20499 RTX_FRAME_RELATED_P (par) = 1;
20502 i++;
20505 /* If the number of registers restored is odd and return_in_pc is false, or
20506 the number of registers is even and return_in_pc is true, the last
20507 register is popped using LDR. It can be PC as well. Hence, adjust the
20508 stack first and then use LDR with post increment. */
20510 /* Increment the stack pointer, based on there being
20511 num_regs 4-byte registers to restore. */
20512 tmp = gen_rtx_SET (stack_pointer_rtx,
20513 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20514 RTX_FRAME_RELATED_P (tmp) = 1;
20515 tmp = emit_insn (tmp);
20516 if (!return_in_pc)
20518 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20519 stack_pointer_rtx, stack_pointer_rtx);
20522 dwarf = NULL_RTX;
20524 if (((num_regs % 2) == 1 && !return_in_pc)
20525 || ((num_regs % 2) == 0 && return_in_pc))
20527 /* Scan for the single register to be popped. Skip until the saved
20528 register is found. */
20529 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20531 /* Gen LDR with post increment here. */
20532 tmp1 = gen_rtx_MEM (SImode,
20533 gen_rtx_POST_INC (SImode,
20534 stack_pointer_rtx));
20535 set_mem_alias_set (tmp1, get_frame_alias_set ());
20537 reg = gen_rtx_REG (SImode, j);
20538 tmp = gen_rtx_SET (reg, tmp1);
20539 RTX_FRAME_RELATED_P (tmp) = 1;
20540 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20542 if (return_in_pc)
20544 /* If return_in_pc, j must be PC_REGNUM. */
20545 gcc_assert (j == PC_REGNUM);
20546 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20547 XVECEXP (par, 0, 0) = ret_rtx;
20548 XVECEXP (par, 0, 1) = tmp;
20549 par = emit_jump_insn (par);
20551 else
20553 par = emit_insn (tmp);
20554 REG_NOTES (par) = dwarf;
20555 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20556 stack_pointer_rtx, stack_pointer_rtx);
20560 else if ((num_regs % 2) == 1 && return_in_pc)
20562 /* There are 2 registers to be popped. So, generate the pattern
20563 pop_multiple_with_stack_update_and_return to pop in PC. */
20564 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20567 return;
20570 /* LDRD in ARM mode needs consecutive registers as operands. This function
20571 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20572 offset addressing and then generates one separate stack update. This provides
20573 more scheduling freedom, compared to writeback on every load. However,
20574 if the function returns using load into PC directly
20575 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20576 before the last load. TODO: Add a peephole optimization to recognize
20577 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20578 peephole optimization to merge the load at stack-offset zero
20579 with the stack update instruction using load with writeback
20580 in post-index addressing mode. */
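/* Editorial sketch: restoring {r4, r5, r7} with this scheme gives roughly

	ldrd	r4, r5, [sp]
	ldr	r7, [sp, #8]
	add	sp, sp, #12

   i.e. all loads use offset addressing and a single stack update follows.  */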
20581 static void
20582 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20584 int j = 0;
20585 int offset = 0;
20586 rtx par = NULL_RTX;
20587 rtx dwarf = NULL_RTX;
20588 rtx tmp, mem;
20590 /* Restore saved registers. */
20591 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20592 j = 0;
20593 while (j <= LAST_ARM_REGNUM)
20594 if (saved_regs_mask & (1 << j))
20596 if ((j % 2) == 0
20597 && (saved_regs_mask & (1 << (j + 1)))
20598 && (j + 1) != PC_REGNUM)
20600 /* Current register and next register form register pair for which
20601 LDRD can be generated. PC is always the last register popped, and
20602 we handle it separately. */
20603 if (offset > 0)
20604 mem = gen_frame_mem (DImode,
20605 plus_constant (Pmode,
20606 stack_pointer_rtx,
20607 offset));
20608 else
20609 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20611 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20612 tmp = emit_insn (tmp);
20613 RTX_FRAME_RELATED_P (tmp) = 1;
20615 /* Generate dwarf info. */
20617 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20618 gen_rtx_REG (SImode, j),
20619 NULL_RTX);
20620 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20621 gen_rtx_REG (SImode, j + 1),
20622 dwarf);
20624 REG_NOTES (tmp) = dwarf;
20626 offset += 8;
20627 j += 2;
20629 else if (j != PC_REGNUM)
20631 /* Emit a single word load. */
20632 if (offset > 0)
20633 mem = gen_frame_mem (SImode,
20634 plus_constant (Pmode,
20635 stack_pointer_rtx,
20636 offset));
20637 else
20638 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20640 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20641 tmp = emit_insn (tmp);
20642 RTX_FRAME_RELATED_P (tmp) = 1;
20644 /* Generate dwarf info. */
20645 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20646 gen_rtx_REG (SImode, j),
20647 NULL_RTX);
20649 offset += 4;
20650 j += 1;
20652 else /* j == PC_REGNUM */
20653 j++;
20655 else
20656 j++;
20658 /* Update the stack. */
20659 if (offset > 0)
20661 tmp = gen_rtx_SET (stack_pointer_rtx,
20662 plus_constant (Pmode,
20663 stack_pointer_rtx,
20664 offset));
20665 tmp = emit_insn (tmp);
20666 arm_add_cfa_adjust_cfa_note (tmp, offset,
20667 stack_pointer_rtx, stack_pointer_rtx);
20668 offset = 0;
20671 if (saved_regs_mask & (1 << PC_REGNUM))
20673 /* Only PC is to be popped. */
20674 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20675 XVECEXP (par, 0, 0) = ret_rtx;
20676 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20677 gen_frame_mem (SImode,
20678 gen_rtx_POST_INC (SImode,
20679 stack_pointer_rtx)));
20680 RTX_FRAME_RELATED_P (tmp) = 1;
20681 XVECEXP (par, 0, 1) = tmp;
20682 par = emit_jump_insn (par);
20684 /* Generate dwarf info. */
20685 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20686 gen_rtx_REG (SImode, PC_REGNUM),
20687 NULL_RTX);
20688 REG_NOTES (par) = dwarf;
20689 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20690 stack_pointer_rtx, stack_pointer_rtx);
20694 /* Calculate the size of the return value that is passed in registers. */
20695 static unsigned
20696 arm_size_return_regs (void)
20698 machine_mode mode;
20700 if (crtl->return_rtx != 0)
20701 mode = GET_MODE (crtl->return_rtx);
20702 else
20703 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20705 return GET_MODE_SIZE (mode);
20708 /* Return true if the current function needs to save/restore LR. */
20709 static bool
20710 thumb_force_lr_save (void)
20712 return !cfun->machine->lr_save_eliminated
20713 && (!crtl->is_leaf
20714 || thumb_far_jump_used_p ()
20715 || df_regs_ever_live_p (LR_REGNUM));
20718 /* Return true if CALL is an indirect tail call. In that case
20719 we do not know whether r3 will be available, since the call
20720 target is held in a register. */
20721 static bool
20722 is_indirect_tailcall_p (rtx call)
20724 rtx pat = PATTERN (call);
20726 /* Indirect tail call. */
20727 pat = XVECEXP (pat, 0, 0);
20728 if (GET_CODE (pat) == SET)
20729 pat = SET_SRC (pat);
20731 pat = XEXP (XEXP (pat, 0), 0);
20732 return REG_P (pat);
20735 /* Return true if r3 is used by any of the tail call insns in the
20736 current function. */
20737 static bool
20738 any_sibcall_could_use_r3 (void)
20740 edge_iterator ei;
20741 edge e;
20743 if (!crtl->tail_call_emit)
20744 return false;
20745 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20746 if (e->flags & EDGE_SIBCALL)
20748 rtx_insn *call = BB_END (e->src);
20749 if (!CALL_P (call))
20750 call = prev_nonnote_nondebug_insn (call);
20751 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20752 if (find_regno_fusage (call, USE, 3)
20753 || is_indirect_tailcall_p (call))
20754 return true;
20756 return false;
20760 /* Compute the distance from register FROM to register TO.
20761 These can be the arg pointer (26), the soft frame pointer (25),
20762 the stack pointer (13) or the hard frame pointer (11).
20763 In thumb mode r7 is used as the soft frame pointer, if needed.
20764 Typical stack layout looks like this:
20766 old stack pointer -> | |
20767 ----
20768 | | \
20769 | | saved arguments for
20770 | | vararg functions
20771 | | /
20773 hard FP & arg pointer -> | | \
20774 | | stack
20775 | | frame
20776 | | /
20778 | | \
20779 | | call saved
20780 | | registers
20781 soft frame pointer -> | | /
20783 | | \
20784 | | local
20785 | | variables
20786 locals base pointer -> | | /
20788 | | \
20789 | | outgoing
20790 | | arguments
20791 current stack pointer -> | | /
20794 For a given function some or all of these stack components
20795 may not be needed, giving rise to the possibility of
20796 eliminating some of the registers.
20798 The values returned by this function must reflect the behavior
20799 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20801 The sign of the number returned reflects the direction of stack
20802 growth, so the values are positive for all eliminations except
20803 from the soft frame pointer to the hard frame pointer.
20805 SFP may point just inside the local variables block to ensure correct
20806 alignment. */
20809 /* Return cached stack offsets. */
20811 static arm_stack_offsets *
20812 arm_get_frame_offsets (void)
20814 struct arm_stack_offsets *offsets;
20816 offsets = &cfun->machine->stack_offsets;
20818 return offsets;
20822 /* Calculate stack offsets. These are used to calculate register elimination
20823 offsets and in prologue/epilogue code. Also calculates which registers
20824 should be saved. */
20826 static void
20827 arm_compute_frame_layout (void)
20829 struct arm_stack_offsets *offsets;
20830 unsigned long func_type;
20831 int saved;
20832 int core_saved;
20833 HOST_WIDE_INT frame_size;
20834 int i;
20836 offsets = &cfun->machine->stack_offsets;
20838 /* Initially this is the size of the local variables. It will be translated
20839 into an offset once we have determined the size of preceding data. */
20840 frame_size = ROUND_UP_WORD (get_frame_size ());
20842 /* Space for variadic functions. */
20843 offsets->saved_args = crtl->args.pretend_args_size;
20845 /* In Thumb mode this is incorrect, but never used. */
20846 offsets->frame
20847 = (offsets->saved_args
20848 + arm_compute_static_chain_stack_bytes ()
20849 + (frame_pointer_needed ? 4 : 0));
20851 if (TARGET_32BIT)
20853 unsigned int regno;
20855 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20856 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20857 saved = core_saved;
20859 /* We know that SP will be doubleword aligned on entry, and we must
20860 preserve that condition at any subroutine call. We also require the
20861 soft frame pointer to be doubleword aligned. */
20863 if (TARGET_REALLY_IWMMXT)
20865 /* Check for the call-saved iWMMXt registers. */
20866 for (regno = FIRST_IWMMXT_REGNUM;
20867 regno <= LAST_IWMMXT_REGNUM;
20868 regno++)
20869 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20870 saved += 8;
20873 func_type = arm_current_func_type ();
20874 /* Space for saved VFP registers. */
20875 if (! IS_VOLATILE (func_type)
20876 && TARGET_HARD_FLOAT)
20877 saved += arm_get_vfp_saved_size ();
20879 else /* TARGET_THUMB1 */
20881 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20882 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20883 saved = core_saved;
20884 if (TARGET_BACKTRACE)
20885 saved += 16;
20888 /* Saved registers include the stack frame. */
20889 offsets->saved_regs
20890 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20891 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20893 /* A leaf function does not need any stack alignment if it has nothing
20894 on the stack. */
20895 if (crtl->is_leaf && frame_size == 0
20896 /* However if it calls alloca(), we have a dynamically allocated
20897 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20898 && ! cfun->calls_alloca)
20900 offsets->outgoing_args = offsets->soft_frame;
20901 offsets->locals_base = offsets->soft_frame;
20902 return;
20905 /* Ensure SFP has the correct alignment. */
20906 if (ARM_DOUBLEWORD_ALIGN
20907 && (offsets->soft_frame & 7))
20909 offsets->soft_frame += 4;
20910 /* Try to align stack by pushing an extra reg. Don't bother doing this
20911 when there is a stack frame as the alignment will be rolled into
20912 the normal stack adjustment. */
20913 if (frame_size + crtl->outgoing_args_size == 0)
20915 int reg = -1;
20917 /* Register r3 is caller-saved. Normally it does not need to be
20918 saved on entry by the prologue. However if we choose to save
20919 it for padding then we may confuse the compiler into thinking
20920 a prologue sequence is required when in fact it is not. This
20921 will occur when shrink-wrapping if r3 is used as a scratch
20922 register and there are no other callee-saved writes.
20924 This situation can be avoided if we instead choose a callee-saved
20925 register for padding, provided other callee-saved registers are
20926 available and r3 is not mandatory. */
20927 bool prefer_callee_reg_p = false;
20929 /* If it is safe to use r3, then do so. This sometimes
20930 generates better code on Thumb-2 by avoiding the need to
20931 use 32-bit push/pop instructions. */
20932 if (! any_sibcall_could_use_r3 ()
20933 && arm_size_return_regs () <= 12
20934 && (offsets->saved_regs_mask & (1 << 3)) == 0
20935 && (TARGET_THUMB2
20936 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20938 reg = 3;
20939 if (!TARGET_THUMB2)
20940 prefer_callee_reg_p = true;
20942 if (reg == -1
20943 || prefer_callee_reg_p)
20945 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20947 /* Avoid fixed registers; they may be changed at
20948 arbitrary times so it's unsafe to restore them
20949 during the epilogue. */
20950 if (!fixed_regs[i]
20951 && (offsets->saved_regs_mask & (1 << i)) == 0)
20953 reg = i;
20954 break;
20959 if (reg != -1)
20961 offsets->saved_regs += 4;
20962 offsets->saved_regs_mask |= (1 << reg);
20967 offsets->locals_base = offsets->soft_frame + frame_size;
20968 offsets->outgoing_args = (offsets->locals_base
20969 + crtl->outgoing_args_size);
20971 if (ARM_DOUBLEWORD_ALIGN)
20973 /* Ensure SP remains doubleword aligned. */
20974 if (offsets->outgoing_args & 7)
20975 offsets->outgoing_args += 4;
20976 gcc_assert (!(offsets->outgoing_args & 7));
20981 /* Calculate the relative offsets for the different stack pointers. Positive
20982 offsets are in the direction of stack growth. */
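/* Illustrative summary, not part of the original source: the offsets
   computed by arm_compute_frame_layout above satisfy (for TARGET_32BIT)
   saved_args = crtl->args.pretend_args_size,
   frame = saved_args + static chain bytes + (frame_pointer_needed ? 4 : 0),
   saved_regs = saved_args + static chain bytes + size of the register save area,
   soft_frame = saved_regs + CALLER_INTERWORKING_SLOT_SIZE (doubleword aligned),
   locals_base = soft_frame + the rounded frame size, and
   outgoing_args = locals_base + crtl->outgoing_args_size (doubleword aligned).
   The elimination offsets returned below are simply differences of these
   values. */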
20984 HOST_WIDE_INT
20985 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20987 arm_stack_offsets *offsets;
20989 offsets = arm_get_frame_offsets ();
20991 /* OK, now we have enough information to compute the distances.
20992 There must be an entry in these switch tables for each pair
20993 of registers in ELIMINABLE_REGS, even if some of the entries
20994 seem to be redundant or useless. */
20995 switch (from)
20997 case ARG_POINTER_REGNUM:
20998 switch (to)
21000 case THUMB_HARD_FRAME_POINTER_REGNUM:
21001 return 0;
21003 case FRAME_POINTER_REGNUM:
21004 /* This is the reverse of the soft frame pointer
21005 to hard frame pointer elimination below. */
21006 return offsets->soft_frame - offsets->saved_args;
21008 case ARM_HARD_FRAME_POINTER_REGNUM:
21009 /* This is only non-zero in the case where the static chain register
21010 is stored above the frame. */
21011 return offsets->frame - offsets->saved_args - 4;
21013 case STACK_POINTER_REGNUM:
21014 /* If nothing has been pushed on the stack at all
21015 then this will return -4. This *is* correct! */
21016 return offsets->outgoing_args - (offsets->saved_args + 4);
21018 default:
21019 gcc_unreachable ();
21021 gcc_unreachable ();
21023 case FRAME_POINTER_REGNUM:
21024 switch (to)
21026 case THUMB_HARD_FRAME_POINTER_REGNUM:
21027 return 0;
21029 case ARM_HARD_FRAME_POINTER_REGNUM:
21030 /* The hard frame pointer points to the top entry in the
21031 stack frame. The soft frame pointer points to the bottom entry
21032 in the stack frame. If there is no stack frame at all,
21033 then they are identical. */
21035 return offsets->frame - offsets->soft_frame;
21037 case STACK_POINTER_REGNUM:
21038 return offsets->outgoing_args - offsets->soft_frame;
21040 default:
21041 gcc_unreachable ();
21043 gcc_unreachable ();
21045 default:
21046 /* You cannot eliminate from the stack pointer.
21047 In theory you could eliminate from the hard frame
21048 pointer to the stack pointer, but this will never
21049 happen, since if a stack frame is not needed the
21050 hard frame pointer will never be used. */
21051 gcc_unreachable ();
21055 /* Given FROM and TO register numbers, say whether this elimination is
21056 allowed. Frame pointer elimination is automatically handled.
21058 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21059 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21060 pointer, we must eliminate FRAME_POINTER_REGNUM into
21061 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21062 ARG_POINTER_REGNUM. */
21064 bool
21065 arm_can_eliminate (const int from, const int to)
21067 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21068 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21069 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21070 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21071 true);
21074 /* Emit RTL to save coprocessor registers on function entry. Returns the
21075 number of bytes pushed. */
21077 static int
21078 arm_save_coproc_regs(void)
21080 int saved_size = 0;
21081 unsigned reg;
21082 unsigned start_reg;
21083 rtx insn;
21085 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21086 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21088 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21089 insn = gen_rtx_MEM (V2SImode, insn);
21090 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21091 RTX_FRAME_RELATED_P (insn) = 1;
21092 saved_size += 8;
21095 if (TARGET_HARD_FLOAT)
21097 start_reg = FIRST_VFP_REGNUM;
21099 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21101 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21102 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21104 if (start_reg != reg)
21105 saved_size += vfp_emit_fstmd (start_reg,
21106 (reg - start_reg) / 2);
21107 start_reg = reg + 2;
21110 if (start_reg != reg)
21111 saved_size += vfp_emit_fstmd (start_reg,
21112 (reg - start_reg) / 2);
21114 return saved_size;
21118 /* Set the Thumb frame pointer from the stack pointer. */
21120 static void
21121 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21123 HOST_WIDE_INT amount;
21124 rtx insn, dwarf;
21126 amount = offsets->outgoing_args - offsets->locals_base;
21127 if (amount < 1024)
21128 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21129 stack_pointer_rtx, GEN_INT (amount)));
21130 else
21132 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21133 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21134 expects the first two operands to be the same. */
21135 if (TARGET_THUMB2)
21137 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21138 stack_pointer_rtx,
21139 hard_frame_pointer_rtx));
21141 else
21143 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21144 hard_frame_pointer_rtx,
21145 stack_pointer_rtx));
21147 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21148 plus_constant (Pmode, stack_pointer_rtx, amount));
21149 RTX_FRAME_RELATED_P (dwarf) = 1;
21150 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21153 RTX_FRAME_RELATED_P (insn) = 1;
21156 struct scratch_reg {
21157 rtx reg;
21158 bool saved;
21161 /* Return a short-lived scratch register for use as a 2nd scratch register on
21162 function entry after the registers are saved in the prologue. This register
21163 must be released by means of release_scratch_register_on_entry. IP is not
21164 considered since it is always used as the 1st scratch register if available.
21166 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21167 mask of live registers. */
21169 static void
21170 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21171 unsigned long live_regs)
21173 int regno = -1;
21175 sr->saved = false;
21177 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21178 regno = LR_REGNUM;
21179 else
21181 unsigned int i;
21183 for (i = 4; i < 11; i++)
21184 if (regno1 != i && (live_regs & (1 << i)) != 0)
21186 regno = i;
21187 break;
21190 if (regno < 0)
21192 /* If IP is used as the 1st scratch register for a nested function,
21193 then either r3 wasn't available or it is used to preserve IP. */
21194 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21195 regno1 = 3;
21196 regno = (regno1 == 3 ? 2 : 3);
21197 sr->saved
21198 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21199 regno);
21203 sr->reg = gen_rtx_REG (SImode, regno);
21204 if (sr->saved)
21206 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21207 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21208 rtx x = gen_rtx_SET (stack_pointer_rtx,
21209 plus_constant (Pmode, stack_pointer_rtx, -4));
21210 RTX_FRAME_RELATED_P (insn) = 1;
21211 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21215 /* Release a scratch register obtained from the preceding function. */
21217 static void
21218 release_scratch_register_on_entry (struct scratch_reg *sr)
21220 if (sr->saved)
21222 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21223 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21224 rtx x = gen_rtx_SET (stack_pointer_rtx,
21225 plus_constant (Pmode, stack_pointer_rtx, 4));
21226 RTX_FRAME_RELATED_P (insn) = 1;
21227 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21231 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21233 #if PROBE_INTERVAL > 4096
21234 #error Cannot use indexed addressing mode for stack probing
21235 #endif
21237 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21238 inclusive. These are offsets from the current stack pointer. REGNO1
21239 is the index number of the 1st scratch register and LIVE_REGS is the
21240 mask of live registers. */
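/* Worked example, for illustration only and assuming PROBE_INTERVAL is 4096:
   with FIRST == 4096 and SIZE == 4096 the small-count case below loads REG1
   with 8192, sets REG1 = SP - 8192 and probes at REG1 + 0, i.e. it emits a
   single store 8192 bytes (FIRST + SIZE) below the incoming stack pointer. */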
21242 static void
21243 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21244 unsigned int regno1, unsigned long live_regs)
21246 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21248 /* See if we have a constant small number of probes to generate. If so,
21249 that's the easy case. */
21250 if (size <= PROBE_INTERVAL)
21252 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21253 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21254 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21257 /* The run-time loop is made up of 10 insns in the generic case while the
21258 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21259 else if (size <= 5 * PROBE_INTERVAL)
21261 HOST_WIDE_INT i, rem;
21263 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21264 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21265 emit_stack_probe (reg1);
21267 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21268 it exceeds SIZE. If only two probes are needed, this will not
21269 generate any code. Then probe at FIRST + SIZE. */
21270 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21272 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21273 emit_stack_probe (reg1);
21276 rem = size - (i - PROBE_INTERVAL);
21277 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21279 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21280 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21282 else
21283 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21286 /* Otherwise, do the same as above, but in a loop. Note that we must be
21287 extra careful with variables wrapping around because we might be at
21288 the very top (or the very bottom) of the address space and we have
21289 to be able to handle this case properly; in particular, we use an
21290 equality test for the loop condition. */
21291 else
21293 HOST_WIDE_INT rounded_size;
21294 struct scratch_reg sr;
21296 get_scratch_register_on_entry (&sr, regno1, live_regs);
21298 emit_move_insn (reg1, GEN_INT (first));
21301 /* Step 1: round SIZE to the previous multiple of the interval. */
21303 rounded_size = size & -PROBE_INTERVAL;
21304 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21307 /* Step 2: compute initial and final value of the loop counter. */
21309 /* TEST_ADDR = SP + FIRST. */
21310 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21312 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21313 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21316 /* Step 3: the loop
21320 do { TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21321 probe at TEST_ADDR }
21323 while (TEST_ADDR != LAST_ADDR)
21325 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21326 until it is equal to ROUNDED_SIZE. */
21328 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21331 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21332 that SIZE is equal to ROUNDED_SIZE. */
21334 if (size != rounded_size)
21336 HOST_WIDE_INT rem = size - rounded_size;
21338 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21340 emit_set_insn (sr.reg,
21341 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21342 emit_stack_probe (plus_constant (Pmode, sr.reg,
21343 PROBE_INTERVAL - rem));
21345 else
21346 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21349 release_scratch_register_on_entry (&sr);
21352 /* Make sure nothing is scheduled before we are done. */
21353 emit_insn (gen_blockage ());
21356 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21357 absolute addresses. */
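/* For illustration only (the register numbers and label name are
   hypothetical, and PROBE_INTERVAL is assumed to be 4096), the loop emitted
   below looks like:

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
 */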
21359 const char *
21360 output_probe_stack_range (rtx reg1, rtx reg2)
21362 static int labelno = 0;
21363 char loop_lab[32];
21364 rtx xops[2];
21366 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21368 /* Loop. */
21369 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21371 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21372 xops[0] = reg1;
21373 xops[1] = GEN_INT (PROBE_INTERVAL);
21374 output_asm_insn ("sub\t%0, %0, %1", xops);
21376 /* Probe at TEST_ADDR. */
21377 output_asm_insn ("str\tr0, [%0, #0]", xops);
21379 /* Test if TEST_ADDR == LAST_ADDR. */
21380 xops[1] = reg2;
21381 output_asm_insn ("cmp\t%0, %1", xops);
21383 /* Branch. */
21384 fputs ("\tbne\t", asm_out_file);
21385 assemble_name_raw (asm_out_file, loop_lab);
21386 fputc ('\n', asm_out_file);
21388 return "";
21391 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21392 function. */
21393 void
21394 arm_expand_prologue (void)
21396 rtx amount;
21397 rtx insn;
21398 rtx ip_rtx;
21399 unsigned long live_regs_mask;
21400 unsigned long func_type;
21401 int fp_offset = 0;
21402 int saved_pretend_args = 0;
21403 int saved_regs = 0;
21404 unsigned HOST_WIDE_INT args_to_push;
21405 HOST_WIDE_INT size;
21406 arm_stack_offsets *offsets;
21407 bool clobber_ip;
21409 func_type = arm_current_func_type ();
21411 /* Naked functions don't have prologues. */
21412 if (IS_NAKED (func_type))
21414 if (flag_stack_usage_info)
21415 current_function_static_stack_size = 0;
21416 return;
21419 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21420 args_to_push = crtl->args.pretend_args_size;
21422 /* Compute which registers we will have to save onto the stack. */
21423 offsets = arm_get_frame_offsets ();
21424 live_regs_mask = offsets->saved_regs_mask;
21426 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21428 if (IS_STACKALIGN (func_type))
21430 rtx r0, r1;
21432 /* Handle a word-aligned stack pointer. We generate the following:
21434 mov r0, sp
21435 bic r1, r0, #7
21436 mov sp, r1
21437 <save and restore r0 in normal prologue/epilogue>
21438 mov sp, r0
21439 bx lr
21441 The unwinder doesn't need to know about the stack realignment.
21442 Just tell it we saved SP in r0. */
21443 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21445 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21446 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21448 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21449 RTX_FRAME_RELATED_P (insn) = 1;
21450 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21452 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21454 /* ??? The CFA changes here, which may cause GDB to conclude that it
21455 has entered a different function. That said, the unwind info is
21456 correct, individually, before and after this instruction because
21457 we've described the save of SP, which will override the default
21458 handling of SP as restoring from the CFA. */
21459 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21462 /* The static chain register is the same as the IP register. If it is
21463 clobbered when creating the frame, we need to save and restore it. */
21464 clobber_ip = IS_NESTED (func_type)
21465 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21466 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21467 && !df_regs_ever_live_p (LR_REGNUM)
21468 && arm_r3_live_at_start_p ()));
21470 /* Find somewhere to store IP whilst the frame is being created.
21471 We try the following places in order:
21473 1. The last argument register r3 if it is available.
21474 2. A slot on the stack above the frame if there are no
21475 arguments to push onto the stack.
21476 3. Register r3 again, after pushing the argument registers
21477 onto the stack, if this is a varargs function.
21478 4. The last slot on the stack created for the arguments to
21479 push, if this isn't a varargs function.
21481 Note - we only need to tell the dwarf2 backend about the SP
21482 adjustment in the second variant; the static chain register
21483 doesn't need to be unwound, as it doesn't contain a value
21484 inherited from the caller. */
21485 if (clobber_ip)
21487 if (!arm_r3_live_at_start_p ())
21488 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21489 else if (args_to_push == 0)
21491 rtx addr, dwarf;
21493 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21494 saved_regs += 4;
21496 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21497 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21498 fp_offset = 4;
21500 /* Just tell the dwarf backend that we adjusted SP. */
21501 dwarf = gen_rtx_SET (stack_pointer_rtx,
21502 plus_constant (Pmode, stack_pointer_rtx,
21503 -fp_offset));
21504 RTX_FRAME_RELATED_P (insn) = 1;
21505 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21507 else
21509 /* Store the args on the stack. */
21510 if (cfun->machine->uses_anonymous_args)
21512 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21513 (0xf0 >> (args_to_push / 4)) & 0xf);
21514 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21515 saved_pretend_args = 1;
21517 else
21519 rtx addr, dwarf;
21521 if (args_to_push == 4)
21522 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21523 else
21524 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21525 plus_constant (Pmode,
21526 stack_pointer_rtx,
21527 -args_to_push));
21529 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21531 /* Just tell the dwarf backend that we adjusted SP. */
21532 dwarf = gen_rtx_SET (stack_pointer_rtx,
21533 plus_constant (Pmode, stack_pointer_rtx,
21534 -args_to_push));
21535 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21538 RTX_FRAME_RELATED_P (insn) = 1;
21539 fp_offset = args_to_push;
21540 args_to_push = 0;
21544 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21546 if (IS_INTERRUPT (func_type))
21548 /* Interrupt functions must not corrupt any registers.
21549 Creating a frame pointer, however, corrupts the IP
21550 register, so we must push it first. */
21551 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21553 /* Do not set RTX_FRAME_RELATED_P on this insn.
21554 The dwarf stack unwinding code only wants to see one
21555 stack decrement per function, and this is not it. If
21556 this instruction is labeled as being part of the frame
21557 creation sequence then dwarf2out_frame_debug_expr will
21558 die when it encounters the assignment of IP to FP
21559 later on, since the use of SP here establishes SP as
21560 the CFA register and not IP.
21562 Anyway this instruction is not really part of the stack
21563 frame creation although it is part of the prologue. */
21566 insn = emit_set_insn (ip_rtx,
21567 plus_constant (Pmode, stack_pointer_rtx,
21568 fp_offset));
21569 RTX_FRAME_RELATED_P (insn) = 1;
21572 if (args_to_push)
21574 /* Push the argument registers, or reserve space for them. */
21575 if (cfun->machine->uses_anonymous_args)
21576 insn = emit_multi_reg_push
21577 ((0xf0 >> (args_to_push / 4)) & 0xf,
21578 (0xf0 >> (args_to_push / 4)) & 0xf);
21579 else
21580 insn = emit_insn
21581 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21582 GEN_INT (- args_to_push)));
21583 RTX_FRAME_RELATED_P (insn) = 1;
21586 /* If this is an interrupt service routine, and the link register
21587 is going to be pushed, and we're not generating an extra
21588 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21589 subtracting four from LR now will mean that the function return
21590 can be done with a single instruction. */
21591 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21592 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21593 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21594 && TARGET_ARM)
21596 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21598 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21601 if (live_regs_mask)
21603 unsigned long dwarf_regs_mask = live_regs_mask;
21605 saved_regs += bit_count (live_regs_mask) * 4;
21606 if (optimize_size && !frame_pointer_needed
21607 && saved_regs == offsets->saved_regs - offsets->saved_args)
21609 /* If no coprocessor registers are being pushed and we don't have
21610 to worry about a frame pointer then push extra registers to
21611 create the stack frame. This is done in a way that does not
21612 alter the frame layout, so is independent of the epilogue. */
21613 int n;
21614 int frame;
21615 n = 0;
21616 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21617 n++;
21618 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21619 if (frame && n * 4 >= frame)
21621 n = frame / 4;
21622 live_regs_mask |= (1 << n) - 1;
21623 saved_regs += frame;
21627 if (TARGET_LDRD
21628 && current_tune->prefer_ldrd_strd
21629 && !optimize_function_for_size_p (cfun))
21631 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21632 if (TARGET_THUMB2)
21633 thumb2_emit_strd_push (live_regs_mask);
21634 else if (TARGET_ARM
21635 && !TARGET_APCS_FRAME
21636 && !IS_INTERRUPT (func_type))
21637 arm_emit_strd_push (live_regs_mask);
21638 else
21640 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21641 RTX_FRAME_RELATED_P (insn) = 1;
21644 else
21646 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21647 RTX_FRAME_RELATED_P (insn) = 1;
21651 if (! IS_VOLATILE (func_type))
21652 saved_regs += arm_save_coproc_regs ();
21654 if (frame_pointer_needed && TARGET_ARM)
21656 /* Create the new frame pointer. */
21657 if (TARGET_APCS_FRAME)
21659 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21660 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21661 RTX_FRAME_RELATED_P (insn) = 1;
21663 else
21665 insn = GEN_INT (saved_regs - (4 + fp_offset));
21666 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21667 stack_pointer_rtx, insn));
21668 RTX_FRAME_RELATED_P (insn) = 1;
21672 size = offsets->outgoing_args - offsets->saved_args;
21673 if (flag_stack_usage_info)
21674 current_function_static_stack_size = size;
21676 /* If this isn't an interrupt service routine and we have a frame, then do
21677 stack checking. We use IP as the first scratch register, except for the
21678 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21679 if (!IS_INTERRUPT (func_type)
21680 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21682 unsigned int regno;
21684 if (!IS_NESTED (func_type) || clobber_ip)
21685 regno = IP_REGNUM;
21686 else if (df_regs_ever_live_p (LR_REGNUM))
21687 regno = LR_REGNUM;
21688 else
21689 regno = 3;
21691 if (crtl->is_leaf && !cfun->calls_alloca)
21693 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21694 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21695 size - STACK_CHECK_PROTECT,
21696 regno, live_regs_mask);
21698 else if (size > 0)
21699 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21700 regno, live_regs_mask);
21703 /* Recover the static chain register. */
21704 if (clobber_ip)
21706 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21707 insn = gen_rtx_REG (SImode, 3);
21708 else
21710 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21711 insn = gen_frame_mem (SImode, insn);
21713 emit_set_insn (ip_rtx, insn);
21714 emit_insn (gen_force_register_use (ip_rtx));
21717 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21719 /* This add can produce multiple insns for a large constant, so we
21720 need to get tricky. */
21721 rtx_insn *last = get_last_insn ();
21723 amount = GEN_INT (offsets->saved_args + saved_regs
21724 - offsets->outgoing_args);
21726 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21727 amount));
21730 last = last ? NEXT_INSN (last) : get_insns ();
21731 RTX_FRAME_RELATED_P (last) = 1;
21733 while (last != insn);
21735 /* If the frame pointer is needed, emit a special barrier that
21736 will prevent the scheduler from moving stores to the frame
21737 before the stack adjustment. */
21738 if (frame_pointer_needed)
21739 emit_insn (gen_stack_tie (stack_pointer_rtx,
21740 hard_frame_pointer_rtx));
21744 if (frame_pointer_needed && TARGET_THUMB2)
21745 thumb_set_frame_pointer (offsets);
21747 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21749 unsigned long mask;
21751 mask = live_regs_mask;
21752 mask &= THUMB2_WORK_REGS;
21753 if (!IS_NESTED (func_type))
21754 mask |= (1 << IP_REGNUM);
21755 arm_load_pic_register (mask);
21758 /* If we are profiling, make sure no instructions are scheduled before
21759 the call to mcount. Similarly if the user has requested no
21760 scheduling in the prolog. Similarly if we want non-call exceptions
21761 using the EABI unwinder, to prevent faulting instructions from being
21762 swapped with a stack adjustment. */
21763 if (crtl->profile || !TARGET_SCHED_PROLOG
21764 || (arm_except_unwind_info (&global_options) == UI_TARGET
21765 && cfun->can_throw_non_call_exceptions))
21766 emit_insn (gen_blockage ());
21768 /* If the link register is being kept alive, with the return address in it,
21769 then make sure that it does not get reused by the ce2 pass. */
21770 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21771 cfun->machine->lr_save_eliminated = 1;
21774 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21775 static void
21776 arm_print_condition (FILE *stream)
21778 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21780 /* Branch conversion is not implemented for Thumb-2. */
21781 if (TARGET_THUMB)
21783 output_operand_lossage ("predicated Thumb instruction");
21784 return;
21786 if (current_insn_predicate != NULL)
21788 output_operand_lossage
21789 ("predicated instruction in conditional sequence");
21790 return;
21793 fputs (arm_condition_codes[arm_current_cc], stream);
21795 else if (current_insn_predicate)
21797 enum arm_cond_code code;
21799 if (TARGET_THUMB1)
21801 output_operand_lossage ("predicated Thumb instruction");
21802 return;
21805 code = get_arm_condition_code (current_insn_predicate);
21806 fputs (arm_condition_codes[code], stream);
21811 /* Globally reserved letters: acln
21812 Punctuation letters currently used: @_|?().!#
21813 Lower case letters currently used: bcdefhimpqtvwxyz
21814 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21815 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21817 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21819 If CODE is 'd', then the X is a condition operand and the instruction
21820 should only be executed if the condition is true.
21821 If CODE is 'D', then the X is a condition operand and the instruction
21822 should only be executed if the condition is false: however, if the mode
21823 of the comparison is CCFPEmode, then always execute the instruction -- we
21824 do this because in these circumstances !GE does not necessarily imply LT;
21825 in these cases the instruction pattern will take care to make sure that
21826 an instruction containing %d will follow, thereby undoing the effects of
21827 doing this instruction unconditionally.
21828 If CODE is 'N' then X is a floating point operand that must be negated
21829 before output.
21830 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21831 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
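/* Illustrative examples, not from the original comment: with X a DImode
   value held in r0, "%M" prints "{r0-r1}"; with X the CONST_INT 5, "%B"
   prints its bitwise inverse, -6. */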
21832 static void
21833 arm_print_operand (FILE *stream, rtx x, int code)
21835 switch (code)
21837 case '@':
21838 fputs (ASM_COMMENT_START, stream);
21839 return;
21841 case '_':
21842 fputs (user_label_prefix, stream);
21843 return;
21845 case '|':
21846 fputs (REGISTER_PREFIX, stream);
21847 return;
21849 case '?':
21850 arm_print_condition (stream);
21851 return;
21853 case '.':
21854 /* The current condition code for a condition code setting instruction.
21855 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21856 fputc('s', stream);
21857 arm_print_condition (stream);
21858 return;
21860 case '!':
21861 /* If the instruction is conditionally executed then print
21862 the current condition code, otherwise print 's'. */
21863 gcc_assert (TARGET_THUMB2);
21864 if (current_insn_predicate)
21865 arm_print_condition (stream);
21866 else
21867 fputc('s', stream);
21868 break;
21870 /* %# is a "break" sequence. It doesn't output anything, but is used to
21871 separate e.g. operand numbers from following text, if that text consists
21872 of further digits which we don't want to be part of the operand
21873 number. */
21874 case '#':
21875 return;
21877 case 'N':
21879 REAL_VALUE_TYPE r;
21880 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21881 fprintf (stream, "%s", fp_const_from_val (&r));
21883 return;
21885 /* An integer or symbol address without a preceding # sign. */
21886 case 'c':
21887 switch (GET_CODE (x))
21889 case CONST_INT:
21890 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21891 break;
21893 case SYMBOL_REF:
21894 output_addr_const (stream, x);
21895 break;
21897 case CONST:
21898 if (GET_CODE (XEXP (x, 0)) == PLUS
21899 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21901 output_addr_const (stream, x);
21902 break;
21904 /* Fall through. */
21906 default:
21907 output_operand_lossage ("Unsupported operand for code '%c'", code);
21909 return;
21911 /* An integer that we want to print in HEX. */
21912 case 'x':
21913 switch (GET_CODE (x))
21915 case CONST_INT:
21916 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21917 break;
21919 default:
21920 output_operand_lossage ("Unsupported operand for code '%c'", code);
21922 return;
21924 case 'B':
21925 if (CONST_INT_P (x))
21927 HOST_WIDE_INT val;
21928 val = ARM_SIGN_EXTEND (~INTVAL (x));
21929 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21931 else
21933 putc ('~', stream);
21934 output_addr_const (stream, x);
21936 return;
21938 case 'b':
21939 /* Print the log2 of a CONST_INT. */
21941 HOST_WIDE_INT val;
21943 if (!CONST_INT_P (x)
21944 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21945 output_operand_lossage ("Unsupported operand for code '%c'", code);
21946 else
21947 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21949 return;
21951 case 'L':
21952 /* The low 16 bits of an immediate constant. */
21953 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21954 return;
21956 case 'i':
21957 fprintf (stream, "%s", arithmetic_instr (x, 1));
21958 return;
21960 case 'I':
21961 fprintf (stream, "%s", arithmetic_instr (x, 0));
21962 return;
21964 case 'S':
21966 HOST_WIDE_INT val;
21967 const char *shift;
21969 shift = shift_op (x, &val);
21971 if (shift)
21973 fprintf (stream, ", %s ", shift);
21974 if (val == -1)
21975 arm_print_operand (stream, XEXP (x, 1), 0);
21976 else
21977 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21980 return;
21982 /* An explanation of the 'Q', 'R' and 'H' register operands:
21984 In a pair of registers containing a DI or DF value the 'Q'
21985 operand returns the register number of the register containing
21986 the least significant part of the value. The 'R' operand returns
21987 the register number of the register containing the most
21988 significant part of the value.
21990 The 'H' operand returns the higher of the two register numbers.
21991 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21992 same as the 'Q' operand, since the most significant part of the
21993 value is held in the lower number register. The reverse is true
21994 on systems where WORDS_BIG_ENDIAN is false.
21996 The purpose of these operands is to distinguish between cases
21997 where the endian-ness of the values is important (for example
21998 when they are added together), and cases where the endian-ness
21999 is irrelevant, but the order of register operations is important.
22000 For example when loading a value from memory into a register
22001 pair, the endian-ness does not matter. Provided that the value
22002 from the lower memory address is put into the lower numbered
22003 register, and the value from the higher address is put into the
22004 higher numbered register, the load will work regardless of whether
22005 the value being loaded is big-wordian or little-wordian. The
22006 order of the two register loads can matter however, if the address
22007 of the memory location is actually held in one of the registers
22008 being overwritten by the load.
22010 The 'Q' and 'R' constraints are also available for 64-bit
22011 constants. */
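/* Illustrative example, not part of the original comment: for a DImode
   value held in the pair {r4, r5} on a little-endian (WORDS_BIG_ENDIAN
   false) target, '%Q' prints r4 (the least significant word), '%R' prints
   r5 (the most significant word) and '%H' prints r5 (the higher register
   number). */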
22012 case 'Q':
22013 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22015 rtx part = gen_lowpart (SImode, x);
22016 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22017 return;
22020 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22022 output_operand_lossage ("invalid operand for code '%c'", code);
22023 return;
22026 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22027 return;
22029 case 'R':
22030 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22032 machine_mode mode = GET_MODE (x);
22033 rtx part;
22035 if (mode == VOIDmode)
22036 mode = DImode;
22037 part = gen_highpart_mode (SImode, mode, x);
22038 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22039 return;
22042 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22044 output_operand_lossage ("invalid operand for code '%c'", code);
22045 return;
22048 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22049 return;
22051 case 'H':
22052 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22054 output_operand_lossage ("invalid operand for code '%c'", code);
22055 return;
22058 asm_fprintf (stream, "%r", REGNO (x) + 1);
22059 return;
22061 case 'J':
22062 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22064 output_operand_lossage ("invalid operand for code '%c'", code);
22065 return;
22068 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22069 return;
22071 case 'K':
22072 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22074 output_operand_lossage ("invalid operand for code '%c'", code);
22075 return;
22078 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22079 return;
22081 case 'm':
22082 asm_fprintf (stream, "%r",
22083 REG_P (XEXP (x, 0))
22084 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22085 return;
22087 case 'M':
22088 asm_fprintf (stream, "{%r-%r}",
22089 REGNO (x),
22090 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22091 return;
22093 /* Like 'M', but writing doubleword vector registers, for use by Neon
22094 insns. */
22095 case 'h':
22097 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22098 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22099 if (numregs == 1)
22100 asm_fprintf (stream, "{d%d}", regno);
22101 else
22102 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22104 return;
22106 case 'd':
22107 /* CONST_TRUE_RTX means always -- that's the default. */
22108 if (x == const_true_rtx)
22109 return;
22111 if (!COMPARISON_P (x))
22113 output_operand_lossage ("invalid operand for code '%c'", code);
22114 return;
22117 fputs (arm_condition_codes[get_arm_condition_code (x)],
22118 stream);
22119 return;
22121 case 'D':
22122 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22123 want to do that. */
22124 if (x == const_true_rtx)
22126 output_operand_lossage ("instruction never executed");
22127 return;
22129 if (!COMPARISON_P (x))
22131 output_operand_lossage ("invalid operand for code '%c'", code);
22132 return;
22135 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22136 (get_arm_condition_code (x))],
22137 stream);
22138 return;
22140 case 's':
22141 case 'V':
22142 case 'W':
22143 case 'X':
22144 case 'Y':
22145 case 'Z':
22146 /* Former Maverick support, removed after GCC-4.7. */
22147 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22148 return;
22150 case 'U':
22151 if (!REG_P (x)
22152 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22153 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22154 /* Bad value for wCG register number. */
22156 output_operand_lossage ("invalid operand for code '%c'", code);
22157 return;
22160 else
22161 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22162 return;
22164 /* Print an iWMMXt control register name. */
22165 case 'w':
22166 if (!CONST_INT_P (x)
22167 || INTVAL (x) < 0
22168 || INTVAL (x) >= 16)
22169 /* Bad value for wC register number. */
22171 output_operand_lossage ("invalid operand for code '%c'", code);
22172 return;
22175 else
22177 static const char * wc_reg_names [16] =
22179 "wCID", "wCon", "wCSSF", "wCASF",
22180 "wC4", "wC5", "wC6", "wC7",
22181 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22182 "wC12", "wC13", "wC14", "wC15"
22185 fputs (wc_reg_names [INTVAL (x)], stream);
22187 return;
22189 /* Print the high single-precision register of a VFP double-precision
22190 register. */
22191 case 'p':
22193 machine_mode mode = GET_MODE (x);
22194 int regno;
22196 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22198 output_operand_lossage ("invalid operand for code '%c'", code);
22199 return;
22202 regno = REGNO (x);
22203 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22205 output_operand_lossage ("invalid operand for code '%c'", code);
22206 return;
22209 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22211 return;
22213 /* Print a VFP/Neon double precision or quad precision register name. */
22214 case 'P':
22215 case 'q':
22217 machine_mode mode = GET_MODE (x);
22218 int is_quad = (code == 'q');
22219 int regno;
22221 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22223 output_operand_lossage ("invalid operand for code '%c'", code);
22224 return;
22227 if (!REG_P (x)
22228 || !IS_VFP_REGNUM (REGNO (x)))
22230 output_operand_lossage ("invalid operand for code '%c'", code);
22231 return;
22234 regno = REGNO (x);
22235 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22236 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22238 output_operand_lossage ("invalid operand for code '%c'", code);
22239 return;
22242 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22243 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22245 return;
22247 /* These two codes print the low/high doubleword register of a Neon quad
22248 register, respectively. For pair-structure types, can also print
22249 low/high quadword registers. */
22250 case 'e':
22251 case 'f':
22253 machine_mode mode = GET_MODE (x);
22254 int regno;
22256 if ((GET_MODE_SIZE (mode) != 16
22257 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22259 output_operand_lossage ("invalid operand for code '%c'", code);
22260 return;
22263 regno = REGNO (x);
22264 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22266 output_operand_lossage ("invalid operand for code '%c'", code);
22267 return;
22270 if (GET_MODE_SIZE (mode) == 16)
22271 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22272 + (code == 'f' ? 1 : 0));
22273 else
22274 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22275 + (code == 'f' ? 1 : 0));
22277 return;
22279 /* Print a VFPv3 floating-point constant, represented as an integer
22280 index. */
22281 case 'G':
22283 int index = vfp3_const_double_index (x);
22284 gcc_assert (index != -1);
22285 fprintf (stream, "%d", index);
22287 return;
22289 /* Print bits representing opcode features for Neon.
22291 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22292 and polynomials as unsigned.
22294 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22296 Bit 2 is 1 for rounding functions, 0 otherwise. */
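/* Illustrative example, not from the original source: for an operand with
   INTVAL 5 (binary 101: signed, integer, rounding), '%T' prints 's',
   '%F' prints 'i', '%t' prints 's' and '%O' prints 'r'. */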
22298 /* Identify the type as 's', 'u', 'p' or 'f'. */
22299 case 'T':
22301 HOST_WIDE_INT bits = INTVAL (x);
22302 fputc ("uspf"[bits & 3], stream);
22304 return;
22306 /* Likewise, but signed and unsigned integers are both 'i'. */
22307 case 'F':
22309 HOST_WIDE_INT bits = INTVAL (x);
22310 fputc ("iipf"[bits & 3], stream);
22312 return;
22314 /* As for 'T', but emit 'u' instead of 'p'. */
22315 case 't':
22317 HOST_WIDE_INT bits = INTVAL (x);
22318 fputc ("usuf"[bits & 3], stream);
22320 return;
22322 /* Bit 2: rounding (vs none). */
22323 case 'O':
22325 HOST_WIDE_INT bits = INTVAL (x);
22326 fputs ((bits & 4) != 0 ? "r" : "", stream);
22328 return;
22330 /* Memory operand for vld1/vst1 instruction. */
22331 case 'A':
22333 rtx addr;
22334 bool postinc = FALSE;
22335 rtx postinc_reg = NULL;
22336 unsigned align, memsize, align_bits;
22338 gcc_assert (MEM_P (x));
22339 addr = XEXP (x, 0);
22340 if (GET_CODE (addr) == POST_INC)
22342 postinc = 1;
22343 addr = XEXP (addr, 0);
22345 if (GET_CODE (addr) == POST_MODIFY)
22347 postinc_reg = XEXP( XEXP (addr, 1), 1);
22348 addr = XEXP (addr, 0);
22350 asm_fprintf (stream, "[%r", REGNO (addr));
22352 /* We know the alignment of this access, so we can emit a hint in the
22353 instruction (for some alignments) as an aid to the memory subsystem
22354 of the target. */
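/* For instance (illustrative only), a 16-byte access known to be 16-byte
   aligned is printed with a ":128" hint after the base register,
   e.g. "[r0:128]". */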
22355 align = MEM_ALIGN (x) >> 3;
22356 memsize = MEM_SIZE (x);
22358 /* Only certain alignment specifiers are supported by the hardware. */
22359 if (memsize == 32 && (align % 32) == 0)
22360 align_bits = 256;
22361 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22362 align_bits = 128;
22363 else if (memsize >= 8 && (align % 8) == 0)
22364 align_bits = 64;
22365 else
22366 align_bits = 0;
22368 if (align_bits != 0)
22369 asm_fprintf (stream, ":%d", align_bits);
22371 asm_fprintf (stream, "]");
22373 if (postinc)
22374 fputs("!", stream);
22375 if (postinc_reg)
22376 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22378 return;
22380 case 'C':
22382 rtx addr;
22384 gcc_assert (MEM_P (x));
22385 addr = XEXP (x, 0);
22386 gcc_assert (REG_P (addr));
22387 asm_fprintf (stream, "[%r]", REGNO (addr));
22389 return;
22391 /* Translate an S register number into a D register number and element index. */
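/* For example (illustrative only), s1 is printed as "d0[1]" and s4 as
   "d2[0]". */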
22392 case 'y':
22394 machine_mode mode = GET_MODE (x);
22395 int regno;
22397 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22399 output_operand_lossage ("invalid operand for code '%c'", code);
22400 return;
22403 regno = REGNO (x);
22404 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22406 output_operand_lossage ("invalid operand for code '%c'", code);
22407 return;
22410 regno = regno - FIRST_VFP_REGNUM;
22411 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22413 return;
22415 case 'v':
22416 gcc_assert (CONST_DOUBLE_P (x));
22417 int result;
22418 result = vfp3_const_double_for_fract_bits (x);
22419 if (result == 0)
22420 result = vfp3_const_double_for_bits (x);
22421 fprintf (stream, "#%d", result);
22422 return;
22424 /* Register specifier for vld1.16/vst1.16. Translate the S register
22425 number into a D register number and element index. */
22426 case 'z':
22428 machine_mode mode = GET_MODE (x);
22429 int regno;
22431 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22433 output_operand_lossage ("invalid operand for code '%c'", code);
22434 return;
22437 regno = REGNO (x);
22438 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22440 output_operand_lossage ("invalid operand for code '%c'", code);
22441 return;
22444 regno = regno - FIRST_VFP_REGNUM;
22445 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22447 return;
22449 default:
22450 if (x == 0)
22452 output_operand_lossage ("missing operand");
22453 return;
22456 switch (GET_CODE (x))
22458 case REG:
22459 asm_fprintf (stream, "%r", REGNO (x));
22460 break;
22462 case MEM:
22463 output_address (GET_MODE (x), XEXP (x, 0));
22464 break;
22466 case CONST_DOUBLE:
22468 char fpstr[20];
22469 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22470 sizeof (fpstr), 0, 1);
22471 fprintf (stream, "#%s", fpstr);
22473 break;
22475 default:
22476 gcc_assert (GET_CODE (x) != NEG);
22477 fputc ('#', stream);
22478 if (GET_CODE (x) == HIGH)
22480 fputs (":lower16:", stream);
22481 x = XEXP (x, 0);
22484 output_addr_const (stream, x);
22485 break;
22490 /* Target hook for printing a memory address. */
22491 static void
22492 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22494 if (TARGET_32BIT)
22496 int is_minus = GET_CODE (x) == MINUS;
22498 if (REG_P (x))
22499 asm_fprintf (stream, "[%r]", REGNO (x));
22500 else if (GET_CODE (x) == PLUS || is_minus)
22502 rtx base = XEXP (x, 0);
22503 rtx index = XEXP (x, 1);
22504 HOST_WIDE_INT offset = 0;
22505 if (!REG_P (base)
22506 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22508 /* Ensure that BASE is a register. */
22509 /* (one of them must be). */
22510 /* Also ensure the SP is not used as an index register. */
22511 std::swap (base, index);
22513 switch (GET_CODE (index))
22515 case CONST_INT:
22516 offset = INTVAL (index);
22517 if (is_minus)
22518 offset = -offset;
22519 asm_fprintf (stream, "[%r, #%wd]",
22520 REGNO (base), offset);
22521 break;
22523 case REG:
22524 asm_fprintf (stream, "[%r, %s%r]",
22525 REGNO (base), is_minus ? "-" : "",
22526 REGNO (index));
22527 break;
22529 case MULT:
22530 case ASHIFTRT:
22531 case LSHIFTRT:
22532 case ASHIFT:
22533 case ROTATERT:
22535 asm_fprintf (stream, "[%r, %s%r",
22536 REGNO (base), is_minus ? "-" : "",
22537 REGNO (XEXP (index, 0)));
22538 arm_print_operand (stream, index, 'S');
22539 fputs ("]", stream);
22540 break;
22543 default:
22544 gcc_unreachable ();
22547 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22548 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22550 gcc_assert (REG_P (XEXP (x, 0)));
22552 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22553 asm_fprintf (stream, "[%r, #%s%d]!",
22554 REGNO (XEXP (x, 0)),
22555 GET_CODE (x) == PRE_DEC ? "-" : "",
22556 GET_MODE_SIZE (mode));
22557 else
22558 asm_fprintf (stream, "[%r], #%s%d",
22559 REGNO (XEXP (x, 0)),
22560 GET_CODE (x) == POST_DEC ? "-" : "",
22561 GET_MODE_SIZE (mode));
22563 else if (GET_CODE (x) == PRE_MODIFY)
22565 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22566 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22567 asm_fprintf (stream, "#%wd]!",
22568 INTVAL (XEXP (XEXP (x, 1), 1)));
22569 else
22570 asm_fprintf (stream, "%r]!",
22571 REGNO (XEXP (XEXP (x, 1), 1)));
22573 else if (GET_CODE (x) == POST_MODIFY)
22575 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22576 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22577 asm_fprintf (stream, "#%wd",
22578 INTVAL (XEXP (XEXP (x, 1), 1)));
22579 else
22580 asm_fprintf (stream, "%r",
22581 REGNO (XEXP (XEXP (x, 1), 1)));
22583 else output_addr_const (stream, x);
22585 else
22587 if (REG_P (x))
22588 asm_fprintf (stream, "[%r]", REGNO (x));
22589 else if (GET_CODE (x) == POST_INC)
22590 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22591 else if (GET_CODE (x) == PLUS)
22593 gcc_assert (REG_P (XEXP (x, 0)));
22594 if (CONST_INT_P (XEXP (x, 1)))
22595 asm_fprintf (stream, "[%r, #%wd]",
22596 REGNO (XEXP (x, 0)),
22597 INTVAL (XEXP (x, 1)));
22598 else
22599 asm_fprintf (stream, "[%r, %r]",
22600 REGNO (XEXP (x, 0)),
22601 REGNO (XEXP (x, 1)));
22603 else
22604 output_addr_const (stream, x);
22608 /* Target hook for indicating whether a punctuation character for
22609 TARGET_PRINT_OPERAND is valid. */
22610 static bool
22611 arm_print_operand_punct_valid_p (unsigned char code)
22613 return (code == '@' || code == '|' || code == '.'
22614 || code == '(' || code == ')' || code == '#'
22615 || (TARGET_32BIT && (code == '?'))
22616 || (TARGET_THUMB2 && (code == '!'))
22617 || (TARGET_THUMB && (code == '_')));
22620 /* Target hook for assembling integer objects. The ARM version needs to
22621 handle word-sized values specially. */
22622 static bool
22623 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22625 machine_mode mode;
22627 if (size == UNITS_PER_WORD && aligned_p)
22629 fputs ("\t.word\t", asm_out_file);
22630 output_addr_const (asm_out_file, x);
22632 /* Mark symbols as position independent. We only do this in the
22633 .text segment, not in the .data segment. */
22634 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22635 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22637 /* See legitimize_pic_address for an explanation of the
22638 TARGET_VXWORKS_RTP check. */
22639 /* References to weak symbols cannot be resolved locally:
22640 they may be overridden by a non-weak definition at link
22641 time. */
22642 if (!arm_pic_data_is_text_relative
22643 || (GET_CODE (x) == SYMBOL_REF
22644 && (!SYMBOL_REF_LOCAL_P (x)
22645 || (SYMBOL_REF_DECL (x)
22646 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22647 fputs ("(GOT)", asm_out_file);
22648 else
22649 fputs ("(GOTOFF)", asm_out_file);
22651 fputc ('\n', asm_out_file);
22652 return true;
22655 mode = GET_MODE (x);
22657 if (arm_vector_mode_supported_p (mode))
22659 int i, units;
22661 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22663 units = CONST_VECTOR_NUNITS (x);
22664 size = GET_MODE_UNIT_SIZE (mode);
22666 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22667 for (i = 0; i < units; i++)
22669 rtx elt = CONST_VECTOR_ELT (x, i);
22670 assemble_integer
22671 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22673 else
22674 for (i = 0; i < units; i++)
22676 rtx elt = CONST_VECTOR_ELT (x, i);
22677 assemble_real
22678 (*CONST_DOUBLE_REAL_VALUE (elt),
22679 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22680 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22683 return true;
22686 return default_assemble_integer (x, size, aligned_p);
22689 static void
22690 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22692 section *s;
22694 if (!TARGET_AAPCS_BASED)
22696 (is_ctor ?
22697 default_named_section_asm_out_constructor
22698 : default_named_section_asm_out_destructor) (symbol, priority);
22699 return;
22702 /* Put these in the .init_array section, using a special relocation. */
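/* For example (illustrative only), a constructor with priority 101 ends up
   in a section named ".init_array.00101" below. */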
22703 if (priority != DEFAULT_INIT_PRIORITY)
22705 char buf[18];
22706 sprintf (buf, "%s.%.5u",
22707 is_ctor ? ".init_array" : ".fini_array",
22708 priority);
22709 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22711 else if (is_ctor)
22712 s = ctors_section;
22713 else
22714 s = dtors_section;
22716 switch_to_section (s);
22717 assemble_align (POINTER_SIZE);
22718 fputs ("\t.word\t", asm_out_file);
22719 output_addr_const (asm_out_file, symbol);
22720 fputs ("(target1)\n", asm_out_file);
22723 /* Add a function to the list of static constructors. */
22725 static void
22726 arm_elf_asm_constructor (rtx symbol, int priority)
22728 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22731 /* Add a function to the list of static destructors. */
22733 static void
22734 arm_elf_asm_destructor (rtx symbol, int priority)
22736 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22739 /* A finite state machine takes care of noticing whether or not instructions
22740 can be conditionally executed, and thus decrease execution time and code
22741 size by deleting branch instructions. The fsm is controlled by
22742 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22744 /* The states of the fsm controlling condition codes are:
22745 0: normal, do nothing special
22746 1: make ASM_OUTPUT_OPCODE not output this instruction
22747 2: make ASM_OUTPUT_OPCODE not output this instruction
22748 3: make instructions conditional
22749 4: make instructions conditional
22751 State transitions (state->state by whom under condition):
22752 0 -> 1 final_prescan_insn if the `target' is a label
22753 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22754 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22755 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22756 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22757 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22758 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22759 (the target insn is arm_target_insn).
22761 If the jump clobbers the conditions then we use states 2 and 4.
22763 A similar thing can be done with conditional return insns.
22765 XXX In case the `target' is an unconditional branch, this conditionalising
22766 of the instructions always reduces code size, but not always execution
22767 time. But then, I want to reduce the code size to somewhere near what
22768 /bin/cc produces. */
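/* As an illustration (not from the original comment), the fsm turns a
   sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   by suppressing the branch and predicating the skipped instruction. */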
22770 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22771 instructions. When a COND_EXEC instruction is seen the subsequent
22772 instructions are scanned so that multiple conditional instructions can be
22773 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22774 specify the length and true/false mask for the IT block. These will be
22775 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22777 /* Returns the index of the ARM condition code string in
22778 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22779 COMPARISON should be an rtx like `(eq (...) (...))'. */
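/* For example (illustrative only), (gt (reg:CC CC_REGNUM) (const_int 0))
   maps to ARM_GT via the E_CCmode case below, while an UNEQ comparison in
   E_CCFPmode yields ARM_NV because it has no ARM condition encoding. */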
22781 enum arm_cond_code
22782 maybe_get_arm_condition_code (rtx comparison)
22784 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22785 enum arm_cond_code code;
22786 enum rtx_code comp_code = GET_CODE (comparison);
22788 if (GET_MODE_CLASS (mode) != MODE_CC)
22789 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22790 XEXP (comparison, 1));
22792 switch (mode)
22794 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22795 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22796 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22797 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22798 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22799 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22800 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22801 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22802 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22803 case E_CC_DLTUmode: code = ARM_CC;
22805 dominance:
22806 if (comp_code == EQ)
22807 return ARM_INVERSE_CONDITION_CODE (code);
22808 if (comp_code == NE)
22809 return code;
22810 return ARM_NV;
22812 case E_CC_NOOVmode:
22813 switch (comp_code)
22815 case NE: return ARM_NE;
22816 case EQ: return ARM_EQ;
22817 case GE: return ARM_PL;
22818 case LT: return ARM_MI;
22819 default: return ARM_NV;
22822 case E_CC_Zmode:
22823 switch (comp_code)
22825 case NE: return ARM_NE;
22826 case EQ: return ARM_EQ;
22827 default: return ARM_NV;
22830 case E_CC_Nmode:
22831 switch (comp_code)
22833 case NE: return ARM_MI;
22834 case EQ: return ARM_PL;
22835 default: return ARM_NV;
22838 case E_CCFPEmode:
22839 case E_CCFPmode:
22840 /* We can handle all cases except UNEQ and LTGT. */
22841 switch (comp_code)
22843 case GE: return ARM_GE;
22844 case GT: return ARM_GT;
22845 case LE: return ARM_LS;
22846 case LT: return ARM_MI;
22847 case NE: return ARM_NE;
22848 case EQ: return ARM_EQ;
22849 case ORDERED: return ARM_VC;
22850 case UNORDERED: return ARM_VS;
22851 case UNLT: return ARM_LT;
22852 case UNLE: return ARM_LE;
22853 case UNGT: return ARM_HI;
22854 case UNGE: return ARM_PL;
22855 /* UNEQ and LTGT do not have a representation. */
22856 case UNEQ: /* Fall through. */
22857 case LTGT: /* Fall through. */
22858 default: return ARM_NV;
22861 case E_CC_SWPmode:
22862 switch (comp_code)
22864 case NE: return ARM_NE;
22865 case EQ: return ARM_EQ;
22866 case GE: return ARM_LE;
22867 case GT: return ARM_LT;
22868 case LE: return ARM_GE;
22869 case LT: return ARM_GT;
22870 case GEU: return ARM_LS;
22871 case GTU: return ARM_CC;
22872 case LEU: return ARM_CS;
22873 case LTU: return ARM_HI;
22874 default: return ARM_NV;
22877 case E_CC_Cmode:
22878 switch (comp_code)
22880 case LTU: return ARM_CS;
22881 case GEU: return ARM_CC;
22882 case NE: return ARM_CS;
22883 case EQ: return ARM_CC;
22884 default: return ARM_NV;
22887 case E_CC_CZmode:
22888 switch (comp_code)
22890 case NE: return ARM_NE;
22891 case EQ: return ARM_EQ;
22892 case GEU: return ARM_CS;
22893 case GTU: return ARM_HI;
22894 case LEU: return ARM_LS;
22895 case LTU: return ARM_CC;
22896 default: return ARM_NV;
22899 case E_CC_NCVmode:
22900 switch (comp_code)
22902 case GE: return ARM_GE;
22903 case LT: return ARM_LT;
22904 case GEU: return ARM_CS;
22905 case LTU: return ARM_CC;
22906 default: return ARM_NV;
22909 case E_CC_Vmode:
22910 switch (comp_code)
22912 case NE: return ARM_VS;
22913 case EQ: return ARM_VC;
22914 default: return ARM_NV;
22917 case E_CCmode:
22918 switch (comp_code)
22920 case NE: return ARM_NE;
22921 case EQ: return ARM_EQ;
22922 case GE: return ARM_GE;
22923 case GT: return ARM_GT;
22924 case LE: return ARM_LE;
22925 case LT: return ARM_LT;
22926 case GEU: return ARM_CS;
22927 case GTU: return ARM_HI;
22928 case LEU: return ARM_LS;
22929 case LTU: return ARM_CC;
22930 default: return ARM_NV;
22933 default: gcc_unreachable ();
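/* Added illustrative note (not in the original source): for a comparison
   rtx such as (ne (reg:CC_Z CC_REGNUM) (const_int 0)) the mode is already
   MODE_CC, so the switch above lands in the E_CC_Zmode arm and returns
   ARM_NE, the index of "ne" in `arm_condition_codes'; an unrepresentable
   combination such as LTGT under E_CCFPmode returns ARM_NV instead.  */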
22937 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22938 static enum arm_cond_code
22939 get_arm_condition_code (rtx comparison)
22941 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22942 gcc_assert (code != ARM_NV);
22943 return code;
22946 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22947 code registers when not targeting Thumb1. The VFP condition register
22948 only exists when generating hard-float code. */
22949 static bool
22950 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22952 if (!TARGET_32BIT)
22953 return false;
22955 *p1 = CC_REGNUM;
22956 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22957 return true;
22960 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22961 instructions. */
22962 void
22963 thumb2_final_prescan_insn (rtx_insn *insn)
22965 rtx_insn *first_insn = insn;
22966 rtx body = PATTERN (insn);
22967 rtx predicate;
22968 enum arm_cond_code code;
22969 int n;
22970 int mask;
22971 int max;
22973 /* max_insns_skipped in the tune was already taken into account in the
22974 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22975 stage just emit the IT blocks as they stand; it does not make sense to
22976 split the IT blocks. */
22977 max = MAX_INSN_PER_IT_BLOCK;
22979 /* Remove the previous insn from the count of insns to be output. */
22980 if (arm_condexec_count)
22981 arm_condexec_count--;
22983 /* Nothing to do if we are already inside a conditional block. */
22984 if (arm_condexec_count)
22985 return;
22987 if (GET_CODE (body) != COND_EXEC)
22988 return;
22990 /* Conditional jumps are implemented directly. */
22991 if (JUMP_P (insn))
22992 return;
22994 predicate = COND_EXEC_TEST (body);
22995 arm_current_cc = get_arm_condition_code (predicate);
22997 n = get_attr_ce_count (insn);
22998 arm_condexec_count = 1;
22999 arm_condexec_mask = (1 << n) - 1;
23000 arm_condexec_masklen = n;
23001 /* See if subsequent instructions can be combined into the same block. */
23002 for (;;)
23004 insn = next_nonnote_insn (insn);
23006 /* Jumping into the middle of an IT block is illegal, so a label or
23007 barrier terminates the block. */
23008 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23009 break;
23011 body = PATTERN (insn);
23012 /* USE and CLOBBER aren't really insns, so just skip them. */
23013 if (GET_CODE (body) == USE
23014 || GET_CODE (body) == CLOBBER)
23015 continue;
23017 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23018 if (GET_CODE (body) != COND_EXEC)
23019 break;
23020 /* Maximum number of conditionally executed instructions in a block. */
23021 n = get_attr_ce_count (insn);
23022 if (arm_condexec_masklen + n > max)
23023 break;
23025 predicate = COND_EXEC_TEST (body);
23026 code = get_arm_condition_code (predicate);
23027 mask = (1 << n) - 1;
23028 if (arm_current_cc == code)
23029 arm_condexec_mask |= (mask << arm_condexec_masklen);
23030 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23031 break;
23033 arm_condexec_count++;
23034 arm_condexec_masklen += n;
23036 /* A jump must be the last instruction in a conditional block. */
23037 if (JUMP_P (insn))
23038 break;
23040 /* Restore recog_data (getting the attributes of other insns can
23041 destroy this array, but final.c assumes that it remains intact
23042 across this call). */
23043 extract_constrain_insn_cached (first_insn);
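/* Worked example (added commentary, hypothetical insn stream): if the loop
   above finds three consecutive COND_EXEC insns, the first two predicated
   on EQ and the third on NE, each with ce_count == 1, then after the first
   insn arm_condexec_mask == 0x1 and arm_condexec_masklen == 1; the second
   insn matches arm_current_cc and ORs in another bit (mask 0x3), while the
   third matches the inverse condition and leaves its bit clear, ending with
   mask 0x3, masklen 3 and arm_condexec_count == 3.  thumb2_asm_output_opcode
   turns that state into the IT mnemonic.  */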
23046 void
23047 arm_final_prescan_insn (rtx_insn *insn)
23049 /* BODY will hold the body of INSN. */
23050 rtx body = PATTERN (insn);
23052 /* This will be 1 if trying to repeat the trick, and things need to be
23053 reversed if it appears to fail. */
23054 int reverse = 0;
23056 /* If we start with a return insn, we only succeed if we find another one. */
23057 int seeking_return = 0;
23058 enum rtx_code return_code = UNKNOWN;
23060 /* START_INSN will hold the insn from where we start looking. This is the
23061 first insn after the following code_label if REVERSE is true. */
23062 rtx_insn *start_insn = insn;
23064 /* If in state 4, check if the target branch is reached, in order to
23065 change back to state 0. */
23066 if (arm_ccfsm_state == 4)
23068 if (insn == arm_target_insn)
23070 arm_target_insn = NULL;
23071 arm_ccfsm_state = 0;
23073 return;
23076 /* If in state 3, it is possible to repeat the trick, if this insn is an
23077 unconditional branch to a label, and immediately following this branch
23078 is the previous target label which is only used once, and the label this
23079 branch jumps to is not too far off. */
23080 if (arm_ccfsm_state == 3)
23082 if (simplejump_p (insn))
23084 start_insn = next_nonnote_insn (start_insn);
23085 if (BARRIER_P (start_insn))
23087 /* XXX Isn't this always a barrier? */
23088 start_insn = next_nonnote_insn (start_insn);
23090 if (LABEL_P (start_insn)
23091 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23092 && LABEL_NUSES (start_insn) == 1)
23093 reverse = TRUE;
23094 else
23095 return;
23097 else if (ANY_RETURN_P (body))
23099 start_insn = next_nonnote_insn (start_insn);
23100 if (BARRIER_P (start_insn))
23101 start_insn = next_nonnote_insn (start_insn);
23102 if (LABEL_P (start_insn)
23103 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23104 && LABEL_NUSES (start_insn) == 1)
23106 reverse = TRUE;
23107 seeking_return = 1;
23108 return_code = GET_CODE (body);
23110 else
23111 return;
23113 else
23114 return;
23117 gcc_assert (!arm_ccfsm_state || reverse);
23118 if (!JUMP_P (insn))
23119 return;
23121 /* This jump might be paralleled with a clobber of the condition codes;
23122 the jump should always come first. */
23123 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23124 body = XVECEXP (body, 0, 0);
23126 if (reverse
23127 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23128 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23130 int insns_skipped;
23131 int fail = FALSE, succeed = FALSE;
23132 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23133 int then_not_else = TRUE;
23134 rtx_insn *this_insn = start_insn;
23135 rtx label = 0;
23137 /* Register the insn jumped to. */
23138 if (reverse)
23140 if (!seeking_return)
23141 label = XEXP (SET_SRC (body), 0);
23143 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23144 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23145 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23147 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23148 then_not_else = FALSE;
23150 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23152 seeking_return = 1;
23153 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23155 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23157 seeking_return = 1;
23158 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23159 then_not_else = FALSE;
23161 else
23162 gcc_unreachable ();
23164 /* See how many insns this branch skips, and what kind of insns. If all
23165 insns are okay, and the label or unconditional branch to the same
23166 label is not too far away, succeed. */
23167 for (insns_skipped = 0;
23168 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23170 rtx scanbody;
23172 this_insn = next_nonnote_insn (this_insn);
23173 if (!this_insn)
23174 break;
23176 switch (GET_CODE (this_insn))
23178 case CODE_LABEL:
23179 /* Succeed if it is the target label, otherwise fail since
23180 control falls in from somewhere else. */
23181 if (this_insn == label)
23183 arm_ccfsm_state = 1;
23184 succeed = TRUE;
23186 else
23187 fail = TRUE;
23188 break;
23190 case BARRIER:
23191 /* Succeed if the following insn is the target label.
23192 Otherwise fail.
23193 If return insns are used then the last insn in a function
23194 will be a barrier. */
23195 this_insn = next_nonnote_insn (this_insn);
23196 if (this_insn && this_insn == label)
23198 arm_ccfsm_state = 1;
23199 succeed = TRUE;
23201 else
23202 fail = TRUE;
23203 break;
23205 case CALL_INSN:
23206 /* The AAPCS says that conditional calls should not be
23207 used since they make interworking inefficient (the
23208 linker can't transform BL<cond> into BLX). That's
23209 only a problem if the machine has BLX. */
23210 if (arm_arch5)
23212 fail = TRUE;
23213 break;
23216 /* Succeed if the following insn is the target label, or
23217 if the following two insns are a barrier and the
23218 target label. */
23219 this_insn = next_nonnote_insn (this_insn);
23220 if (this_insn && BARRIER_P (this_insn))
23221 this_insn = next_nonnote_insn (this_insn);
23223 if (this_insn && this_insn == label
23224 && insns_skipped < max_insns_skipped)
23226 arm_ccfsm_state = 1;
23227 succeed = TRUE;
23229 else
23230 fail = TRUE;
23231 break;
23233 case JUMP_INSN:
23234 /* If this is an unconditional branch to the same label, succeed.
23235 If it is to another label, do nothing. If it is conditional,
23236 fail. */
23237 /* XXX Probably, the tests for SET and the PC are
23238 unnecessary. */
23240 scanbody = PATTERN (this_insn);
23241 if (GET_CODE (scanbody) == SET
23242 && GET_CODE (SET_DEST (scanbody)) == PC)
23244 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23245 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23247 arm_ccfsm_state = 2;
23248 succeed = TRUE;
23250 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23251 fail = TRUE;
23253 /* Fail if a conditional return is undesirable (e.g. on a
23254 StrongARM), but still allow this if optimizing for size. */
23255 else if (GET_CODE (scanbody) == return_code
23256 && !use_return_insn (TRUE, NULL)
23257 && !optimize_size)
23258 fail = TRUE;
23259 else if (GET_CODE (scanbody) == return_code)
23261 arm_ccfsm_state = 2;
23262 succeed = TRUE;
23264 else if (GET_CODE (scanbody) == PARALLEL)
23266 switch (get_attr_conds (this_insn))
23268 case CONDS_NOCOND:
23269 break;
23270 default:
23271 fail = TRUE;
23272 break;
23275 else
23276 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23278 break;
23280 case INSN:
23281 /* Instructions using or affecting the condition codes make it
23282 fail. */
23283 scanbody = PATTERN (this_insn);
23284 if (!(GET_CODE (scanbody) == SET
23285 || GET_CODE (scanbody) == PARALLEL)
23286 || get_attr_conds (this_insn) != CONDS_NOCOND)
23287 fail = TRUE;
23288 break;
23290 default:
23291 break;
23294 if (succeed)
23296 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23297 arm_target_label = CODE_LABEL_NUMBER (label);
23298 else
23300 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23302 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23304 this_insn = next_nonnote_insn (this_insn);
23305 gcc_assert (!this_insn
23306 || (!BARRIER_P (this_insn)
23307 && !LABEL_P (this_insn)));
23309 if (!this_insn)
23311 /* Oh, dear! We ran off the end; give up. */
23312 extract_constrain_insn_cached (insn);
23313 arm_ccfsm_state = 0;
23314 arm_target_insn = NULL;
23315 return;
23317 arm_target_insn = this_insn;
23320 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23321 what it was. */
23322 if (!reverse)
23323 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23325 if (reverse || then_not_else)
23326 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23329 /* Restore recog_data (getting the attributes of other insns can
23330 destroy this array, but final.c assumes that it remains intact
23331 across this call). */
23332 extract_constrain_insn_cached (insn);
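/* Sketch of the effect of this state machine (added commentary; the code
   below is illustrative, not taken from a real compilation).  A short
   forward branch such as
	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:
   can be emitted instead as
	cmp	r0, #0
	addne	r1, r1, #1
   because the skipped instruction is given the inverse of the branch
   condition while the branch itself is dropped.  */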
23336 /* Output IT instructions. */
23337 void
23338 thumb2_asm_output_opcode (FILE * stream)
23340 char buff[5];
23341 int n;
23343 if (arm_condexec_mask)
23345 for (n = 0; n < arm_condexec_masklen; n++)
23346 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23347 buff[n] = 0;
23348 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23349 arm_condition_codes[arm_current_cc]);
23350 arm_condexec_mask = 0;
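/* Added example of the output (illustrative values only): with
   arm_condexec_mask == 0x3, arm_condexec_masklen == 3 and
   arm_current_cc == ARM_EQ, the loop above builds buff = "tte" and the
   function emits "itte\teq" before the first instruction of the block,
   i.e. an IT block whose first two instructions execute on EQ and whose
   third executes on NE.  */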
23354 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23355 UNITS_PER_WORD bytes wide. */
23356 static unsigned int
23357 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23359 if (TARGET_32BIT
23360 && regno > PC_REGNUM
23361 && regno != FRAME_POINTER_REGNUM
23362 && regno != ARG_POINTER_REGNUM
23363 && !IS_VFP_REGNUM (regno))
23364 return 1;
23366 return ARM_NUM_REGS (mode);
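/* Added examples (illustrative only): a DImode value in a core register
   such as r4 needs ARM_NUM_REGS (DImode) == 2 consecutive registers, since
   core registers are UNITS_PER_WORD bytes wide; registers above the PC that
   are caught by the early return -- for instance the IWMMXT registers on
   targets that have them -- always report a single register for any mode.  */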
23369 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23370 static bool
23371 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23373 if (GET_MODE_CLASS (mode) == MODE_CC)
23374 return (regno == CC_REGNUM
23375 || (TARGET_HARD_FLOAT
23376 && regno == VFPCC_REGNUM));
23378 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23379 return false;
23381 if (TARGET_THUMB1)
23382 /* For the Thumb we only allow values bigger than SImode in
23383 registers 0 - 6, so that there is always a second low
23384 register available to hold the upper part of the value.
23385 We probably ought to ensure that the register is the
23386 start of an even numbered register pair. */
23387 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23389 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23391 if (mode == SFmode || mode == SImode)
23392 return VFP_REGNO_OK_FOR_SINGLE (regno);
23394 if (mode == DFmode)
23395 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23397 if (mode == HFmode)
23398 return VFP_REGNO_OK_FOR_SINGLE (regno);
23400 /* VFP registers can hold HImode values. */
23401 if (mode == HImode)
23402 return VFP_REGNO_OK_FOR_SINGLE (regno);
23404 if (TARGET_NEON)
23405 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23406 || (VALID_NEON_QREG_MODE (mode)
23407 && NEON_REGNO_OK_FOR_QUAD (regno))
23408 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23409 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23410 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23411 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23412 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23414 return false;
23417 if (TARGET_REALLY_IWMMXT)
23419 if (IS_IWMMXT_GR_REGNUM (regno))
23420 return mode == SImode;
23422 if (IS_IWMMXT_REGNUM (regno))
23423 return VALID_IWMMXT_REG_MODE (mode);
23426 /* We allow almost any value to be stored in the general registers.
23427 Restrict doubleword quantities to even register pairs in ARM state
23428 so that we can use ldrd. Do not allow very large Neon structure
23429 opaque modes in general registers; they would use too many. */
23430 if (regno <= LAST_ARM_REGNUM)
23432 if (ARM_NUM_REGS (mode) > 4)
23433 return false;
23435 if (TARGET_THUMB2)
23436 return true;
23438 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23441 if (regno == FRAME_POINTER_REGNUM
23442 || regno == ARG_POINTER_REGNUM)
23443 /* We only allow integers in the fake hard registers. */
23444 return GET_MODE_CLASS (mode) == MODE_INT;
23446 return false;
23449 /* Implement TARGET_MODES_TIEABLE_P. */
23451 static bool
23452 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23454 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23455 return true;
23457 /* We specifically want to allow elements of "structure" modes to
23458 be tieable to the structure. This more general condition allows
23459 other rarer situations too. */
23460 if (TARGET_NEON
23461 && (VALID_NEON_DREG_MODE (mode1)
23462 || VALID_NEON_QREG_MODE (mode1)
23463 || VALID_NEON_STRUCT_MODE (mode1))
23464 && (VALID_NEON_DREG_MODE (mode2)
23465 || VALID_NEON_QREG_MODE (mode2)
23466 || VALID_NEON_STRUCT_MODE (mode2)))
23467 return true;
23469 return false;
23472 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23473 not used in arm mode. */
23475 enum reg_class
23476 arm_regno_class (int regno)
23478 if (regno == PC_REGNUM)
23479 return NO_REGS;
23481 if (TARGET_THUMB1)
23483 if (regno == STACK_POINTER_REGNUM)
23484 return STACK_REG;
23485 if (regno == CC_REGNUM)
23486 return CC_REG;
23487 if (regno < 8)
23488 return LO_REGS;
23489 return HI_REGS;
23492 if (TARGET_THUMB2 && regno < 8)
23493 return LO_REGS;
23495 if ( regno <= LAST_ARM_REGNUM
23496 || regno == FRAME_POINTER_REGNUM
23497 || regno == ARG_POINTER_REGNUM)
23498 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23500 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23501 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23503 if (IS_VFP_REGNUM (regno))
23505 if (regno <= D7_VFP_REGNUM)
23506 return VFP_D0_D7_REGS;
23507 else if (regno <= LAST_LO_VFP_REGNUM)
23508 return VFP_LO_REGS;
23509 else
23510 return VFP_HI_REGS;
23513 if (IS_IWMMXT_REGNUM (regno))
23514 return IWMMXT_REGS;
23516 if (IS_IWMMXT_GR_REGNUM (regno))
23517 return IWMMXT_GR_REGS;
23519 return NO_REGS;
23522 /* Handle a special case when computing the offset
23523 of an argument from the frame pointer. */
23525 arm_debugger_arg_offset (int value, rtx addr)
23527 rtx_insn *insn;
23529 /* We are only interested if dbxout_parms() failed to compute the offset. */
23530 if (value != 0)
23531 return 0;
23533 /* We can only cope with the case where the address is held in a register. */
23534 if (!REG_P (addr))
23535 return 0;
23537 /* If we are using the frame pointer to point at the argument, then
23538 an offset of 0 is correct. */
23539 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23540 return 0;
23542 /* If we are using the stack pointer to point at the
23543 argument, then an offset of 0 is correct. */
23544 /* ??? Check this is consistent with thumb2 frame layout. */
23545 if ((TARGET_THUMB || !frame_pointer_needed)
23546 && REGNO (addr) == SP_REGNUM)
23547 return 0;
23549 /* Oh dear. The argument is pointed to by a register rather
23550 than being held in a register, or being stored at a known
23551 offset from the frame pointer. Since GDB only understands
23552 those two kinds of argument we must translate the address
23553 held in the register into an offset from the frame pointer.
23554 We do this by searching through the insns for the function
23555 looking to see where this register gets its value. If the
23556 register is initialized from the frame pointer plus an offset
23557 then we are in luck and we can continue, otherwise we give up.
23559 This code is exercised by producing debugging information
23560 for a function with arguments like this:
23562 double func (double a, double b, int c, double d) {return d;}
23564 Without this code the stab for parameter 'd' will be set to
23565 an offset of 0 from the frame pointer, rather than 8. */
23567 /* The if() statement says:
23569 If the insn is a normal instruction
23570 and if the insn is setting the value in a register
23571 and if the register being set is the register holding the address of the argument
23572 and if the address is computed by an addition
23573 that involves adding to a register
23574 which is the frame pointer
23575 a constant integer
23577 then... */
23579 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23581 if ( NONJUMP_INSN_P (insn)
23582 && GET_CODE (PATTERN (insn)) == SET
23583 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23584 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23585 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23586 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23587 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23590 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23592 break;
23596 if (value == 0)
23598 debug_rtx (addr);
23599 warning (0, "unable to compute real location of stacked parameter");
23600 value = 8; /* XXX magic hack */
23603 return value;
23606 /* Implement TARGET_PROMOTED_TYPE. */
23608 static tree
23609 arm_promoted_type (const_tree t)
23611 if (SCALAR_FLOAT_TYPE_P (t)
23612 && TYPE_PRECISION (t) == 16
23613 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23614 return float_type_node;
23615 return NULL_TREE;
23618 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23619 This simply adds HFmode as a supported mode; even though we don't
23620 implement arithmetic on this type directly, it's supported by
23621 optabs conversions, much the way the double-word arithmetic is
23622 special-cased in the default hook. */
23624 static bool
23625 arm_scalar_mode_supported_p (scalar_mode mode)
23627 if (mode == HFmode)
23628 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23629 else if (ALL_FIXED_POINT_MODE_P (mode))
23630 return true;
23631 else
23632 return default_scalar_mode_supported_p (mode);
23635 /* Set the value of FLT_EVAL_METHOD.
23636 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23638 0: evaluate all operations and constants, whose semantic type has at
23639 most the range and precision of type float, to the range and
23640 precision of float; evaluate all other operations and constants to
23641 the range and precision of the semantic type;
23643 N, where _FloatN is a supported interchange floating type:
23644 evaluate all operations and constants, whose semantic type has at
23645 most the range and precision of _FloatN type, to the range and
23646 precision of the _FloatN type; evaluate all other operations and
23647 constants to the range and precision of the semantic type;
23649 If we have the ARMv8.2-A extensions then we support _Float16 in native
23650 precision, so we should set this to 16. Otherwise, we support the type,
23651 but want to evaluate expressions in float precision, so set this to
23652 0. */
23654 static enum flt_eval_method
23655 arm_excess_precision (enum excess_precision_type type)
23657 switch (type)
23659 case EXCESS_PRECISION_TYPE_FAST:
23660 case EXCESS_PRECISION_TYPE_STANDARD:
23661 /* We can calculate either in 16-bit range and precision or
23662 32-bit range and precision. Make that decision based on whether
23663 we have native support for the ARMv8.2-A 16-bit floating-point
23664 instructions or not. */
23665 return (TARGET_VFP_FP16INST
23666 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23667 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23668 case EXCESS_PRECISION_TYPE_IMPLICIT:
23669 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23670 default:
23671 gcc_unreachable ();
23673 return FLT_EVAL_METHOD_UNPREDICTABLE;
23677 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23678 _Float16 if we are using anything other than IEEE format for 16-bit
23679 floating point. Otherwise, punt to the default implementation. */
23680 static opt_scalar_float_mode
23681 arm_floatn_mode (int n, bool extended)
23683 if (!extended && n == 16)
23685 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23686 return HFmode;
23687 return opt_scalar_float_mode ();
23690 return default_floatn_mode (n, extended);
23694 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23695 not to early-clobber SRC registers in the process.
23697 We assume that the operands described by SRC and DEST represent a
23698 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23699 number of components into which the copy has been decomposed. */
23700 void
23701 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23703 unsigned int i;
23705 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23706 || REGNO (operands[0]) < REGNO (operands[1]))
23708 for (i = 0; i < count; i++)
23710 operands[2 * i] = dest[i];
23711 operands[2 * i + 1] = src[i];
23714 else
23716 for (i = 0; i < count; i++)
23718 operands[2 * i] = dest[count - i - 1];
23719 operands[2 * i + 1] = src[count - i - 1];
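/* Added example (hypothetical operands): for a two-part copy with
   dest = { d1, d2 } and src = { d0, d1 }, operands[0] overlaps operands[1]
   and has the higher register number, so the second loop reverses the order
   and lists the d2 <- d1 move before d1 <- d0, which stops the first move
   from clobbering the source of the second.  */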
23724 /* Split operands into moves from op[1] + op[2] into op[0]. */
23726 void
23727 neon_split_vcombine (rtx operands[3])
23729 unsigned int dest = REGNO (operands[0]);
23730 unsigned int src1 = REGNO (operands[1]);
23731 unsigned int src2 = REGNO (operands[2]);
23732 machine_mode halfmode = GET_MODE (operands[1]);
23733 unsigned int halfregs = REG_NREGS (operands[1]);
23734 rtx destlo, desthi;
23736 if (src1 == dest && src2 == dest + halfregs)
23738 /* No-op move. Can't split to nothing; emit something. */
23739 emit_note (NOTE_INSN_DELETED);
23740 return;
23743 /* Preserve register attributes for variable tracking. */
23744 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23745 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23746 GET_MODE_SIZE (halfmode));
23748 /* Special case of reversed high/low parts. Use VSWP. */
23749 if (src2 == dest && src1 == dest + halfregs)
23751 rtx x = gen_rtx_SET (destlo, operands[1]);
23752 rtx y = gen_rtx_SET (desthi, operands[2]);
23753 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23754 return;
23757 if (!reg_overlap_mentioned_p (operands[2], destlo))
23759 /* Try to avoid unnecessary moves if part of the result
23760 is in the right place already. */
23761 if (src1 != dest)
23762 emit_move_insn (destlo, operands[1]);
23763 if (src2 != dest + halfregs)
23764 emit_move_insn (desthi, operands[2]);
23766 else
23768 if (src2 != dest + halfregs)
23769 emit_move_insn (desthi, operands[2]);
23770 if (src1 != dest)
23771 emit_move_insn (destlo, operands[1]);
23775 /* Return the number (counting from 0) of
23776 the least significant set bit in MASK. */
23778 inline static int
23779 number_of_first_bit_set (unsigned mask)
23781 return ctz_hwi (mask);
23784 /* Like emit_multi_reg_push, but allowing for a different set of
23785 registers to be described as saved. MASK is the set of registers
23786 to be saved; REAL_REGS is the set of registers to be described as
23787 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23789 static rtx_insn *
23790 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23792 unsigned long regno;
23793 rtx par[10], tmp, reg;
23794 rtx_insn *insn;
23795 int i, j;
23797 /* Build the parallel of the registers actually being stored. */
23798 for (i = 0; mask; ++i, mask &= mask - 1)
23800 regno = ctz_hwi (mask);
23801 reg = gen_rtx_REG (SImode, regno);
23803 if (i == 0)
23804 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23805 else
23806 tmp = gen_rtx_USE (VOIDmode, reg);
23808 par[i] = tmp;
23811 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23812 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23813 tmp = gen_frame_mem (BLKmode, tmp);
23814 tmp = gen_rtx_SET (tmp, par[0]);
23815 par[0] = tmp;
23817 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23818 insn = emit_insn (tmp);
23820 /* Always build the stack adjustment note for unwind info. */
23821 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23822 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23823 par[0] = tmp;
23825 /* Build the parallel of the registers recorded as saved for unwind. */
23826 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23828 regno = ctz_hwi (real_regs);
23829 reg = gen_rtx_REG (SImode, regno);
23831 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23832 tmp = gen_frame_mem (SImode, tmp);
23833 tmp = gen_rtx_SET (tmp, reg);
23834 RTX_FRAME_RELATED_P (tmp) = 1;
23835 par[j + 1] = tmp;
23838 if (j == 0)
23839 tmp = par[0];
23840 else
23842 RTX_FRAME_RELATED_P (par[0]) = 1;
23843 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23846 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23848 return insn;
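/* Added example (illustrative arguments): thumb1_emit_multi_reg_push (0xf,
   0xf) emits a single push {r0-r3} -- a PARALLEL whose first element stores
   the block at the pre-modified SP and whose remaining elements are USEs of
   r1-r3 -- and attaches a REG_FRAME_RELATED_EXPR note recording the 16-byte
   stack decrement together with the individual saves of r0-r3 for the
   unwinder.  */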
23851 /* Emit code to push or pop registers to or from the stack. F is the
23852 assembly file. MASK is the registers to pop. */
23853 static void
23854 thumb_pop (FILE *f, unsigned long mask)
23856 int regno;
23857 int lo_mask = mask & 0xFF;
23859 gcc_assert (mask);
23861 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23863 /* Special case. Do not generate a POP PC statement here, do it in
23864 thumb_exit (). */
23865 thumb_exit (f, -1);
23866 return;
23869 fprintf (f, "\tpop\t{");
23871 /* Look at the low registers first. */
23872 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23874 if (lo_mask & 1)
23876 asm_fprintf (f, "%r", regno);
23878 if ((lo_mask & ~1) != 0)
23879 fprintf (f, ", ");
23883 if (mask & (1 << PC_REGNUM))
23885 /* Catch popping the PC. */
23886 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23887 || IS_CMSE_ENTRY (arm_current_func_type ()))
23889 /* The PC is never popped directly; instead
23890 it is popped into r3 and then BX is used. */
23891 fprintf (f, "}\n");
23893 thumb_exit (f, -1);
23895 return;
23897 else
23899 if (mask & 0xFF)
23900 fprintf (f, ", ");
23902 asm_fprintf (f, "%r", PC_REGNUM);
23906 fprintf (f, "}\n");
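/* Added example of the output (illustrative mask): thumb_pop (f, 0x8030),
   i.e. r4, r5 and the PC, prints "pop\t{r4, r5, pc}" when nothing prevents
   popping the PC directly; with interworking, TARGET_BACKTRACE,
   __builtin_eh_return or a CMSE entry function the low registers are still
   popped here but the PC is handed to thumb_exit instead.  */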
23909 /* Generate code to return from a thumb function.
23910 If 'reg_containing_return_addr' is -1, then the return address is
23911 actually on the stack, at the stack pointer. */
23912 static void
23913 thumb_exit (FILE *f, int reg_containing_return_addr)
23915 unsigned regs_available_for_popping;
23916 unsigned regs_to_pop;
23917 int pops_needed;
23918 unsigned available;
23919 unsigned required;
23920 machine_mode mode;
23921 int size;
23922 int restore_a4 = FALSE;
23924 /* Compute the registers we need to pop. */
23925 regs_to_pop = 0;
23926 pops_needed = 0;
23928 if (reg_containing_return_addr == -1)
23930 regs_to_pop |= 1 << LR_REGNUM;
23931 ++pops_needed;
23934 if (TARGET_BACKTRACE)
23936 /* Restore the (ARM) frame pointer and stack pointer. */
23937 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23938 pops_needed += 2;
23941 /* If there is nothing to pop then just emit the BX instruction and
23942 return. */
23943 if (pops_needed == 0)
23945 if (crtl->calls_eh_return)
23946 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23948 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23950 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23951 reg_containing_return_addr);
23952 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23954 else
23955 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23956 return;
23958 /* Otherwise if we are not supporting interworking and we have not created
23959 a backtrace structure and the function was not entered in ARM mode then
23960 just pop the return address straight into the PC. */
23961 else if (!TARGET_INTERWORK
23962 && !TARGET_BACKTRACE
23963 && !is_called_in_ARM_mode (current_function_decl)
23964 && !crtl->calls_eh_return
23965 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23967 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23968 return;
23971 /* Find out how many of the (return) argument registers we can corrupt. */
23972 regs_available_for_popping = 0;
23974 /* If returning via __builtin_eh_return, the bottom three registers
23975 all contain information needed for the return. */
23976 if (crtl->calls_eh_return)
23977 size = 12;
23978 else
23980 /* We can deduce the registers used from the function's
23981 return value. This is more reliable than examining
23982 df_regs_ever_live_p () because that will be set if the register is
23983 ever used in the function, not just if the register is used
23984 to hold a return value. */
23986 if (crtl->return_rtx != 0)
23987 mode = GET_MODE (crtl->return_rtx);
23988 else
23989 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23991 size = GET_MODE_SIZE (mode);
23993 if (size == 0)
23995 /* In a void function we can use any argument register.
23996 In a function that returns a structure on the stack
23997 we can use the second and third argument registers. */
23998 if (mode == VOIDmode)
23999 regs_available_for_popping =
24000 (1 << ARG_REGISTER (1))
24001 | (1 << ARG_REGISTER (2))
24002 | (1 << ARG_REGISTER (3));
24003 else
24004 regs_available_for_popping =
24005 (1 << ARG_REGISTER (2))
24006 | (1 << ARG_REGISTER (3));
24008 else if (size <= 4)
24009 regs_available_for_popping =
24010 (1 << ARG_REGISTER (2))
24011 | (1 << ARG_REGISTER (3));
24012 else if (size <= 8)
24013 regs_available_for_popping =
24014 (1 << ARG_REGISTER (3));
24017 /* Match registers to be popped with registers into which we pop them. */
24018 for (available = regs_available_for_popping,
24019 required = regs_to_pop;
24020 required != 0 && available != 0;
24021 available &= ~(available & - available),
24022 required &= ~(required & - required))
24023 -- pops_needed;
24025 /* If we have any popping registers left over, remove them. */
24026 if (available > 0)
24027 regs_available_for_popping &= ~available;
24029 /* Otherwise if we need another popping register we can use
24030 the fourth argument register. */
24031 else if (pops_needed)
24033 /* If we have not found any free argument registers and
24034 reg a4 contains the return address, we must move it. */
24035 if (regs_available_for_popping == 0
24036 && reg_containing_return_addr == LAST_ARG_REGNUM)
24038 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24039 reg_containing_return_addr = LR_REGNUM;
24041 else if (size > 12)
24043 /* Register a4 is being used to hold part of the return value,
24044 but we have dire need of a free, low register. */
24045 restore_a4 = TRUE;
24047 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24050 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24052 /* The fourth argument register is available. */
24053 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24055 --pops_needed;
24059 /* Pop as many registers as we can. */
24060 thumb_pop (f, regs_available_for_popping);
24062 /* Process the registers we popped. */
24063 if (reg_containing_return_addr == -1)
24065 /* The return address was popped into the lowest numbered register. */
24066 regs_to_pop &= ~(1 << LR_REGNUM);
24068 reg_containing_return_addr =
24069 number_of_first_bit_set (regs_available_for_popping);
24071 /* Remove this register from the mask of available registers, so that
24072 the return address will not be corrupted by further pops. */
24073 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24076 /* If we popped other registers then handle them here. */
24077 if (regs_available_for_popping)
24079 int frame_pointer;
24081 /* Work out which register currently contains the frame pointer. */
24082 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24084 /* Move it into the correct place. */
24085 asm_fprintf (f, "\tmov\t%r, %r\n",
24086 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24088 /* (Temporarily) remove it from the mask of popped registers. */
24089 regs_available_for_popping &= ~(1 << frame_pointer);
24090 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24092 if (regs_available_for_popping)
24094 int stack_pointer;
24096 /* We popped the stack pointer as well,
24097 find the register that contains it. */
24098 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24100 /* Move it into the stack register. */
24101 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24103 /* At this point we have popped all necessary registers, so
24104 do not worry about restoring regs_available_for_popping
24105 to its correct value:
24107 assert (pops_needed == 0)
24108 assert (regs_available_for_popping == (1 << frame_pointer))
24109 assert (regs_to_pop == (1 << STACK_POINTER)) */
24111 else
24113 /* Since we have just moved the popped value into the frame
24114 pointer, the popping register is available for reuse, and
24115 we know that we still have the stack pointer left to pop. */
24116 regs_available_for_popping |= (1 << frame_pointer);
24120 /* If we still have registers left on the stack, but we no longer have
24121 any registers into which we can pop them, then we must move the return
24122 address into the link register and make available the register that
24123 contained it. */
24124 if (regs_available_for_popping == 0 && pops_needed > 0)
24126 regs_available_for_popping |= 1 << reg_containing_return_addr;
24128 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24129 reg_containing_return_addr);
24131 reg_containing_return_addr = LR_REGNUM;
24134 /* If we have registers left on the stack then pop some more.
24135 We know that at most we will want to pop FP and SP. */
24136 if (pops_needed > 0)
24138 int popped_into;
24139 int move_to;
24141 thumb_pop (f, regs_available_for_popping);
24143 /* We have popped either FP or SP.
24144 Move whichever one it is into the correct register. */
24145 popped_into = number_of_first_bit_set (regs_available_for_popping);
24146 move_to = number_of_first_bit_set (regs_to_pop);
24148 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24149 --pops_needed;
24152 /* If we still have not popped everything then we must have only
24153 had one register available to us and we are now popping the SP. */
24154 if (pops_needed > 0)
24156 int popped_into;
24158 thumb_pop (f, regs_available_for_popping);
24160 popped_into = number_of_first_bit_set (regs_available_for_popping);
24162 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24164 /* assert (regs_to_pop == (1 << STACK_POINTER))
24165 assert (pops_needed == 1) */
24169 /* If necessary restore the a4 register. */
24170 if (restore_a4)
24172 if (reg_containing_return_addr != LR_REGNUM)
24174 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24175 reg_containing_return_addr = LR_REGNUM;
24178 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24181 if (crtl->calls_eh_return)
24182 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24184 /* Return to caller. */
24185 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24187 /* This is for the cases where LR is not being used to contain the return
24188 address. It may therefore contain information that we might not want
24189 to leak, hence it must be cleared. The value in R0 will never be a
24190 secret at this point, so it is safe to use it, see the clearing code
24191 in 'cmse_nonsecure_entry_clear_before_return'. */
24192 if (reg_containing_return_addr != LR_REGNUM)
24193 asm_fprintf (f, "\tmov\tlr, r0\n");
24195 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24196 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24198 else
24199 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24202 /* Scan INSN just before assembler is output for it.
24203 For Thumb-1, we track the status of the condition codes; this
24204 information is used in the cbranchsi4_insn pattern. */
24205 void
24206 thumb1_final_prescan_insn (rtx_insn *insn)
24208 if (flag_print_asm_name)
24209 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24210 INSN_ADDRESSES (INSN_UID (insn)));
24211 /* Don't overwrite the previous setter when we get to a cbranch. */
24212 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24214 enum attr_conds conds;
24216 if (cfun->machine->thumb1_cc_insn)
24218 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24219 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24220 CC_STATUS_INIT;
24222 conds = get_attr_conds (insn);
24223 if (conds == CONDS_SET)
24225 rtx set = single_set (insn);
24226 cfun->machine->thumb1_cc_insn = insn;
24227 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24228 cfun->machine->thumb1_cc_op1 = const0_rtx;
24229 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24230 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24232 rtx src1 = XEXP (SET_SRC (set), 1);
24233 if (src1 == const0_rtx)
24234 cfun->machine->thumb1_cc_mode = CCmode;
24236 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24238 /* Record the src register operand instead of dest because
24239 cprop_hardreg pass propagates src. */
24240 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24243 else if (conds != CONDS_NOCOND)
24244 cfun->machine->thumb1_cc_insn = NULL_RTX;
24247 /* Check if unexpected far jump is used. */
24248 if (cfun->machine->lr_save_eliminated
24249 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24250 internal_error("Unexpected thumb1 far jump");
24254 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24256 unsigned HOST_WIDE_INT mask = 0xff;
24257 int i;
24259 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24260 if (val == 0) /* XXX */
24261 return 0;
24263 for (i = 0; i < 25; i++)
24264 if ((val & (mask << i)) == val)
24265 return 1;
24267 return 0;
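/* Added examples (illustrative values): 0x00ff0000 passes the test at
   i == 16, because every set bit fits under a single 0xff window shifted
   left -- something Thumb-1 can build with an 8-bit move followed by a left
   shift -- whereas 0x00ff00ff has set bits spread over more than eight
   contiguous positions and the function returns 0.  */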
24270 /* Returns nonzero if the current function contains,
24271 or might contain a far jump. */
24272 static int
24273 thumb_far_jump_used_p (void)
24275 rtx_insn *insn;
24276 bool far_jump = false;
24277 unsigned int func_size = 0;
24279 /* If we have already decided that far jumps may be used,
24280 do not bother checking again, and always return true even if
24281 it turns out that they are not being used. Once we have made
24282 the decision that far jumps are present (and that hence the link
24283 register will be pushed onto the stack) we cannot go back on it. */
24284 if (cfun->machine->far_jump_used)
24285 return 1;
24287 /* If this function is not being called from the prologue/epilogue
24288 generation code then it must be being called from the
24289 INITIAL_ELIMINATION_OFFSET macro. */
24290 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24292 /* In this case we know that we are being asked about the elimination
24293 of the arg pointer register. If that register is not being used,
24294 then there are no arguments on the stack, and we do not have to
24295 worry that a far jump might force the prologue to push the link
24296 register, changing the stack offsets. In this case we can just
24297 return false, since the presence of far jumps in the function will
24298 not affect stack offsets.
24300 If the arg pointer is live (or if it was live, but has now been
24301 eliminated and so set to dead) then we do have to test to see if
24302 the function might contain a far jump. This test can lead to some
24303 false negatives, since before reload is completed, the length of
24304 branch instructions is not known, so gcc defaults to returning their
24305 longest length, which in turn sets the far jump attribute to true.
24307 A false negative will not result in bad code being generated, but it
24308 will result in a needless push and pop of the link register. We
24309 hope that this does not occur too often.
24311 If we need doubleword stack alignment this could affect the other
24312 elimination offsets so we can't risk getting it wrong. */
24313 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24314 cfun->machine->arg_pointer_live = 1;
24315 else if (!cfun->machine->arg_pointer_live)
24316 return 0;
24319 /* We should not change far_jump_used during or after reload, as there is
24320 no chance to change stack frame layout. */
24321 if (reload_in_progress || reload_completed)
24322 return 0;
24324 /* Check to see if the function contains a branch
24325 insn with the far jump attribute set. */
24326 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24328 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24330 far_jump = true;
24332 func_size += get_attr_length (insn);
24335 /* The far_jump attribute will always be true for thumb1 before the
24336 shorten_branch pass, so checking the far_jump attribute before
24337 shorten_branch isn't very useful.
24339 The following heuristic tries to estimate more accurately whether a far
24340 jump may finally be needed. The heuristic is very conservative, as there
24341 is no chance to roll back a decision not to use far jumps.
24343 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
24344 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24345 function size 2048/3 as the threshold is conservative enough. */
24346 if (far_jump)
24348 if ((func_size * 3) >= 2048)
24350 /* Record the fact that we have decided that
24351 the function does use far jumps. */
24352 cfun->machine->far_jump_used = 1;
24353 return 1;
24357 return 0;
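/* Added note on the numbers above (interpretation, not from the source):
   the check (func_size * 3) >= 2048 first triggers at a summed insn length
   of 683 bytes.  In the worst case described in the comment each 2-byte
   insn drags in a 4-byte literal pool entry, so code plus pools can span up
   to three times func_size, and 3 * 683 already exceeds the -2048..2046
   branch range, hence far jumps are assumed from that point on.  */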
24360 /* Return nonzero if FUNC must be entered in ARM mode. */
24361 static bool
24362 is_called_in_ARM_mode (tree func)
24364 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24366 /* Ignore the problem about functions whose address is taken. */
24367 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24368 return true;
24370 #ifdef ARM_PE
24371 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24372 #else
24373 return false;
24374 #endif
24377 /* Given the stack offsets and register mask in OFFSETS, decide how
24378 many additional registers to push instead of subtracting a constant
24379 from SP. For epilogues the principle is the same except we use pop.
24380 FOR_PROLOGUE indicates which we're generating. */
24381 static int
24382 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24384 HOST_WIDE_INT amount;
24385 unsigned long live_regs_mask = offsets->saved_regs_mask;
24386 /* Extract a mask of the ones we can give to the Thumb's push/pop
24387 instruction. */
24388 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24389 /* Then count how many other high registers will need to be pushed. */
24390 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24391 int n_free, reg_base, size;
24393 if (!for_prologue && frame_pointer_needed)
24394 amount = offsets->locals_base - offsets->saved_regs;
24395 else
24396 amount = offsets->outgoing_args - offsets->saved_regs;
24398 /* If the stack frame size is 512 exactly, we can save one load
24399 instruction, which should make this a win even when optimizing
24400 for speed. */
24401 if (!optimize_size && amount != 512)
24402 return 0;
24404 /* Can't do this if there are high registers to push. */
24405 if (high_regs_pushed != 0)
24406 return 0;
24408 /* Shouldn't do it in the prologue if no registers would normally
24409 be pushed at all. In the epilogue, also allow it if we'll have
24410 a pop insn for the PC. */
24411 if (l_mask == 0
24412 && (for_prologue
24413 || TARGET_BACKTRACE
24414 || (live_regs_mask & 1 << LR_REGNUM) == 0
24415 || TARGET_INTERWORK
24416 || crtl->args.pretend_args_size != 0))
24417 return 0;
24419 /* Don't do this if thumb_expand_prologue wants to emit instructions
24420 between the push and the stack frame allocation. */
24421 if (for_prologue
24422 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24423 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24424 return 0;
24426 reg_base = 0;
24427 n_free = 0;
24428 if (!for_prologue)
24430 size = arm_size_return_regs ();
24431 reg_base = ARM_NUM_INTS (size);
24432 live_regs_mask >>= reg_base;
24435 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24436 && (for_prologue || call_used_regs[reg_base + n_free]))
24438 live_regs_mask >>= 1;
24439 n_free++;
24442 if (n_free == 0)
24443 return 0;
24444 gcc_assert (amount / 4 * 4 == amount);
24446 if (amount >= 512 && (amount - n_free * 4) < 512)
24447 return (amount - 508) / 4;
24448 if (amount <= n_free * 4)
24449 return amount / 4;
24450 return 0;
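/* Worked example (added commentary, hypothetical frame): with amount == 512,
   no high registers to save and, say, n_free == 2 free low registers, the
   test amount >= 512 && (amount - n_free * 4) < 512 holds and the function
   returns (512 - 508) / 4 == 1, i.e. push one extra register so that the
   remaining adjustment of 508 fits a single Thumb-1 SP add/sub immediate.  */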
24453 /* The bits which aren't usefully expanded as rtl. */
24454 const char *
24455 thumb1_unexpanded_epilogue (void)
24457 arm_stack_offsets *offsets;
24458 int regno;
24459 unsigned long live_regs_mask = 0;
24460 int high_regs_pushed = 0;
24461 int extra_pop;
24462 int had_to_push_lr;
24463 int size;
24465 if (cfun->machine->return_used_this_function != 0)
24466 return "";
24468 if (IS_NAKED (arm_current_func_type ()))
24469 return "";
24471 offsets = arm_get_frame_offsets ();
24472 live_regs_mask = offsets->saved_regs_mask;
24473 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24475 /* We can deduce the registers used from the function's return value.
24476 This is more reliable than examining df_regs_ever_live_p () because that
24477 will be set if the register is ever used in the function, not just if
24478 the register is used to hold a return value. */
24479 size = arm_size_return_regs ();
24481 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24482 if (extra_pop > 0)
24484 unsigned long extra_mask = (1 << extra_pop) - 1;
24485 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24488 /* The prologue may have pushed some high registers to use as
24489 work registers, e.g. the testsuite file:
24490 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24491 compiles to produce:
24492 push {r4, r5, r6, r7, lr}
24493 mov r7, r9
24494 mov r6, r8
24495 push {r6, r7}
24496 as part of the prologue. We have to undo that pushing here. */
24498 if (high_regs_pushed)
24500 unsigned long mask = live_regs_mask & 0xff;
24501 int next_hi_reg;
24503 /* The available low registers depend on the size of the value we are
24504 returning. */
24505 if (size <= 12)
24506 mask |= 1 << 3;
24507 if (size <= 8)
24508 mask |= 1 << 2;
24510 if (mask == 0)
24511 /* Oh dear! We have no low registers into which we can pop
24512 high registers! */
24513 internal_error
24514 ("no low registers available for popping high registers");
24516 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24517 if (live_regs_mask & (1 << next_hi_reg))
24518 break;
24520 while (high_regs_pushed)
24522 /* Find lo register(s) into which the high register(s) can
24523 be popped. */
24524 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24526 if (mask & (1 << regno))
24527 high_regs_pushed--;
24528 if (high_regs_pushed == 0)
24529 break;
24532 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24534 /* Pop the values into the low register(s). */
24535 thumb_pop (asm_out_file, mask);
24537 /* Move the value(s) into the high registers. */
24538 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24540 if (mask & (1 << regno))
24542 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24543 regno);
24545 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24546 if (live_regs_mask & (1 << next_hi_reg))
24547 break;
24551 live_regs_mask &= ~0x0f00;
24554 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24555 live_regs_mask &= 0xff;
24557 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24559 /* Pop the return address into the PC. */
24560 if (had_to_push_lr)
24561 live_regs_mask |= 1 << PC_REGNUM;
24563 /* Either no argument registers were pushed or a backtrace
24564 structure was created which includes an adjusted stack
24565 pointer, so just pop everything. */
24566 if (live_regs_mask)
24567 thumb_pop (asm_out_file, live_regs_mask);
24569 /* We have either just popped the return address into the
24570 PC or it was kept in LR for the entire function.
24571 Note that thumb_pop has already called thumb_exit if the
24572 PC was in the list. */
24573 if (!had_to_push_lr)
24574 thumb_exit (asm_out_file, LR_REGNUM);
24576 else
24578 /* Pop everything but the return address. */
24579 if (live_regs_mask)
24580 thumb_pop (asm_out_file, live_regs_mask);
24582 if (had_to_push_lr)
24584 if (size > 12)
24586 /* We have no free low regs, so save one. */
24587 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24588 LAST_ARG_REGNUM);
24591 /* Get the return address into a temporary register. */
24592 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24594 if (size > 12)
24596 /* Move the return address to lr. */
24597 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24598 LAST_ARG_REGNUM);
24599 /* Restore the low register. */
24600 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24601 IP_REGNUM);
24602 regno = LR_REGNUM;
24604 else
24605 regno = LAST_ARG_REGNUM;
24607 else
24608 regno = LR_REGNUM;
24610 /* Remove the argument registers that were pushed onto the stack. */
24611 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24612 SP_REGNUM, SP_REGNUM,
24613 crtl->args.pretend_args_size);
24615 thumb_exit (asm_out_file, regno);
24618 return "";
24621 /* Functions to save and restore machine-specific function data. */
24622 static struct machine_function *
24623 arm_init_machine_status (void)
24625 struct machine_function *machine;
24626 machine = ggc_cleared_alloc<machine_function> ();
24628 #if ARM_FT_UNKNOWN != 0
24629 machine->func_type = ARM_FT_UNKNOWN;
24630 #endif
24631 return machine;
24634 /* Return an RTX indicating where the return address to the
24635 calling function can be found. */
24637 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24639 if (count != 0)
24640 return NULL_RTX;
24642 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24645 /* Do anything needed before RTL is emitted for each function. */
24646 void
24647 arm_init_expanders (void)
24649 /* Arrange to initialize and mark the machine per-function status. */
24650 init_machine_status = arm_init_machine_status;
24652 /* This is to stop the combine pass optimizing away the alignment
24653 adjustment of va_arg. */
24654 /* ??? It is claimed that this should not be necessary. */
24655 if (cfun)
24656 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24659 /* Check that FUNC is called with a different mode. */
24661 bool
24662 arm_change_mode_p (tree func)
24664 if (TREE_CODE (func) != FUNCTION_DECL)
24665 return false;
24667 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24669 if (!callee_tree)
24670 callee_tree = target_option_default_node;
24672 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24673 int flags = callee_opts->x_target_flags;
24675 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24678 /* Like arm_compute_initial_elimination_offset. Simpler because there
24679 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24680 to point at the base of the local variables after static stack
24681 space for a function has been allocated. */
24683 HOST_WIDE_INT
24684 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24686 arm_stack_offsets *offsets;
24688 offsets = arm_get_frame_offsets ();
24690 switch (from)
24692 case ARG_POINTER_REGNUM:
24693 switch (to)
24695 case STACK_POINTER_REGNUM:
24696 return offsets->outgoing_args - offsets->saved_args;
24698 case FRAME_POINTER_REGNUM:
24699 return offsets->soft_frame - offsets->saved_args;
24701 case ARM_HARD_FRAME_POINTER_REGNUM:
24702 return offsets->saved_regs - offsets->saved_args;
24704 case THUMB_HARD_FRAME_POINTER_REGNUM:
24705 return offsets->locals_base - offsets->saved_args;
24707 default:
24708 gcc_unreachable ();
24710 break;
24712 case FRAME_POINTER_REGNUM:
24713 switch (to)
24715 case STACK_POINTER_REGNUM:
24716 return offsets->outgoing_args - offsets->soft_frame;
24718 case ARM_HARD_FRAME_POINTER_REGNUM:
24719 return offsets->saved_regs - offsets->soft_frame;
24721 case THUMB_HARD_FRAME_POINTER_REGNUM:
24722 return offsets->locals_base - offsets->soft_frame;
24724 default:
24725 gcc_unreachable ();
24727 break;
24729 default:
24730 gcc_unreachable ();
24734 /* Generate the function's prologue. */
24736 void
24737 thumb1_expand_prologue (void)
24739 rtx_insn *insn;
24741 HOST_WIDE_INT amount;
24742 HOST_WIDE_INT size;
24743 arm_stack_offsets *offsets;
24744 unsigned long func_type;
24745 int regno;
24746 unsigned long live_regs_mask;
24747 unsigned long l_mask;
24748 unsigned high_regs_pushed = 0;
24749 bool lr_needs_saving;
24751 func_type = arm_current_func_type ();
24753 /* Naked functions don't have prologues. */
24754 if (IS_NAKED (func_type))
24756 if (flag_stack_usage_info)
24757 current_function_static_stack_size = 0;
24758 return;
24761 if (IS_INTERRUPT (func_type))
24763 error ("interrupt Service Routines cannot be coded in Thumb mode");
24764 return;
24767 if (is_called_in_ARM_mode (current_function_decl))
24768 emit_insn (gen_prologue_thumb1_interwork ());
24770 offsets = arm_get_frame_offsets ();
24771 live_regs_mask = offsets->saved_regs_mask;
24772 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24774 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24775 l_mask = live_regs_mask & 0x40ff;
24776 /* Then count how many other high registers will need to be pushed. */
24777 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24779 if (crtl->args.pretend_args_size)
24781 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24783 if (cfun->machine->uses_anonymous_args)
24785 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24786 unsigned long mask;
24788 mask = 1ul << (LAST_ARG_REGNUM + 1);
24789 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24791 insn = thumb1_emit_multi_reg_push (mask, 0);
24793 else
24795 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24796 stack_pointer_rtx, x));
24798 RTX_FRAME_RELATED_P (insn) = 1;
24801 if (TARGET_BACKTRACE)
24803 HOST_WIDE_INT offset = 0;
24804 unsigned work_register;
24805 rtx work_reg, x, arm_hfp_rtx;
24807 /* We have been asked to create a stack backtrace structure.
24808 The code looks like this:
24810 0 .align 2
24811 0 func:
24812 0 sub SP, #16 Reserve space for 4 registers.
24813 2 push {R7} Push low registers.
24814 4 add R7, SP, #20 Get the stack pointer before the push.
24815 6 str R7, [SP, #8] Store the stack pointer
24816 (before reserving the space).
24817 8 mov R7, PC Get hold of the start of this code + 12.
24818 10 str R7, [SP, #16] Store it.
24819 12 mov R7, FP Get hold of the current frame pointer.
24820 14 str R7, [SP, #4] Store it.
24821 16 mov R7, LR Get hold of the current return address.
24822 18 str R7, [SP, #12] Store it.
24823 20 add R7, SP, #16 Point at the start of the
24824 backtrace structure.
24825 22 mov FP, R7 Put this value into the frame pointer. */
24827 work_register = thumb_find_work_register (live_regs_mask);
24828 work_reg = gen_rtx_REG (SImode, work_register);
24829 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24831 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24832 stack_pointer_rtx, GEN_INT (-16)));
24833 RTX_FRAME_RELATED_P (insn) = 1;
24835 if (l_mask)
24837 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24838 RTX_FRAME_RELATED_P (insn) = 1;
24839 lr_needs_saving = false;
24841 offset = bit_count (l_mask) * UNITS_PER_WORD;
24844 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24845 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24847 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24848 x = gen_frame_mem (SImode, x);
24849 emit_move_insn (x, work_reg);
24851 /* Make sure that the instruction fetching the PC is in the right place
24852 to calculate "start of backtrace creation code + 12". */
24853 /* ??? The stores using the common WORK_REG ought to be enough to
24854 prevent the scheduler from doing anything weird. Failing that
24855 we could always move all of the following into an UNSPEC_VOLATILE. */
24856 if (l_mask)
24858 x = gen_rtx_REG (SImode, PC_REGNUM);
24859 emit_move_insn (work_reg, x);
24861 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24862 x = gen_frame_mem (SImode, x);
24863 emit_move_insn (x, work_reg);
24865 emit_move_insn (work_reg, arm_hfp_rtx);
24867 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24868 x = gen_frame_mem (SImode, x);
24869 emit_move_insn (x, work_reg);
24871 else
24873 emit_move_insn (work_reg, arm_hfp_rtx);
24875 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24876 x = gen_frame_mem (SImode, x);
24877 emit_move_insn (x, work_reg);
24879 x = gen_rtx_REG (SImode, PC_REGNUM);
24880 emit_move_insn (work_reg, x);
24882 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24883 x = gen_frame_mem (SImode, x);
24884 emit_move_insn (x, work_reg);
24887 x = gen_rtx_REG (SImode, LR_REGNUM);
24888 emit_move_insn (work_reg, x);
24890 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24891 x = gen_frame_mem (SImode, x);
24892 emit_move_insn (x, work_reg);
24894 x = GEN_INT (offset + 12);
24895 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24897 emit_move_insn (arm_hfp_rtx, work_reg);
24899 /* Optimization: If we are not pushing any low registers but we are going
24900 to push some high registers then delay our first push. This will just
24901 be a push of LR and we can combine it with the push of the first high
24902 register. */
24903 else if ((l_mask & 0xff) != 0
24904 || (high_regs_pushed == 0 && lr_needs_saving))
24906 unsigned long mask = l_mask;
24907 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24908 insn = thumb1_emit_multi_reg_push (mask, mask);
24909 RTX_FRAME_RELATED_P (insn) = 1;
24910 lr_needs_saving = false;
24913 if (high_regs_pushed)
24915 unsigned pushable_regs;
24916 unsigned next_hi_reg;
24917 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24918 : crtl->args.info.nregs;
24919 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24921 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24922 if (live_regs_mask & (1 << next_hi_reg))
24923 break;
24925 /* Here we need to mask out registers used for passing arguments, even
24926 if they could otherwise be pushed.  This avoids using them to stash the
24927 high registers; such a stash could clobber live argument values. */
24928 pushable_regs = l_mask & (~arg_regs_mask);
24929 if (lr_needs_saving)
24930 pushable_regs &= ~(1 << LR_REGNUM);
24932 if (pushable_regs == 0)
24933 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24935 while (high_regs_pushed > 0)
24937 unsigned long real_regs_mask = 0;
24938 unsigned long push_mask = 0;
24940 for (regno = LR_REGNUM; regno >= 0; regno --)
24942 if (pushable_regs & (1 << regno))
24944 emit_move_insn (gen_rtx_REG (SImode, regno),
24945 gen_rtx_REG (SImode, next_hi_reg));
24947 high_regs_pushed --;
24948 real_regs_mask |= (1 << next_hi_reg);
24949 push_mask |= (1 << regno);
24951 if (high_regs_pushed)
24953 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24954 next_hi_reg --)
24955 if (live_regs_mask & (1 << next_hi_reg))
24956 break;
24958 else
24959 break;
24963 /* If we had to find a work register and we have not yet
24964 saved the LR then add it to the list of regs to push. */
24965 if (lr_needs_saving)
24967 push_mask |= 1 << LR_REGNUM;
24968 real_regs_mask |= 1 << LR_REGNUM;
24969 lr_needs_saving = false;
24972 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24973 RTX_FRAME_RELATED_P (insn) = 1;
24977 /* Load the pic register before setting the frame pointer,
24978 so we can use r7 as a temporary work register. */
24979 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24980 arm_load_pic_register (live_regs_mask);
24982 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24983 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24984 stack_pointer_rtx);
24986 size = offsets->outgoing_args - offsets->saved_args;
24987 if (flag_stack_usage_info)
24988 current_function_static_stack_size = size;
24990 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24991 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24992 sorry ("-fstack-check=specific for Thumb-1");
24994 amount = offsets->outgoing_args - offsets->saved_regs;
24995 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24996 if (amount)
24998 if (amount < 512)
25000 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25001 GEN_INT (- amount)));
25002 RTX_FRAME_RELATED_P (insn) = 1;
25004 else
25006 rtx reg, dwarf;
25008 /* The stack decrement is too big for an immediate value in a single
25009 insn. In theory we could issue multiple subtracts, but after
25010 three of them it becomes more space efficient to place the full
25011 value in the constant pool and load into a register. (Also the
25012 ARM debugger really likes to see only one stack decrement per
25013 function). So instead we look for a scratch register into which
25014 we can load the decrement, and then we subtract this from the
25015 stack pointer. Unfortunately on the thumb the only available
25016 scratch registers are the argument registers, and we cannot use
25017 these as they may hold arguments to the function. Instead we
25018 attempt to locate a call preserved register which is used by this
25019 function. If we can find one, then we know that it will have
25020 been pushed at the start of the prologue and so we can corrupt
25021 it now. */
25022 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25023 if (live_regs_mask & (1 << regno))
25024 break;
25026 gcc_assert(regno <= LAST_LO_REGNUM);
25028 reg = gen_rtx_REG (SImode, regno);
25030 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25032 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25033 stack_pointer_rtx, reg));
25035 dwarf = gen_rtx_SET (stack_pointer_rtx,
25036 plus_constant (Pmode, stack_pointer_rtx,
25037 -amount));
25038 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25039 RTX_FRAME_RELATED_P (insn) = 1;
25043 if (frame_pointer_needed)
25044 thumb_set_frame_pointer (offsets);
25046 /* If we are profiling, make sure no instructions are scheduled before
25047 the call to mcount. Similarly if the user has requested no
25048 scheduling in the prolog. Similarly if we want non-call exceptions
25049 using the EABI unwinder, to prevent faulting instructions from being
25050 swapped with a stack adjustment. */
25051 if (crtl->profile || !TARGET_SCHED_PROLOG
25052 || (arm_except_unwind_info (&global_options) == UI_TARGET
25053 && cfun->can_throw_non_call_exceptions))
25054 emit_insn (gen_blockage ());
25056 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25057 if (live_regs_mask & 0xff)
25058 cfun->machine->lr_save_eliminated = 0;
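/* Illustrative sketch (not part of arm.c): the large-frame path above scans
   for a call-saved low register that is already marked in LIVE_REGS_MASK.
   Such a register has been pushed by the prologue and may therefore be
   corrupted to hold the oversized stack decrement.  The register numbers
   below are assumptions for the sketch, not the real target macros.  */
#define SKETCH_LAST_ARG_REGNUM 3   /* assumed: r0-r3 pass arguments */
#define SKETCH_LAST_LO_REGNUM  7   /* assumed: r4-r7 are the other low regs */

static int
sketch_find_scratch_for_big_decrement (unsigned long live_regs_mask)
{
  for (int regno = SKETCH_LAST_ARG_REGNUM + 1;
       regno <= SKETCH_LAST_LO_REGNUM; regno++)
    if (live_regs_mask & (1UL << regno))
      return regno;            /* already saved, so it may be clobbered */
  return -1;                   /* no candidate found */
}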
25061 /* Clear caller saved registers not used to pass return values and leaked
25062 condition flags before exiting a cmse_nonsecure_entry function. */
25064 void
25065 cmse_nonsecure_entry_clear_before_return (void)
25067 uint64_t to_clear_mask[2];
25068 uint32_t padding_bits_to_clear = 0;
25069 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25070 int regno, maxregno = IP_REGNUM;
25071 tree result_type;
25072 rtx result_rtl;
25074 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25075 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25077 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25078 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25079 to make sure the instructions used to clear them are present. */
25080 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25082 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25083 maxregno = LAST_VFP_REGNUM;
25085 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25086 to_clear_mask[0] |= float_mask;
25088 float_mask = (1ULL << (maxregno - 63)) - 1;
25089 to_clear_mask[1] = float_mask;
25091 /* Make sure we don't clear the two scratch registers used to clear the
25092 relevant FPSCR bits in output_return_instruction. */
25093 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25094 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25095 emit_use (gen_rtx_REG (SImode, 4));
25096 to_clear_mask[0] &= ~(1ULL << 4);
25099 /* If the user has defined registers to be caller saved, these are no longer
25100 restored by the function before returning and must thus be cleared for
25101 security purposes. */
25102 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25104 /* We do not touch registers that can be used to pass arguments as per
25105 the AAPCS, since these should never be made callee-saved by user
25106 options. */
25107 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25108 continue;
25109 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25110 continue;
25111 if (call_used_regs[regno])
25112 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25115 /* Make sure we do not clear the registers that the result is returned in. */
25116 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25117 if (!VOID_TYPE_P (result_type))
25119 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25121 /* No need to check that we return in registers, because we don't
25122 support returning on stack yet. */
25123 to_clear_mask[0]
25124 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25125 padding_bits_to_clear_ptr);
25128 if (padding_bits_to_clear != 0)
25130 rtx reg_rtx;
25131 /* Padding bits to clear is not 0, so we know we are returning a
25132 composite type, which only uses r0.  Make sure that r1-r3 are
25133 cleared too; we will use r1 as a scratch register. */
25134 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25136 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25138 /* Fill the lower half of the negated padding_bits_to_clear. */
25139 emit_move_insn (reg_rtx,
25140 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25142 /* Also fill the top half of the negated padding_bits_to_clear. */
25143 if (((~padding_bits_to_clear) >> 16) > 0)
25144 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25145 GEN_INT (16),
25146 GEN_INT (16)),
25147 GEN_INT ((~padding_bits_to_clear) >> 16)));
25149 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25150 gen_rtx_REG (SImode, R0_REGNUM),
25151 reg_rtx));
25154 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25156 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25157 continue;
25159 if (IS_VFP_REGNUM (regno))
25161 /* If regno is an even vfp register and its successor is also to
25162 be cleared, use vmov. */
25163 if (TARGET_VFP_DOUBLE
25164 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25165 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25167 emit_move_insn (gen_rtx_REG (DFmode, regno),
25168 CONST1_RTX (DFmode));
25169 emit_use (gen_rtx_REG (DFmode, regno));
25170 regno++;
25172 else
25174 emit_move_insn (gen_rtx_REG (SFmode, regno),
25175 CONST1_RTX (SFmode));
25176 emit_use (gen_rtx_REG (SFmode, regno));
25179 else
25181 if (TARGET_THUMB1)
25183 if (regno == R0_REGNUM)
25184 emit_move_insn (gen_rtx_REG (SImode, regno),
25185 const0_rtx);
25186 else
25187 /* R0 has either been cleared before (see code above), or it
25188 holds a return value; either way it is not secret
25189 information. */
25190 emit_move_insn (gen_rtx_REG (SImode, regno),
25191 gen_rtx_REG (SImode, R0_REGNUM));
25192 emit_use (gen_rtx_REG (SImode, regno));
25194 else
25196 emit_move_insn (gen_rtx_REG (SImode, regno),
25197 gen_rtx_REG (SImode, LR_REGNUM));
25198 emit_use (gen_rtx_REG (SImode, regno));
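/* Illustrative sketch (not part of arm.c): the function above tracks the
   registers to clear in a two-element array of 64-bit masks, indexing with
   regno / 64 and testing bit regno % 64.  A minimal stand-alone version of
   that bookkeeping: */
#include <stdint.h>
#include <stdbool.h>

static void
sketch_mask_set (uint64_t mask[2], int regno)
{
  mask[regno / 64] |= (uint64_t) 1 << (regno % 64);
}

static bool
sketch_mask_test (const uint64_t mask[2], int regno)
{
  return (mask[regno / 64] >> (regno % 64)) & 1;
}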
25204 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25205 single POP instruction can be generated.  LR should be replaced by PC.
25206 All the checks required are already done by USE_RETURN_INSN ().  Hence,
25207 all we really need to check here is whether a single register or
25208 multiple registers are to be returned. */
25209 void
25210 thumb2_expand_return (bool simple_return)
25212 int i, num_regs;
25213 unsigned long saved_regs_mask;
25214 arm_stack_offsets *offsets;
25216 offsets = arm_get_frame_offsets ();
25217 saved_regs_mask = offsets->saved_regs_mask;
25219 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25220 if (saved_regs_mask & (1 << i))
25221 num_regs++;
25223 if (!simple_return && saved_regs_mask)
25225 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25226 functions or adapt code to handle according to ACLE. This path should
25227 not be reachable for cmse_nonsecure_entry functions though we prefer
25228 to assert it for now to ensure that future code changes do not silently
25229 change this behavior. */
25230 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25231 if (num_regs == 1)
25233 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25234 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25235 rtx addr = gen_rtx_MEM (SImode,
25236 gen_rtx_POST_INC (SImode,
25237 stack_pointer_rtx));
25238 set_mem_alias_set (addr, get_frame_alias_set ());
25239 XVECEXP (par, 0, 0) = ret_rtx;
25240 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25241 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25242 emit_jump_insn (par);
25244 else
25246 saved_regs_mask &= ~ (1 << LR_REGNUM);
25247 saved_regs_mask |= (1 << PC_REGNUM);
25248 arm_emit_multi_reg_pop (saved_regs_mask);
25251 else
25253 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25254 cmse_nonsecure_entry_clear_before_return ();
25255 emit_jump_insn (simple_return_rtx);
25259 void
25260 thumb1_expand_epilogue (void)
25262 HOST_WIDE_INT amount;
25263 arm_stack_offsets *offsets;
25264 int regno;
25266 /* Naked functions don't have epilogues. */
25267 if (IS_NAKED (arm_current_func_type ()))
25268 return;
25270 offsets = arm_get_frame_offsets ();
25271 amount = offsets->outgoing_args - offsets->saved_regs;
25273 if (frame_pointer_needed)
25275 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25276 amount = offsets->locals_base - offsets->saved_regs;
25278 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25280 gcc_assert (amount >= 0);
25281 if (amount)
25283 emit_insn (gen_blockage ());
25285 if (amount < 512)
25286 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25287 GEN_INT (amount)));
25288 else
25290 /* r3 is always free in the epilogue. */
25291 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25293 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25294 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25298 /* Emit a USE (stack_pointer_rtx), so that
25299 the stack adjustment will not be deleted. */
25300 emit_insn (gen_force_register_use (stack_pointer_rtx));
25302 if (crtl->profile || !TARGET_SCHED_PROLOG)
25303 emit_insn (gen_blockage ());
25305 /* Emit a clobber for each insn that will be restored in the epilogue,
25306 so that flow2 will get register lifetimes correct. */
25307 for (regno = 0; regno < 13; regno++)
25308 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25309 emit_clobber (gen_rtx_REG (SImode, regno));
25311 if (! df_regs_ever_live_p (LR_REGNUM))
25312 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25314 /* Clear all caller-saved regs that are not used to return. */
25315 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25316 cmse_nonsecure_entry_clear_before_return ();
25319 /* Epilogue code for APCS frame. */
25320 static void
25321 arm_expand_epilogue_apcs_frame (bool really_return)
25323 unsigned long func_type;
25324 unsigned long saved_regs_mask;
25325 int num_regs = 0;
25326 int i;
25327 int floats_from_frame = 0;
25328 arm_stack_offsets *offsets;
25330 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25331 func_type = arm_current_func_type ();
25333 /* Get frame offsets for ARM. */
25334 offsets = arm_get_frame_offsets ();
25335 saved_regs_mask = offsets->saved_regs_mask;
25337 /* Find the offset of the floating-point save area in the frame. */
25338 floats_from_frame
25339 = (offsets->saved_args
25340 + arm_compute_static_chain_stack_bytes ()
25341 - offsets->frame);
25343 /* Compute how many core registers are saved and how far away the floats are. */
25344 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25345 if (saved_regs_mask & (1 << i))
25347 num_regs++;
25348 floats_from_frame += 4;
25351 if (TARGET_HARD_FLOAT)
25353 int start_reg;
25354 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25356 /* The offset is from IP_REGNUM. */
25357 int saved_size = arm_get_vfp_saved_size ();
25358 if (saved_size > 0)
25360 rtx_insn *insn;
25361 floats_from_frame += saved_size;
25362 insn = emit_insn (gen_addsi3 (ip_rtx,
25363 hard_frame_pointer_rtx,
25364 GEN_INT (-floats_from_frame)));
25365 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25366 ip_rtx, hard_frame_pointer_rtx);
25369 /* Generate VFP register multi-pop. */
25370 start_reg = FIRST_VFP_REGNUM;
25372 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25373 /* Look for a case where a reg does not need restoring. */
25374 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25375 && (!df_regs_ever_live_p (i + 1)
25376 || call_used_regs[i + 1]))
25378 if (start_reg != i)
25379 arm_emit_vfp_multi_reg_pop (start_reg,
25380 (i - start_reg) / 2,
25381 gen_rtx_REG (SImode,
25382 IP_REGNUM));
25383 start_reg = i + 2;
25386 /* Restore the remaining regs that we have discovered (or possibly
25387 even all of them, if the conditional in the for loop never
25388 fired). */
25389 if (start_reg != i)
25390 arm_emit_vfp_multi_reg_pop (start_reg,
25391 (i - start_reg) / 2,
25392 gen_rtx_REG (SImode, IP_REGNUM));
25395 if (TARGET_IWMMXT)
25397 /* The frame pointer is guaranteed to be non-double-word aligned, as
25398 it is set to double-word-aligned old_stack_pointer - 4. */
25399 rtx_insn *insn;
25400 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25402 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25403 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25405 rtx addr = gen_frame_mem (V2SImode,
25406 plus_constant (Pmode, hard_frame_pointer_rtx,
25407 - lrm_count * 4));
25408 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25409 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25410 gen_rtx_REG (V2SImode, i),
25411 NULL_RTX);
25412 lrm_count += 2;
25416 /* saved_regs_mask should contain IP, which holds the old stack pointer
25417 from the time the activation record was created.  Since SP and IP are
25418 adjacent registers, we can restore the value directly into SP. */
25419 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25420 saved_regs_mask &= ~(1 << IP_REGNUM);
25421 saved_regs_mask |= (1 << SP_REGNUM);
25423 /* There are two registers left in saved_regs_mask - LR and PC. We
25424 only need to restore LR (the return address), but to
25425 save time we can load it directly into PC, unless we need a
25426 special function exit sequence, or we are not really returning. */
25427 if (really_return
25428 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25429 && !crtl->calls_eh_return)
25430 /* Delete LR from the register mask, so that LR on
25431 the stack is loaded into the PC in the register mask. */
25432 saved_regs_mask &= ~(1 << LR_REGNUM);
25433 else
25434 saved_regs_mask &= ~(1 << PC_REGNUM);
25436 num_regs = bit_count (saved_regs_mask);
25437 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25439 rtx_insn *insn;
25440 emit_insn (gen_blockage ());
25441 /* Unwind the stack to just below the saved registers. */
25442 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25443 hard_frame_pointer_rtx,
25444 GEN_INT (- 4 * num_regs)));
25446 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25447 stack_pointer_rtx, hard_frame_pointer_rtx);
25450 arm_emit_multi_reg_pop (saved_regs_mask);
25452 if (IS_INTERRUPT (func_type))
25454 /* Interrupt handlers will have pushed the
25455 IP onto the stack, so restore it now. */
25456 rtx_insn *insn;
25457 rtx addr = gen_rtx_MEM (SImode,
25458 gen_rtx_POST_INC (SImode,
25459 stack_pointer_rtx));
25460 set_mem_alias_set (addr, get_frame_alias_set ());
25461 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25462 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25463 gen_rtx_REG (SImode, IP_REGNUM),
25464 NULL_RTX);
25467 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25468 return;
25470 if (crtl->calls_eh_return)
25471 emit_insn (gen_addsi3 (stack_pointer_rtx,
25472 stack_pointer_rtx,
25473 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25475 if (IS_STACKALIGN (func_type))
25476 /* Restore the original stack pointer. Before prologue, the stack was
25477 realigned and the original stack pointer saved in r0. For details,
25478 see comment in arm_expand_prologue. */
25479 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25481 emit_jump_insn (simple_return_rtx);
25484 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25485 function is not a sibcall. */
25486 void
25487 arm_expand_epilogue (bool really_return)
25489 unsigned long func_type;
25490 unsigned long saved_regs_mask;
25491 int num_regs = 0;
25492 int i;
25493 int amount;
25494 arm_stack_offsets *offsets;
25496 func_type = arm_current_func_type ();
25498 /* Naked functions don't have epilogues.  Hence, generate a return pattern and
25499 let output_return_instruction take care of any instruction emission. */
25500 if (IS_NAKED (func_type)
25501 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25503 if (really_return)
25504 emit_jump_insn (simple_return_rtx);
25505 return;
25508 /* If we are throwing an exception, then we really must be doing a
25509 return, so we can't tail-call. */
25510 gcc_assert (!crtl->calls_eh_return || really_return);
25512 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25514 arm_expand_epilogue_apcs_frame (really_return);
25515 return;
25518 /* Get frame offsets for ARM. */
25519 offsets = arm_get_frame_offsets ();
25520 saved_regs_mask = offsets->saved_regs_mask;
25521 num_regs = bit_count (saved_regs_mask);
25523 if (frame_pointer_needed)
25525 rtx_insn *insn;
25526 /* Restore stack pointer if necessary. */
25527 if (TARGET_ARM)
25529 /* In ARM mode, frame pointer points to first saved register.
25530 Restore stack pointer to last saved register. */
25531 amount = offsets->frame - offsets->saved_regs;
25533 /* Force out any pending memory operations that reference stacked data
25534 before stack de-allocation occurs. */
25535 emit_insn (gen_blockage ());
25536 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25537 hard_frame_pointer_rtx,
25538 GEN_INT (amount)));
25539 arm_add_cfa_adjust_cfa_note (insn, amount,
25540 stack_pointer_rtx,
25541 hard_frame_pointer_rtx);
25543 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25544 deleted. */
25545 emit_insn (gen_force_register_use (stack_pointer_rtx));
25547 else
25549 /* In Thumb-2 mode, the frame pointer points to the last saved
25550 register. */
25551 amount = offsets->locals_base - offsets->saved_regs;
25552 if (amount)
25554 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25555 hard_frame_pointer_rtx,
25556 GEN_INT (amount)));
25557 arm_add_cfa_adjust_cfa_note (insn, amount,
25558 hard_frame_pointer_rtx,
25559 hard_frame_pointer_rtx);
25562 /* Force out any pending memory operations that reference stacked data
25563 before stack de-allocation occurs. */
25564 emit_insn (gen_blockage ());
25565 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25566 hard_frame_pointer_rtx));
25567 arm_add_cfa_adjust_cfa_note (insn, 0,
25568 stack_pointer_rtx,
25569 hard_frame_pointer_rtx);
25570 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25571 deleted. */
25572 emit_insn (gen_force_register_use (stack_pointer_rtx));
25575 else
25577 /* Pop off outgoing args and local frame to adjust stack pointer to
25578 last saved register. */
25579 amount = offsets->outgoing_args - offsets->saved_regs;
25580 if (amount)
25582 rtx_insn *tmp;
25583 /* Force out any pending memory operations that reference stacked data
25584 before stack de-allocation occurs. */
25585 emit_insn (gen_blockage ());
25586 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25587 stack_pointer_rtx,
25588 GEN_INT (amount)));
25589 arm_add_cfa_adjust_cfa_note (tmp, amount,
25590 stack_pointer_rtx, stack_pointer_rtx);
25591 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25592 not deleted. */
25593 emit_insn (gen_force_register_use (stack_pointer_rtx));
25597 if (TARGET_HARD_FLOAT)
25599 /* Generate VFP register multi-pop. */
25600 int end_reg = LAST_VFP_REGNUM + 1;
25602 /* Scan the registers in reverse order. We need to match
25603 any groupings made in the prologue and generate matching
25604 vldm operations. The need to match groups is because,
25605 unlike pop, vldm can only do consecutive regs. */
25606 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25607 /* Look for a case where a reg does not need restoring. */
25608 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25609 && (!df_regs_ever_live_p (i + 1)
25610 || call_used_regs[i + 1]))
25612 /* Restore the regs discovered so far (from reg+2 to
25613 end_reg). */
25614 if (end_reg > i + 2)
25615 arm_emit_vfp_multi_reg_pop (i + 2,
25616 (end_reg - (i + 2)) / 2,
25617 stack_pointer_rtx);
25618 end_reg = i;
25621 /* Restore the remaining regs that we have discovered (or possibly
25622 even all of them, if the conditional in the for loop never
25623 fired). */
25624 if (end_reg > i + 2)
25625 arm_emit_vfp_multi_reg_pop (i + 2,
25626 (end_reg - (i + 2)) / 2,
25627 stack_pointer_rtx);
25630 if (TARGET_IWMMXT)
25631 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25632 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25634 rtx_insn *insn;
25635 rtx addr = gen_rtx_MEM (V2SImode,
25636 gen_rtx_POST_INC (SImode,
25637 stack_pointer_rtx));
25638 set_mem_alias_set (addr, get_frame_alias_set ());
25639 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25640 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25641 gen_rtx_REG (V2SImode, i),
25642 NULL_RTX);
25643 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25644 stack_pointer_rtx, stack_pointer_rtx);
25647 if (saved_regs_mask)
25649 rtx insn;
25650 bool return_in_pc = false;
25652 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25653 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25654 && !IS_CMSE_ENTRY (func_type)
25655 && !IS_STACKALIGN (func_type)
25656 && really_return
25657 && crtl->args.pretend_args_size == 0
25658 && saved_regs_mask & (1 << LR_REGNUM)
25659 && !crtl->calls_eh_return)
25661 saved_regs_mask &= ~(1 << LR_REGNUM);
25662 saved_regs_mask |= (1 << PC_REGNUM);
25663 return_in_pc = true;
25666 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25668 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25669 if (saved_regs_mask & (1 << i))
25671 rtx addr = gen_rtx_MEM (SImode,
25672 gen_rtx_POST_INC (SImode,
25673 stack_pointer_rtx));
25674 set_mem_alias_set (addr, get_frame_alias_set ());
25676 if (i == PC_REGNUM)
25678 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25679 XVECEXP (insn, 0, 0) = ret_rtx;
25680 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25681 addr);
25682 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25683 insn = emit_jump_insn (insn);
25685 else
25687 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25688 addr));
25689 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25690 gen_rtx_REG (SImode, i),
25691 NULL_RTX);
25692 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25693 stack_pointer_rtx,
25694 stack_pointer_rtx);
25698 else
25700 if (TARGET_LDRD
25701 && current_tune->prefer_ldrd_strd
25702 && !optimize_function_for_size_p (cfun))
25704 if (TARGET_THUMB2)
25705 thumb2_emit_ldrd_pop (saved_regs_mask);
25706 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25707 arm_emit_ldrd_pop (saved_regs_mask);
25708 else
25709 arm_emit_multi_reg_pop (saved_regs_mask);
25711 else
25712 arm_emit_multi_reg_pop (saved_regs_mask);
25715 if (return_in_pc)
25716 return;
25719 amount
25720 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25721 if (amount)
25723 int i, j;
25724 rtx dwarf = NULL_RTX;
25725 rtx_insn *tmp =
25726 emit_insn (gen_addsi3 (stack_pointer_rtx,
25727 stack_pointer_rtx,
25728 GEN_INT (amount)));
25730 RTX_FRAME_RELATED_P (tmp) = 1;
25732 if (cfun->machine->uses_anonymous_args)
25734 /* Restore pretend args.  See arm_expand_prologue for how the
25735 pretend args are saved on the stack. */
25736 int num_regs = crtl->args.pretend_args_size / 4;
25737 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25738 for (j = 0, i = 0; j < num_regs; i++)
25739 if (saved_regs_mask & (1 << i))
25741 rtx reg = gen_rtx_REG (SImode, i);
25742 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25743 j++;
25745 REG_NOTES (tmp) = dwarf;
25747 arm_add_cfa_adjust_cfa_note (tmp, amount,
25748 stack_pointer_rtx, stack_pointer_rtx);
25751 /* Clear all caller-saved regs that are not used to return. */
25752 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25754 /* CMSE_ENTRY always returns. */
25755 gcc_assert (really_return);
25756 cmse_nonsecure_entry_clear_before_return ();
25759 if (!really_return)
25760 return;
25762 if (crtl->calls_eh_return)
25763 emit_insn (gen_addsi3 (stack_pointer_rtx,
25764 stack_pointer_rtx,
25765 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25767 if (IS_STACKALIGN (func_type))
25768 /* Restore the original stack pointer. Before prologue, the stack was
25769 realigned and the original stack pointer saved in r0. For details,
25770 see comment in arm_expand_prologue. */
25771 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25773 emit_jump_insn (simple_return_rtx);
25776 /* Implementation of insn prologue_thumb1_interwork. This is the first
25777 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25779 const char *
25780 thumb1_output_interwork (void)
25782 const char * name;
25783 FILE *f = asm_out_file;
25785 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25786 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25787 == SYMBOL_REF);
25788 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25790 /* Generate code sequence to switch us into Thumb mode. */
25791 /* The .code 32 directive has already been emitted by
25792 ASM_DECLARE_FUNCTION_NAME. */
25793 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25794 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25796 /* Generate a label, so that the debugger will notice the
25797 change in instruction sets. This label is also used by
25798 the assembler to bypass the ARM code when this function
25799 is called from a Thumb encoded function elsewhere in the
25800 same file. Hence the definition of STUB_NAME here must
25801 agree with the definition in gas/config/tc-arm.c. */
25803 #define STUB_NAME ".real_start_of"
25805 fprintf (f, "\t.code\t16\n");
25806 #ifdef ARM_PE
25807 if (arm_dllexport_name_p (name))
25808 name = arm_strip_name_encoding (name);
25809 #endif
25810 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25811 fprintf (f, "\t.thumb_func\n");
25812 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25814 return "";
25817 /* Handle the case of a double word load into a low register from
25818 a computed memory address. The computed address may involve a
25819 register which is overwritten by the load. */
25820 const char *
25821 thumb_load_double_from_address (rtx *operands)
25823 rtx addr;
25824 rtx base;
25825 rtx offset;
25826 rtx arg1;
25827 rtx arg2;
25829 gcc_assert (REG_P (operands[0]));
25830 gcc_assert (MEM_P (operands[1]));
25832 /* Get the memory address. */
25833 addr = XEXP (operands[1], 0);
25835 /* Work out how the memory address is computed. */
25836 switch (GET_CODE (addr))
25838 case REG:
25839 operands[2] = adjust_address (operands[1], SImode, 4);
25841 if (REGNO (operands[0]) == REGNO (addr))
25843 output_asm_insn ("ldr\t%H0, %2", operands);
25844 output_asm_insn ("ldr\t%0, %1", operands);
25846 else
25848 output_asm_insn ("ldr\t%0, %1", operands);
25849 output_asm_insn ("ldr\t%H0, %2", operands);
25851 break;
25853 case CONST:
25854 /* Compute <address> + 4 for the high order load. */
25855 operands[2] = adjust_address (operands[1], SImode, 4);
25857 output_asm_insn ("ldr\t%0, %1", operands);
25858 output_asm_insn ("ldr\t%H0, %2", operands);
25859 break;
25861 case PLUS:
25862 arg1 = XEXP (addr, 0);
25863 arg2 = XEXP (addr, 1);
25865 if (CONSTANT_P (arg1))
25866 base = arg2, offset = arg1;
25867 else
25868 base = arg1, offset = arg2;
25870 gcc_assert (REG_P (base));
25872 /* Catch the case of <address> = <reg> + <reg> */
25873 if (REG_P (offset))
25875 int reg_offset = REGNO (offset);
25876 int reg_base = REGNO (base);
25877 int reg_dest = REGNO (operands[0]);
25879 /* Add the base and offset registers together into the
25880 higher destination register. */
25881 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25882 reg_dest + 1, reg_base, reg_offset);
25884 /* Load the lower destination register from the address in
25885 the higher destination register. */
25886 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25887 reg_dest, reg_dest + 1);
25889 /* Load the higher destination register from its own address
25890 plus 4. */
25891 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25892 reg_dest + 1, reg_dest + 1);
25894 else
25896 /* Compute <address> + 4 for the high order load. */
25897 operands[2] = adjust_address (operands[1], SImode, 4);
25899 /* If the computed address is held in the low order register
25900 then load the high order register first, otherwise always
25901 load the low order register first. */
25902 if (REGNO (operands[0]) == REGNO (base))
25904 output_asm_insn ("ldr\t%H0, %2", operands);
25905 output_asm_insn ("ldr\t%0, %1", operands);
25907 else
25909 output_asm_insn ("ldr\t%0, %1", operands);
25910 output_asm_insn ("ldr\t%H0, %2", operands);
25913 break;
25915 case LABEL_REF:
25916 /* With no registers to worry about we can just load the value
25917 directly. */
25918 operands[2] = adjust_address (operands[1], SImode, 4);
25920 output_asm_insn ("ldr\t%H0, %2", operands);
25921 output_asm_insn ("ldr\t%0, %1", operands);
25922 break;
25924 default:
25925 gcc_unreachable ();
25928 return "";
25931 const char *
25932 thumb_output_move_mem_multiple (int n, rtx *operands)
25934 switch (n)
25936 case 2:
25937 if (REGNO (operands[4]) > REGNO (operands[5]))
25938 std::swap (operands[4], operands[5]);
25940 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25941 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25942 break;
25944 case 3:
25945 if (REGNO (operands[4]) > REGNO (operands[5]))
25946 std::swap (operands[4], operands[5]);
25947 if (REGNO (operands[5]) > REGNO (operands[6]))
25948 std::swap (operands[5], operands[6]);
25949 if (REGNO (operands[4]) > REGNO (operands[5]))
25950 std::swap (operands[4], operands[5]);
25952 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25953 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25954 break;
25956 default:
25957 gcc_unreachable ();
25960 return "";
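/* Illustrative sketch (not part of arm.c): the three-register case above
   orders the register numbers with three compare-and-swap steps, which is a
   minimal sorting network for three elements.  Stand-alone version: */
static void
sketch_sort3 (int *a, int *b, int *c)
{
  int tmp;
  if (*a > *b) { tmp = *a; *a = *b; *b = tmp; }
  if (*b > *c) { tmp = *b; *b = *c; *c = tmp; }
  if (*a > *b) { tmp = *a; *a = *b; *b = tmp; }
  /* After these three exchanges *a <= *b <= *c always holds, so the
     register list handed to ldmia/stmia is in ascending order.  */
}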
25963 /* Output a call-via instruction for thumb state. */
25964 const char *
25965 thumb_call_via_reg (rtx reg)
25967 int regno = REGNO (reg);
25968 rtx *labelp;
25970 gcc_assert (regno < LR_REGNUM);
25972 /* If we are in the normal text section we can use a single instance
25973 per compilation unit. If we are doing function sections, then we need
25974 an entry per section, since we can't rely on reachability. */
25975 if (in_section == text_section)
25977 thumb_call_reg_needed = 1;
25979 if (thumb_call_via_label[regno] == NULL)
25980 thumb_call_via_label[regno] = gen_label_rtx ();
25981 labelp = thumb_call_via_label + regno;
25983 else
25985 if (cfun->machine->call_via[regno] == NULL)
25986 cfun->machine->call_via[regno] = gen_label_rtx ();
25987 labelp = cfun->machine->call_via + regno;
25990 output_asm_insn ("bl\t%a0", labelp);
25991 return "";
25994 /* Routines for generating rtl. */
25995 void
25996 thumb_expand_movmemqi (rtx *operands)
25998 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25999 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26000 HOST_WIDE_INT len = INTVAL (operands[2]);
26001 HOST_WIDE_INT offset = 0;
26003 while (len >= 12)
26005 emit_insn (gen_movmem12b (out, in, out, in));
26006 len -= 12;
26009 if (len >= 8)
26011 emit_insn (gen_movmem8b (out, in, out, in));
26012 len -= 8;
26015 if (len >= 4)
26017 rtx reg = gen_reg_rtx (SImode);
26018 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26019 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26020 len -= 4;
26021 offset += 4;
26024 if (len >= 2)
26026 rtx reg = gen_reg_rtx (HImode);
26027 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26028 plus_constant (Pmode, in,
26029 offset))));
26030 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26031 offset)),
26032 reg));
26033 len -= 2;
26034 offset += 2;
26037 if (len)
26039 rtx reg = gen_reg_rtx (QImode);
26040 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26041 plus_constant (Pmode, in,
26042 offset))));
26043 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26044 offset)),
26045 reg));
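/* Illustrative sketch (not part of arm.c): the expander above copies a block
   greedily, emitting 12- and 8-byte ldmia/stmia groups first, then a word, a
   halfword and a final byte.  The chunk-size selection alone looks like: */
#include <stddef.h>

static size_t
sketch_next_chunk (size_t remaining)
{
  if (remaining >= 12) return 12;   /* three-register ldmia/stmia pair */
  if (remaining >= 8)  return 8;    /* two-register ldmia/stmia pair */
  if (remaining >= 4)  return 4;    /* single word */
  if (remaining >= 2)  return 2;    /* halfword */
  return remaining;                 /* zero, or one trailing byte */
}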
26049 void
26050 thumb_reload_out_hi (rtx *operands)
26052 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26055 /* Return the length of a function name prefix
26056 that starts with the character 'c'. */
26057 static int
26058 arm_get_strip_length (int c)
26060 switch (c)
26062 ARM_NAME_ENCODING_LENGTHS
26063 default: return 0;
26067 /* Return a pointer to a function's name with any
26068 and all prefix encodings stripped from it. */
26069 const char *
26070 arm_strip_name_encoding (const char *name)
26072 int skip;
26074 while ((skip = arm_get_strip_length (* name)))
26075 name += skip;
26077 return name;
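/* Illustrative sketch (not part of arm.c): arm_strip_name_encoding repeatedly
   skips whatever prefix length arm_get_strip_length reports for the current
   first character.  With a hypothetical table that maps '*' and '@' to a
   one-character prefix, the same loop behaves like this: */
static int
sketch_strip_length (int c)
{
  return (c == '*' || c == '@') ? 1 : 0;   /* assumed encodings, for the sketch */
}

static const char *
sketch_strip_name (const char *name)
{
  int skip;
  while ((skip = sketch_strip_length (*name)))
    name += skip;
  return name;                             /* "@*foo" -> "foo" */
}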
26080 /* If there is a '*' anywhere in the name's prefix, then
26081 emit the stripped name verbatim, otherwise prepend an
26082 underscore if leading underscores are being used. */
26083 void
26084 arm_asm_output_labelref (FILE *stream, const char *name)
26086 int skip;
26087 int verbatim = 0;
26089 while ((skip = arm_get_strip_length (* name)))
26091 verbatim |= (*name == '*');
26092 name += skip;
26095 if (verbatim)
26096 fputs (name, stream);
26097 else
26098 asm_fprintf (stream, "%U%s", name);
26101 /* This function is used to emit an EABI tag and its associated value.
26102 We emit the numerical value of the tag in case the assembler does not
26103 support textual tags (e.g. gas prior to 2.20).  If requested we include
26104 the tag name in a comment so that anyone reading the assembler output
26105 will know which tag is being set.
26107 This function is not static because arm-c.c needs it too. */
26109 void
26110 arm_emit_eabi_attribute (const char *name, int num, int val)
26112 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26113 if (flag_verbose_asm || flag_debug_asm)
26114 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26115 asm_fprintf (asm_out_file, "\n");
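/* Illustrative sketch (not part of arm.c): a plain-stdio version of the
   directive emission above.  The '@' comment character and the example tag
   name are assumptions for the sketch; the real code uses asm_fprintf and
   ASM_COMMENT_START.  */
#include <stdio.h>

static void
sketch_emit_eabi_attribute (FILE *out, const char *name, int num, int val,
                            int verbose)
{
  fprintf (out, "\t.eabi_attribute %d, %d", num, val);
  if (verbose)
    fprintf (out, "\t@ %s", name);   /* tag name only as an asm comment */
  fprintf (out, "\n");
}
/* e.g. sketch_emit_eabi_attribute (stdout, "Tag_ABI_optimization_goals", 30, 2, 1)
   prints:  .eabi_attribute 30, 2  @ Tag_ABI_optimization_goals  */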
26118 /* This function is used to print CPU tuning information as a comment
26119 in the assembler file.  Pointers are not printed for now. */
26121 void
26122 arm_print_tune_info (void)
26124 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26125 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26126 current_tune->constant_limit);
26127 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26128 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26129 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26130 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26131 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26132 "prefetch.l1_cache_size:\t%d\n",
26133 current_tune->prefetch.l1_cache_size);
26134 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26135 "prefetch.l1_cache_line_size:\t%d\n",
26136 current_tune->prefetch.l1_cache_line_size);
26137 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26138 "prefer_constant_pool:\t%d\n",
26139 (int) current_tune->prefer_constant_pool);
26140 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26141 "branch_cost:\t(s:speed, p:predictable)\n");
26142 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26143 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26144 current_tune->branch_cost (false, false));
26145 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26146 current_tune->branch_cost (false, true));
26147 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26148 current_tune->branch_cost (true, false));
26149 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26150 current_tune->branch_cost (true, true));
26151 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26152 "prefer_ldrd_strd:\t%d\n",
26153 (int) current_tune->prefer_ldrd_strd);
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26155 "logical_op_non_short_circuit:\t[%d,%d]\n",
26156 (int) current_tune->logical_op_non_short_circuit_thumb,
26157 (int) current_tune->logical_op_non_short_circuit_arm);
26158 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26159 "prefer_neon_for_64bits:\t%d\n",
26160 (int) current_tune->prefer_neon_for_64bits);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "disparage_flag_setting_t16_encodings:\t%d\n",
26163 (int) current_tune->disparage_flag_setting_t16_encodings);
26164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26165 "string_ops_prefer_neon:\t%d\n",
26166 (int) current_tune->string_ops_prefer_neon);
26167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26168 "max_insns_inline_memset:\t%d\n",
26169 current_tune->max_insns_inline_memset);
26170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26171 current_tune->fusible_ops);
26172 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26173 (int) current_tune->sched_autopref);
26176 /* Print .arch and .arch_extension directives corresponding to the
26177 current architecture configuration. */
26178 static void
26179 arm_print_asm_arch_directives ()
26181 const arch_option *arch
26182 = arm_parse_arch_option_name (all_architectures, "-march",
26183 arm_active_target.arch_name);
26184 auto_sbitmap opt_bits (isa_num_bits);
26186 gcc_assert (arch);
26188 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26189 if (!arch->common.extensions)
26190 return;
26192 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26193 opt->name != NULL;
26194 opt++)
26196 if (!opt->remove)
26198 arm_initialize_isa (opt_bits, opt->isa_bits);
26200 /* If every feature bit of this option is set in the target
26201 ISA specification, print out the option name. However,
26202 don't print anything if all the bits are part of the
26203 FPU specification. */
26204 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26205 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26206 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26211 static void
26212 arm_file_start (void)
26214 int val;
26216 if (TARGET_BPABI)
26218 /* We don't have a specified CPU. Use the architecture to
26219 generate the tags.
26221 Note: it might be better to do this unconditionally, then the
26222 assembler would not need to know about all new CPU names as
26223 they are added. */
26224 if (!arm_active_target.core_name)
26226 /* armv7ve doesn't support any extensions. */
26227 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26229 /* Keep backward compatibility for assemblers
26230 which don't support armv7ve. */
26231 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26232 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26233 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26234 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26235 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26237 else
26238 arm_print_asm_arch_directives ();
26240 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26241 asm_fprintf (asm_out_file, "\t.arch %s\n",
26242 arm_active_target.core_name + 8);
26243 else
26245 const char* truncated_name
26246 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26247 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26250 if (print_tune_info)
26251 arm_print_tune_info ();
26253 if (! TARGET_SOFT_FLOAT)
26255 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26256 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26258 if (TARGET_HARD_FLOAT_ABI)
26259 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26262 /* Some of these attributes only apply when the corresponding features
26263 are used. However we don't have any easy way of figuring this out.
26264 Conservatively record the setting that would have been used. */
26266 if (flag_rounding_math)
26267 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26269 if (!flag_unsafe_math_optimizations)
26271 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26272 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26274 if (flag_signaling_nans)
26275 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26277 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26278 flag_finite_math_only ? 1 : 3);
26280 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26281 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26282 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26283 flag_short_enums ? 1 : 2);
26285 /* Tag_ABI_optimization_goals. */
26286 if (optimize_size)
26287 val = 4;
26288 else if (optimize >= 2)
26289 val = 2;
26290 else if (optimize)
26291 val = 1;
26292 else
26293 val = 6;
26294 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26296 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26297 unaligned_access);
26299 if (arm_fp16_format)
26300 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26301 (int) arm_fp16_format);
26303 if (arm_lang_output_object_attributes_hook)
26304 arm_lang_output_object_attributes_hook();
26307 default_file_start ();
26310 static void
26311 arm_file_end (void)
26313 int regno;
26315 if (NEED_INDICATE_EXEC_STACK)
26316 /* Add .note.GNU-stack. */
26317 file_end_indicate_exec_stack ();
26319 if (! thumb_call_reg_needed)
26320 return;
26322 switch_to_section (text_section);
26323 asm_fprintf (asm_out_file, "\t.code 16\n");
26324 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26326 for (regno = 0; regno < LR_REGNUM; regno++)
26328 rtx label = thumb_call_via_label[regno];
26330 if (label != 0)
26332 targetm.asm_out.internal_label (asm_out_file, "L",
26333 CODE_LABEL_NUMBER (label));
26334 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26339 #ifndef ARM_PE
26340 /* Symbols in the text segment can be accessed without indirecting via the
26341 constant pool; it may take an extra binary operation, but this is still
26342 faster than indirecting via memory. Don't do this when not optimizing,
26343 since we won't be calculating all of the offsets necessary to do this
26344 simplification. */
26346 static void
26347 arm_encode_section_info (tree decl, rtx rtl, int first)
26349 if (optimize > 0 && TREE_CONSTANT (decl))
26350 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26352 default_encode_section_info (decl, rtl, first);
26354 #endif /* !ARM_PE */
26356 static void
26357 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26359 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26360 && !strcmp (prefix, "L"))
26362 arm_ccfsm_state = 0;
26363 arm_target_insn = NULL;
26365 default_internal_label (stream, prefix, labelno);
26368 /* Output code to add DELTA to the first argument, and then jump
26369 to FUNCTION. Used for C++ multiple inheritance. */
26371 static void
26372 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26373 HOST_WIDE_INT, tree function)
26375 static int thunk_label = 0;
26376 char label[256];
26377 char labelpc[256];
26378 int mi_delta = delta;
26379 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26380 int shift = 0;
26381 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26382 ? 1 : 0);
26383 if (mi_delta < 0)
26384 mi_delta = - mi_delta;
26386 final_start_function (emit_barrier (), file, 1);
26388 if (TARGET_THUMB1)
26390 int labelno = thunk_label++;
26391 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26392 /* Thunks are entered in arm mode when available. */
26393 if (TARGET_THUMB1_ONLY)
26395 /* push r3 so we can use it as a temporary. */
26396 /* TODO: Omit this save if r3 is not used. */
26397 fputs ("\tpush {r3}\n", file);
26398 fputs ("\tldr\tr3, ", file);
26400 else
26402 fputs ("\tldr\tr12, ", file);
26404 assemble_name (file, label);
26405 fputc ('\n', file);
26406 if (flag_pic)
26408 /* If we are generating PIC, the ldr instruction below loads
26409 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26410 the address of the add + 8, so we have:
26412 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26413 = target + 1.
26415 Note that we have "+ 1" because some versions of GNU ld
26416 don't set the low bit of the result for R_ARM_REL32
26417 relocations against thumb function symbols.
26418 On ARMv6M this is +4, not +8. */
26419 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26420 assemble_name (file, labelpc);
26421 fputs (":\n", file);
26422 if (TARGET_THUMB1_ONLY)
26424 /* This is 2 insns after the start of the thunk, so we know it
26425 is 4-byte aligned. */
26426 fputs ("\tadd\tr3, pc, r3\n", file);
26427 fputs ("\tmov r12, r3\n", file);
26429 else
26430 fputs ("\tadd\tr12, pc, r12\n", file);
26432 else if (TARGET_THUMB1_ONLY)
26433 fputs ("\tmov r12, r3\n", file);
26435 if (TARGET_THUMB1_ONLY)
26437 if (mi_delta > 255)
26439 fputs ("\tldr\tr3, ", file);
26440 assemble_name (file, label);
26441 fputs ("+4\n", file);
26442 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26443 mi_op, this_regno, this_regno);
26445 else if (mi_delta != 0)
26447 /* Thumb1 unified syntax requires an 's' suffix on the instruction name
26448 when one of the operands is an immediate. */
26449 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26450 mi_op, this_regno, this_regno,
26451 mi_delta);
26454 else
26456 /* TODO: Use movw/movt for large constants when available. */
26457 while (mi_delta != 0)
26459 if ((mi_delta & (3 << shift)) == 0)
26460 shift += 2;
26461 else
26463 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26464 mi_op, this_regno, this_regno,
26465 mi_delta & (0xff << shift));
26466 mi_delta &= ~(0xff << shift);
26467 shift += 8;
26471 if (TARGET_THUMB1)
26473 if (TARGET_THUMB1_ONLY)
26474 fputs ("\tpop\t{r3}\n", file);
26476 fprintf (file, "\tbx\tr12\n");
26477 ASM_OUTPUT_ALIGN (file, 2);
26478 assemble_name (file, label);
26479 fputs (":\n", file);
26480 if (flag_pic)
26482 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26483 rtx tem = XEXP (DECL_RTL (function), 0);
26484 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26485 pipeline offset is four rather than eight. Adjust the offset
26486 accordingly. */
26487 tem = plus_constant (GET_MODE (tem), tem,
26488 TARGET_THUMB1_ONLY ? -3 : -7);
26489 tem = gen_rtx_MINUS (GET_MODE (tem),
26490 tem,
26491 gen_rtx_SYMBOL_REF (Pmode,
26492 ggc_strdup (labelpc)));
26493 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26495 else
26496 /* Output ".word .LTHUNKn". */
26497 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26499 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26500 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26502 else
26504 fputs ("\tb\t", file);
26505 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26506 if (NEED_PLT_RELOC)
26507 fputs ("(PLT)", file);
26508 fputc ('\n', file);
26511 final_end_function ();
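/* Illustrative sketch (not part of arm.c): the non-Thumb1 path above splits
   DELTA into a series of 8-bit chunks, each starting at an even bit position,
   so every piece fits an ARM add/sub immediate.  Stand-alone decomposition: */
#include <stdio.h>

static void
sketch_split_into_arm_immediates (unsigned int delta)
{
  int shift = 0;
  while (delta != 0)
    {
      if ((delta & (3u << shift)) == 0)
        shift += 2;                                   /* skip empty 2-bit groups */
      else
        {
          printf ("#%u\n", delta & (0xffu << shift)); /* one add/sub operand */
          delta &= ~(0xffu << shift);
          shift += 8;
        }
    }
}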
26514 /* MI thunk handling for TARGET_32BIT. */
26516 static void
26517 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26518 HOST_WIDE_INT vcall_offset, tree function)
26520 /* On ARM, this_regno is R0 or R1 depending on
26521 whether the function returns an aggregate or not.
26522 */
26523 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26524 function)
26525 ? R1_REGNUM : R0_REGNUM);
26527 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26528 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26529 reload_completed = 1;
26530 emit_note (NOTE_INSN_PROLOGUE_END);
26532 /* Add DELTA to THIS_RTX. */
26533 if (delta != 0)
26534 arm_split_constant (PLUS, Pmode, NULL_RTX,
26535 delta, this_rtx, this_rtx, false);
26537 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26538 if (vcall_offset != 0)
26540 /* Load *THIS_RTX. */
26541 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26542 /* Compute *THIS_RTX + VCALL_OFFSET. */
26543 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26544 false);
26545 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26546 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26547 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26550 /* Generate a tail call to the target function. */
26551 if (!TREE_USED (function))
26553 assemble_external (function);
26554 TREE_USED (function) = 1;
26556 rtx funexp = XEXP (DECL_RTL (function), 0);
26557 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26558 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26559 SIBLING_CALL_P (insn) = 1;
26561 insn = get_insns ();
26562 shorten_branches (insn);
26563 final_start_function (insn, file, 1);
26564 final (insn, file, 1);
26565 final_end_function ();
26567 /* Stop pretending this is a post-reload pass. */
26568 reload_completed = 0;
26571 /* Output code to add DELTA to the first argument, and then jump
26572 to FUNCTION. Used for C++ multiple inheritance. */
26574 static void
26575 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26576 HOST_WIDE_INT vcall_offset, tree function)
26578 if (TARGET_32BIT)
26579 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26580 else
26581 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26584 int
26585 arm_emit_vector_const (FILE *file, rtx x)
26587 int i;
26588 const char * pattern;
26590 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26592 switch (GET_MODE (x))
26594 case E_V2SImode: pattern = "%08x"; break;
26595 case E_V4HImode: pattern = "%04x"; break;
26596 case E_V8QImode: pattern = "%02x"; break;
26597 default: gcc_unreachable ();
26600 fprintf (file, "0x");
26601 for (i = CONST_VECTOR_NUNITS (x); i--;)
26603 rtx element;
26605 element = CONST_VECTOR_ELT (x, i);
26606 fprintf (file, pattern, INTVAL (element));
26609 return 1;
26612 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26613 HFmode constant pool entries are actually loaded with ldr. */
26614 void
26615 arm_emit_fp16_const (rtx c)
26617 long bits;
26619 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26620 if (WORDS_BIG_ENDIAN)
26621 assemble_zeros (2);
26622 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26623 if (!WORDS_BIG_ENDIAN)
26624 assemble_zeros (2);
26627 const char *
26628 arm_output_load_gr (rtx *operands)
26630 rtx reg;
26631 rtx offset;
26632 rtx wcgr;
26633 rtx sum;
26635 if (!MEM_P (operands [1])
26636 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26637 || !REG_P (reg = XEXP (sum, 0))
26638 || !CONST_INT_P (offset = XEXP (sum, 1))
26639 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26640 return "wldrw%?\t%0, %1";
26642 /* Fix up an out-of-range load of a GR register. */
26643 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26644 wcgr = operands[0];
26645 operands[0] = reg;
26646 output_asm_insn ("ldr%?\t%0, %1", operands);
26648 operands[0] = wcgr;
26649 operands[1] = reg;
26650 output_asm_insn ("tmcr%?\t%0, %1", operands);
26651 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26653 return "";
26656 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26658 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26659 named arg and all anonymous args onto the stack.
26660 XXX I know the prologue shouldn't be pushing registers, but it is faster
26661 that way. */
26663 static void
26664 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26665 machine_mode mode,
26666 tree type,
26667 int *pretend_size,
26668 int second_time ATTRIBUTE_UNUSED)
26670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26671 int nregs;
26673 cfun->machine->uses_anonymous_args = 1;
26674 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26676 nregs = pcum->aapcs_ncrn;
26677 if (nregs & 1)
26679 int res = arm_needs_doubleword_align (mode, type);
26680 if (res < 0 && warn_psabi)
26681 inform (input_location, "parameter passing for argument of "
26682 "type %qT changed in GCC 7.1", type);
26683 else if (res > 0)
26684 nregs++;
26687 else
26688 nregs = pcum->nregs;
26690 if (nregs < NUM_ARG_REGS)
26691 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
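/* Worked example of the computation above (an illustrative sketch only,
   assuming the usual four argument registers r0-r3 and 4-byte words):
   for a variadic AAPCS function such as

       int f (int a, int b, ...);

   the two named arguments occupy r0 and r1, so nregs == 2 and
   *pretend_size becomes (4 - 2) * 4 == 8, i.e. the prologue also pushes
   r2 and r3 so that the anonymous arguments end up contiguous with any
   arguments already passed on the stack.  The exact register count
   depends on the argument types and PCS variant.  */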
26694 /* We can't rely on the caller doing the proper promotion when
26695 using APCS or ATPCS. */
26697 static bool
26698 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26700 return !TARGET_AAPCS_BASED;
26703 static machine_mode
26704 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26705 machine_mode mode,
26706 int *punsignedp ATTRIBUTE_UNUSED,
26707 const_tree fntype ATTRIBUTE_UNUSED,
26708 int for_return ATTRIBUTE_UNUSED)
26710 if (GET_MODE_CLASS (mode) == MODE_INT
26711 && GET_MODE_SIZE (mode) < 4)
26712 return SImode;
26714 return mode;
26718 static bool
26719 arm_default_short_enums (void)
26721 return ARM_DEFAULT_SHORT_ENUMS;
26725 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26727 static bool
26728 arm_align_anon_bitfield (void)
26730 return TARGET_AAPCS_BASED;
26734 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26736 static tree
26737 arm_cxx_guard_type (void)
26739 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26743 /* The EABI says test the least significant bit of a guard variable. */
26745 static bool
26746 arm_cxx_guard_mask_bit (void)
26748 return TARGET_AAPCS_BASED;
26752 /* The EABI specifies that all array cookies are 8 bytes long. */
26754 static tree
26755 arm_get_cookie_size (tree type)
26757 tree size;
26759 if (!TARGET_AAPCS_BASED)
26760 return default_cxx_get_cookie_size (type);
26762 size = build_int_cst (sizetype, 8);
26763 return size;
26767 /* The EABI says that array cookies should also contain the element size. */
26769 static bool
26770 arm_cookie_has_size (void)
26772 return TARGET_AAPCS_BASED;
26776 /* The EABI says constructors and destructors should return a pointer to
26777 the object constructed/destroyed. */
26779 static bool
26780 arm_cxx_cdtor_returns_this (void)
26782 return TARGET_AAPCS_BASED;
26785 /* The EABI says that an inline function may never be the key
26786 method. */
26788 static bool
26789 arm_cxx_key_method_may_be_inline (void)
26791 return !TARGET_AAPCS_BASED;
26794 static void
26795 arm_cxx_determine_class_data_visibility (tree decl)
26797 if (!TARGET_AAPCS_BASED
26798 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26799 return;
26801 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26802 is exported. However, on systems without dynamic vague linkage,
26803 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26804 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26805 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26806 else
26807 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26808 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26811 static bool
26812 arm_cxx_class_data_always_comdat (void)
26814 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26815 vague linkage if the class has no key function. */
26816 return !TARGET_AAPCS_BASED;
26820 /* The EABI says __aeabi_atexit should be used to register static
26821 destructors. */
26823 static bool
26824 arm_cxx_use_aeabi_atexit (void)
26826 return TARGET_AAPCS_BASED;
26830 void
26831 arm_set_return_address (rtx source, rtx scratch)
26833 arm_stack_offsets *offsets;
26834 HOST_WIDE_INT delta;
26835 rtx addr;
26836 unsigned long saved_regs;
26838 offsets = arm_get_frame_offsets ();
26839 saved_regs = offsets->saved_regs_mask;
26841 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26842 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26843 else
26845 if (frame_pointer_needed)
26846 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26847 else
26849 /* LR will be the first saved register. */
26850 delta = offsets->outgoing_args - (offsets->frame + 4);
26853 if (delta >= 4096)
26855 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26856 GEN_INT (delta & ~4095)));
26857 addr = scratch;
26858 delta &= 4095;
26860 else
26861 addr = stack_pointer_rtx;
26863 addr = plus_constant (Pmode, addr, delta);
26865 /* The store needs to be marked as frame related in order to prevent
26866 DSE from deleting it as dead if it is based on fp. */
26867 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26868 RTX_FRAME_RELATED_P (insn) = 1;
26869 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26874 void
26875 thumb_set_return_address (rtx source, rtx scratch)
26877 arm_stack_offsets *offsets;
26878 HOST_WIDE_INT delta;
26879 HOST_WIDE_INT limit;
26880 int reg;
26881 rtx addr;
26882 unsigned long mask;
26884 emit_use (source);
26886 offsets = arm_get_frame_offsets ();
26887 mask = offsets->saved_regs_mask;
26888 if (mask & (1 << LR_REGNUM))
26890 limit = 1024;
26891 /* Find the saved regs. */
26892 if (frame_pointer_needed)
26894 delta = offsets->soft_frame - offsets->saved_args;
26895 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26896 if (TARGET_THUMB1)
26897 limit = 128;
26899 else
26901 delta = offsets->outgoing_args - offsets->saved_args;
26902 reg = SP_REGNUM;
26904 /* Allow for the stack frame. */
26905 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26906 delta -= 16;
26907 /* The link register is always the first saved register. */
26908 delta -= 4;
26910 /* Construct the address. */
26911 addr = gen_rtx_REG (SImode, reg);
26912 if (delta > limit)
26914 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26915 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26916 addr = scratch;
26918 else
26919 addr = plus_constant (Pmode, addr, delta);
26921 /* The store needs to be marked as frame related in order to prevent
26922 DSE from deleting it as dead if it is based on fp. */
26923 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26924 RTX_FRAME_RELATED_P (insn) = 1;
26925 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26927 else
26928 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26931 /* Implements target hook vector_mode_supported_p. */
26932 bool
26933 arm_vector_mode_supported_p (machine_mode mode)
26935 /* Neon also supports V2SImode, etc. listed in the clause below. */
26936 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26937 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26938 || mode == V2DImode || mode == V8HFmode))
26939 return true;
26941 if ((TARGET_NEON || TARGET_IWMMXT)
26942 && ((mode == V2SImode)
26943 || (mode == V4HImode)
26944 || (mode == V8QImode)))
26945 return true;
26947 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26948 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26949 || mode == V2HAmode))
26950 return true;
26952 return false;
26955 /* Implements target hook array_mode_supported_p. */
26957 static bool
26958 arm_array_mode_supported_p (machine_mode mode,
26959 unsigned HOST_WIDE_INT nelems)
26961 if (TARGET_NEON
26962 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26963 && (nelems >= 2 && nelems <= 4))
26964 return true;
26966 return false;
26969 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26970 registers when autovectorizing for Neon, at least until multiple vector
26971 widths are supported properly by the middle-end. */
26973 static machine_mode
26974 arm_preferred_simd_mode (scalar_mode mode)
26976 if (TARGET_NEON)
26977 switch (mode)
26979 case E_SFmode:
26980 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26981 case E_SImode:
26982 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26983 case E_HImode:
26984 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26985 case E_QImode:
26986 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26987 case E_DImode:
26988 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26989 return V2DImode;
26990 break;
26992 default:;
26995 if (TARGET_REALLY_IWMMXT)
26996 switch (mode)
26998 case E_SImode:
26999 return V2SImode;
27000 case E_HImode:
27001 return V4HImode;
27002 case E_QImode:
27003 return V8QImode;
27005 default:;
27008 return word_mode;
27011 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27013 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27014 using r0-r4 for function arguments and r7 for the stack frame, leaving too
27015 little left over to do doubleword arithmetic. For Thumb-2 all the
27016 potentially problematic instructions accept high registers so this is not
27017 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27018 that require many low registers. */
27019 static bool
27020 arm_class_likely_spilled_p (reg_class_t rclass)
27022 if ((TARGET_THUMB1 && rclass == LO_REGS)
27023 || rclass == CC_REG)
27024 return true;
27026 return false;
27029 /* Implements target hook small_register_classes_for_mode_p. */
27030 bool
27031 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27033 return TARGET_THUMB1;
27036 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27037 ARM insns and therefore guarantee that the shift count is modulo 256.
27038 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27039 guarantee no particular behavior for out-of-range counts. */
27041 static unsigned HOST_WIDE_INT
27042 arm_shift_truncation_mask (machine_mode mode)
27044 return mode == SImode ? 255 : 0;
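/* Illustrative consequence of the mask above (a sketch, not a guarantee
   about what any particular pass will do): because ARM register-controlled
   shifts only look at the bottom byte of the count, returning 255 for
   SImode lets the middle-end drop a redundant masking of the count, e.g.

       y = x << (n & 255);   may be treated as   y = x << n;

   No such promise is made for DImode shifts, hence the 0 returned for
   other modes.  */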
27048 /* Map internal gcc register numbers to DWARF2 register numbers. */
27050 unsigned int
27051 arm_dbx_register_number (unsigned int regno)
27053 if (regno < 16)
27054 return regno;
27056 if (IS_VFP_REGNUM (regno))
27058 /* See comment in arm_dwarf_register_span. */
27059 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27060 return 64 + regno - FIRST_VFP_REGNUM;
27061 else
27062 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27065 if (IS_IWMMXT_GR_REGNUM (regno))
27066 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27068 if (IS_IWMMXT_REGNUM (regno))
27069 return 112 + regno - FIRST_IWMMXT_REGNUM;
27071 return DWARF_FRAME_REGISTERS;
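/* A few concrete values of the mapping above (illustrative only, derived
   from the formulas in this function):

     r0 .. r15                        ->   0 .. 15
     s0  (FIRST_VFP_REGNUM)           ->  64
     s31                              ->  95
     d16 (first double-only register) -> 272   (256 + 32/2)
     first iWMMXt GR register         -> 104
     first iWMMXt wR register         -> 112

   Anything else maps to DWARF_FRAME_REGISTERS, i.e. "no DWARF number".  */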
27074 /* DWARF models VFPv3 registers as 32 64-bit registers.
27075 GCC models them as 64 32-bit registers, so we need to describe this to
27076 the DWARF generation code. Other registers can use the default. */
27077 static rtx
27078 arm_dwarf_register_span (rtx rtl)
27080 machine_mode mode;
27081 unsigned regno;
27082 rtx parts[16];
27083 int nregs;
27084 int i;
27086 regno = REGNO (rtl);
27087 if (!IS_VFP_REGNUM (regno))
27088 return NULL_RTX;
27090 /* XXX FIXME: The EABI defines two VFP register ranges:
27091 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27092 256-287: D0-D31
27093 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27094 corresponding D register. Until GDB supports this, we shall use the
27095 legacy encodings. We also use these encodings for D0-D15 for
27096 compatibility with older debuggers. */
27097 mode = GET_MODE (rtl);
27098 if (GET_MODE_SIZE (mode) < 8)
27099 return NULL_RTX;
27101 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27103 nregs = GET_MODE_SIZE (mode) / 4;
27104 for (i = 0; i < nregs; i += 2)
27105 if (TARGET_BIG_END)
27107 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27108 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27110 else
27112 parts[i] = gen_rtx_REG (SImode, regno + i);
27113 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27116 else
27118 nregs = GET_MODE_SIZE (mode) / 8;
27119 for (i = 0; i < nregs; i++)
27120 parts[i] = gen_rtx_REG (DImode, regno + i);
27123 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27126 #if ARM_UNWIND_INFO
27127 /* Emit unwind directives for a store-multiple instruction or stack pointer
27128 push during alignment.
27129 These should only ever be generated by the function prologue code, so
27130 expect them to have a particular form.
27131 The store-multiple instruction sometimes pushes pc as the last register,
27132 which should not be tracked in the unwind information; for -Os it
27133 sometimes pushes dummy registers before the first register that needs
27134 to be tracked in the unwind information. Such dummy registers exist only
27135 to avoid a separate stack adjustment and will not be restored in the
27136 epilogue. */
27138 static void
27139 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27141 int i;
27142 HOST_WIDE_INT offset;
27143 HOST_WIDE_INT nregs;
27144 int reg_size;
27145 unsigned reg;
27146 unsigned lastreg;
27147 unsigned padfirst = 0, padlast = 0;
27148 rtx e;
27150 e = XVECEXP (p, 0, 0);
27151 gcc_assert (GET_CODE (e) == SET);
27153 /* First insn will adjust the stack pointer. */
27154 gcc_assert (GET_CODE (e) == SET
27155 && REG_P (SET_DEST (e))
27156 && REGNO (SET_DEST (e)) == SP_REGNUM
27157 && GET_CODE (SET_SRC (e)) == PLUS);
27159 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27160 nregs = XVECLEN (p, 0) - 1;
27161 gcc_assert (nregs);
27163 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27164 if (reg < 16)
27166 /* For -Os dummy registers can be pushed at the beginning to
27167 avoid separate stack pointer adjustment. */
27168 e = XVECEXP (p, 0, 1);
27169 e = XEXP (SET_DEST (e), 0);
27170 if (GET_CODE (e) == PLUS)
27171 padfirst = INTVAL (XEXP (e, 1));
27172 gcc_assert (padfirst == 0 || optimize_size);
27173 /* The function prologue may also push pc, but not annotate it as it is
27174 never restored. We turn this into a stack pointer adjustment. */
27175 e = XVECEXP (p, 0, nregs);
27176 e = XEXP (SET_DEST (e), 0);
27177 if (GET_CODE (e) == PLUS)
27178 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27179 else
27180 padlast = offset - 4;
27181 gcc_assert (padlast == 0 || padlast == 4);
27182 if (padlast == 4)
27183 fprintf (asm_out_file, "\t.pad #4\n");
27184 reg_size = 4;
27185 fprintf (asm_out_file, "\t.save {");
27187 else if (IS_VFP_REGNUM (reg))
27189 reg_size = 8;
27190 fprintf (asm_out_file, "\t.vsave {");
27192 else
27193 /* Unknown register type. */
27194 gcc_unreachable ();
27196 /* If the stack increment doesn't match the size of the saved registers,
27197 something has gone horribly wrong. */
27198 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27200 offset = padfirst;
27201 lastreg = 0;
27202 /* The remaining insns will describe the stores. */
27203 for (i = 1; i <= nregs; i++)
27205 /* Expect (set (mem <addr>) (reg)).
27206 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27207 e = XVECEXP (p, 0, i);
27208 gcc_assert (GET_CODE (e) == SET
27209 && MEM_P (SET_DEST (e))
27210 && REG_P (SET_SRC (e)));
27212 reg = REGNO (SET_SRC (e));
27213 gcc_assert (reg >= lastreg);
27215 if (i != 1)
27216 fprintf (asm_out_file, ", ");
27217 /* We can't use %r for vfp because we need to use the
27218 double precision register names. */
27219 if (IS_VFP_REGNUM (reg))
27220 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27221 else
27222 asm_fprintf (asm_out_file, "%r", reg);
27224 if (flag_checking)
27226 /* Check that the addresses are consecutive. */
27227 e = XEXP (SET_DEST (e), 0);
27228 if (GET_CODE (e) == PLUS)
27229 gcc_assert (REG_P (XEXP (e, 0))
27230 && REGNO (XEXP (e, 0)) == SP_REGNUM
27231 && CONST_INT_P (XEXP (e, 1))
27232 && offset == INTVAL (XEXP (e, 1)));
27233 else
27234 gcc_assert (i == 1
27235 && REG_P (e)
27236 && REGNO (e) == SP_REGNUM);
27237 offset += reg_size;
27240 fprintf (asm_out_file, "}\n");
27241 if (padfirst)
27242 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27245 /* Emit unwind directives for a SET. */
27247 static void
27248 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27250 rtx e0;
27251 rtx e1;
27252 unsigned reg;
27254 e0 = XEXP (p, 0);
27255 e1 = XEXP (p, 1);
27256 switch (GET_CODE (e0))
27258 case MEM:
27259 /* Pushing a single register. */
27260 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27261 || !REG_P (XEXP (XEXP (e0, 0), 0))
27262 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27263 abort ();
27265 asm_fprintf (asm_out_file, "\t.save ");
27266 if (IS_VFP_REGNUM (REGNO (e1)))
27267 asm_fprintf(asm_out_file, "{d%d}\n",
27268 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27269 else
27270 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27271 break;
27273 case REG:
27274 if (REGNO (e0) == SP_REGNUM)
27276 /* A stack increment. */
27277 if (GET_CODE (e1) != PLUS
27278 || !REG_P (XEXP (e1, 0))
27279 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27280 || !CONST_INT_P (XEXP (e1, 1)))
27281 abort ();
27283 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27284 -INTVAL (XEXP (e1, 1)));
27286 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27288 HOST_WIDE_INT offset;
27290 if (GET_CODE (e1) == PLUS)
27292 if (!REG_P (XEXP (e1, 0))
27293 || !CONST_INT_P (XEXP (e1, 1)))
27294 abort ();
27295 reg = REGNO (XEXP (e1, 0));
27296 offset = INTVAL (XEXP (e1, 1));
27297 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27298 HARD_FRAME_POINTER_REGNUM, reg,
27299 offset);
27301 else if (REG_P (e1))
27303 reg = REGNO (e1);
27304 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27305 HARD_FRAME_POINTER_REGNUM, reg);
27307 else
27308 abort ();
27310 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27312 /* Move from sp to reg. */
27313 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27315 else if (GET_CODE (e1) == PLUS
27316 && REG_P (XEXP (e1, 0))
27317 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27318 && CONST_INT_P (XEXP (e1, 1)))
27320 /* Set reg to offset from sp. */
27321 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27322 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27324 else
27325 abort ();
27326 break;
27328 default:
27329 abort ();
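/* Rough correspondence implemented by the switch above (illustrative
   only; the left-hand side shows the usual assembly form of the RTL SET
   being annotated, the right-hand side the EABI directive emitted):

     str rX, [sp, #-4]!  (push {rX})      ->  .save {rX}   /  .vsave {dN}
     sub sp, sp, #N                       ->  .pad #N
     add fp, rY, #N  /  mov fp, rY        ->  .setfp fp, rY, #N  /  .setfp fp, rY
     mov rX, sp                           ->  .movsp rX
     add rX, sp, #N                       ->  .movsp rX, #N  */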
27334 /* Emit unwind directives for the given insn. */
27336 static void
27337 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27339 rtx note, pat;
27340 bool handled_one = false;
27342 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27343 return;
27345 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27346 && (TREE_NOTHROW (current_function_decl)
27347 || crtl->all_throwers_are_sibcalls))
27348 return;
27350 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27351 return;
27353 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27355 switch (REG_NOTE_KIND (note))
27357 case REG_FRAME_RELATED_EXPR:
27358 pat = XEXP (note, 0);
27359 goto found;
27361 case REG_CFA_REGISTER:
27362 pat = XEXP (note, 0);
27363 if (pat == NULL)
27365 pat = PATTERN (insn);
27366 if (GET_CODE (pat) == PARALLEL)
27367 pat = XVECEXP (pat, 0, 0);
27370 /* Only emitted for IS_STACKALIGN re-alignment. */
27372 rtx dest, src;
27373 unsigned reg;
27375 src = SET_SRC (pat);
27376 dest = SET_DEST (pat);
27378 gcc_assert (src == stack_pointer_rtx);
27379 reg = REGNO (dest);
27380 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27381 reg + 0x90, reg);
27383 handled_one = true;
27384 break;
27386 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27387 to get correct DWARF information for shrink-wrapping. We should not
27388 emit unwind information for it because these notes are used either for
27389 pretend arguments or to adjust sp and restore registers from the
27390 stack. */
27391 case REG_CFA_DEF_CFA:
27392 case REG_CFA_ADJUST_CFA:
27393 case REG_CFA_RESTORE:
27394 return;
27396 case REG_CFA_EXPRESSION:
27397 case REG_CFA_OFFSET:
27398 /* ??? Only handling here what we actually emit. */
27399 gcc_unreachable ();
27401 default:
27402 break;
27405 if (handled_one)
27406 return;
27407 pat = PATTERN (insn);
27408 found:
27410 switch (GET_CODE (pat))
27412 case SET:
27413 arm_unwind_emit_set (asm_out_file, pat);
27414 break;
27416 case SEQUENCE:
27417 /* Store multiple. */
27418 arm_unwind_emit_sequence (asm_out_file, pat);
27419 break;
27421 default:
27422 abort();
27427 /* Output a reference from a function exception table to the type_info
27428 object X. The EABI specifies that the symbol should be relocated by
27429 an R_ARM_TARGET2 relocation. */
27431 static bool
27432 arm_output_ttype (rtx x)
27434 fputs ("\t.word\t", asm_out_file);
27435 output_addr_const (asm_out_file, x);
27436 /* Use special relocations for symbol references. */
27437 if (!CONST_INT_P (x))
27438 fputs ("(TARGET2)", asm_out_file);
27439 fputc ('\n', asm_out_file);
27441 return TRUE;
27444 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27446 static void
27447 arm_asm_emit_except_personality (rtx personality)
27449 fputs ("\t.personality\t", asm_out_file);
27450 output_addr_const (asm_out_file, personality);
27451 fputc ('\n', asm_out_file);
27453 #endif /* ARM_UNWIND_INFO */
27455 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27457 static void
27458 arm_asm_init_sections (void)
27460 #if ARM_UNWIND_INFO
27461 exception_section = get_unnamed_section (0, output_section_asm_op,
27462 "\t.handlerdata");
27463 #endif /* ARM_UNWIND_INFO */
27465 #ifdef OBJECT_FORMAT_ELF
27466 if (target_pure_code)
27467 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27468 #endif
27471 /* Output unwind directives for the start/end of a function. */
27473 void
27474 arm_output_fn_unwind (FILE * f, bool prologue)
27476 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27477 return;
27479 if (prologue)
27480 fputs ("\t.fnstart\n", f);
27481 else
27483 /* If this function will never be unwound, then mark it as such.
27484 The same condition is used in arm_unwind_emit to suppress
27485 the frame annotations. */
27486 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27487 && (TREE_NOTHROW (current_function_decl)
27488 || crtl->all_throwers_are_sibcalls))
27489 fputs("\t.cantunwind\n", f);
27491 fputs ("\t.fnend\n", f);
27495 static bool
27496 arm_emit_tls_decoration (FILE *fp, rtx x)
27498 enum tls_reloc reloc;
27499 rtx val;
27501 val = XVECEXP (x, 0, 0);
27502 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27504 output_addr_const (fp, val);
27506 switch (reloc)
27508 case TLS_GD32:
27509 fputs ("(tlsgd)", fp);
27510 break;
27511 case TLS_LDM32:
27512 fputs ("(tlsldm)", fp);
27513 break;
27514 case TLS_LDO32:
27515 fputs ("(tlsldo)", fp);
27516 break;
27517 case TLS_IE32:
27518 fputs ("(gottpoff)", fp);
27519 break;
27520 case TLS_LE32:
27521 fputs ("(tpoff)", fp);
27522 break;
27523 case TLS_DESCSEQ:
27524 fputs ("(tlsdesc)", fp);
27525 break;
27526 default:
27527 gcc_unreachable ();
27530 switch (reloc)
27532 case TLS_GD32:
27533 case TLS_LDM32:
27534 case TLS_IE32:
27535 case TLS_DESCSEQ:
27536 fputs (" + (. - ", fp);
27537 output_addr_const (fp, XVECEXP (x, 0, 2));
27538 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27539 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27540 output_addr_const (fp, XVECEXP (x, 0, 3));
27541 fputc (')', fp);
27542 break;
27543 default:
27544 break;
27547 return TRUE;
27550 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27552 static void
27553 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27555 gcc_assert (size == 4);
27556 fputs ("\t.word\t", file);
27557 output_addr_const (file, x);
27558 fputs ("(tlsldo)", file);
27561 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27563 static bool
27564 arm_output_addr_const_extra (FILE *fp, rtx x)
27566 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27567 return arm_emit_tls_decoration (fp, x);
27568 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27570 char label[256];
27571 int labelno = INTVAL (XVECEXP (x, 0, 0));
27573 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27574 assemble_name_raw (fp, label);
27576 return TRUE;
27578 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27580 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27581 if (GOT_PCREL)
27582 fputs ("+.", fp);
27583 fputs ("-(", fp);
27584 output_addr_const (fp, XVECEXP (x, 0, 0));
27585 fputc (')', fp);
27586 return TRUE;
27588 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27590 output_addr_const (fp, XVECEXP (x, 0, 0));
27591 if (GOT_PCREL)
27592 fputs ("+.", fp);
27593 fputs ("-(", fp);
27594 output_addr_const (fp, XVECEXP (x, 0, 1));
27595 fputc (')', fp);
27596 return TRUE;
27598 else if (GET_CODE (x) == CONST_VECTOR)
27599 return arm_emit_vector_const (fp, x);
27601 return FALSE;
27604 /* Output assembly for a shift instruction.
27605 SET_FLAGS determines how the instruction modifies the condition codes.
27606 0 - Do not set condition codes.
27607 1 - Set condition codes.
27608 2 - Use smallest instruction. */
27609 const char *
27610 arm_output_shift(rtx * operands, int set_flags)
27612 char pattern[100];
27613 static const char flag_chars[3] = {'?', '.', '!'};
27614 const char *shift;
27615 HOST_WIDE_INT val;
27616 char c;
27618 c = flag_chars[set_flags];
27619 shift = shift_op(operands[3], &val);
27620 if (shift)
27622 if (val != -1)
27623 operands[2] = GEN_INT(val);
27624 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27626 else
27627 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27629 output_asm_insn (pattern, operands);
27630 return "";
27633 /* Output assembly for a WMMX immediate shift instruction. */
27634 const char *
27635 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27637 int shift = INTVAL (operands[2]);
27638 char templ[50];
27639 machine_mode opmode = GET_MODE (operands[0]);
27641 gcc_assert (shift >= 0);
27643 /* Handle a shift value that is out of range: > 63 (for the D qualifier),
27644 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27645 if (((opmode == V4HImode) && (shift > 15))
27646 || ((opmode == V2SImode) && (shift > 31))
27647 || ((opmode == DImode) && (shift > 63)))
27649 if (wror_or_wsra)
27651 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27652 output_asm_insn (templ, operands);
27653 if (opmode == DImode)
27655 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27656 output_asm_insn (templ, operands);
27659 else
27661 /* The destination register will contain all zeros. */
27662 sprintf (templ, "wzero\t%%0");
27663 output_asm_insn (templ, operands);
27665 return "";
27668 if ((opmode == DImode) && (shift > 32))
27670 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27671 output_asm_insn (templ, operands);
27672 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27673 output_asm_insn (templ, operands);
27675 else
27677 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27678 output_asm_insn (templ, operands);
27680 return "";
27683 /* Output assembly for a WMMX tinsr instruction. */
27684 const char *
27685 arm_output_iwmmxt_tinsr (rtx *operands)
27687 int mask = INTVAL (operands[3]);
27688 int i;
27689 char templ[50];
27690 int units = mode_nunits[GET_MODE (operands[0])];
27691 gcc_assert ((mask & (mask - 1)) == 0);
27692 for (i = 0; i < units; ++i)
27694 if ((mask & 0x01) == 1)
27696 break;
27698 mask >>= 1;
27700 gcc_assert (i < units);
27702 switch (GET_MODE (operands[0]))
27704 case E_V8QImode:
27705 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27706 break;
27707 case E_V4HImode:
27708 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27709 break;
27710 case E_V2SImode:
27711 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27712 break;
27713 default:
27714 gcc_unreachable ();
27715 break;
27717 output_asm_insn (templ, operands);
27719 return "";
27722 /* Output a Thumb-1 casesi dispatch sequence. */
27723 const char *
27724 thumb1_output_casesi (rtx *operands)
27726 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27728 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27730 switch (GET_MODE(diff_vec))
27732 case E_QImode:
27733 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27734 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27735 case E_HImode:
27736 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27737 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27738 case E_SImode:
27739 return "bl\t%___gnu_thumb1_case_si";
27740 default:
27741 gcc_unreachable ();
27745 /* Output a Thumb-2 casesi instruction. */
27746 const char *
27747 thumb2_output_casesi (rtx *operands)
27749 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27751 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27753 output_asm_insn ("cmp\t%0, %1", operands);
27754 output_asm_insn ("bhi\t%l3", operands);
27755 switch (GET_MODE(diff_vec))
27757 case E_QImode:
27758 return "tbb\t[%|pc, %0]";
27759 case E_HImode:
27760 return "tbh\t[%|pc, %0, lsl #1]";
27761 case E_SImode:
27762 if (flag_pic)
27764 output_asm_insn ("adr\t%4, %l2", operands);
27765 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27766 output_asm_insn ("add\t%4, %4, %5", operands);
27767 return "bx\t%4";
27769 else
27771 output_asm_insn ("adr\t%4, %l2", operands);
27772 return "ldr\t%|pc, [%4, %0, lsl #2]";
27774 default:
27775 gcc_unreachable ();
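/* Shape of the non-PIC HImode dispatch emitted above (an illustrative
   sketch; register numbers come from the casesi pattern's operands):

       cmp   rIDX, rBOUND
       bhi   .Ldefault
       tbh   [pc, rIDX, lsl #1]

   The PIC SImode variant instead materialises the table address with adr,
   loads the offset, adds it and branches with bx.  */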
27779 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27780 per-core tuning structs. */
27781 static int
27782 arm_issue_rate (void)
27784 return current_tune->issue_rate;
27787 /* Return how many instructions the scheduler should look ahead to choose
27788 the best one. */
27789 static int
27790 arm_first_cycle_multipass_dfa_lookahead (void)
27792 int issue_rate = arm_issue_rate ();
27794 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27797 /* Enable modeling of L2 auto-prefetcher. */
27798 static int
27799 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27801 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27804 const char *
27805 arm_mangle_type (const_tree type)
27807 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27808 has to be mangled as if it were in the "std" namespace. */
27809 if (TARGET_AAPCS_BASED
27810 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27811 return "St9__va_list";
27813 /* Half-precision float. */
27814 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27815 return "Dh";
27817 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27818 builtin type. */
27819 if (TYPE_NAME (type) != NULL)
27820 return arm_mangle_builtin_type (type);
27822 /* Use the default mangling. */
27823 return NULL;
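/* Examples of the special cases handled above (for illustration):
     - the AAPCS va_list type mangles as "St9__va_list", i.e. as if it
       were std::__va_list;
     - a 16-bit REAL_TYPE (__fp16) mangles as "Dh";
     - named Neon builtin types are delegated to arm_mangle_builtin_type;
     - everything else falls back to the default mangling.  */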
27826 /* Order of allocation of core registers for Thumb: this allocation is
27827 written over the corresponding initial entries of the array
27828 initialized with REG_ALLOC_ORDER. We allocate all low registers
27829 first. Saving and restoring a low register is usually cheaper than
27830 using a call-clobbered high register. */
27832 static const int thumb_core_reg_alloc_order[] =
27834 3, 2, 1, 0, 4, 5, 6, 7,
27835 12, 14, 8, 9, 10, 11
27838 /* Adjust register allocation order when compiling for Thumb. */
27840 void
27841 arm_order_regs_for_local_alloc (void)
27843 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27844 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27845 if (TARGET_THUMB)
27846 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27847 sizeof (thumb_core_reg_alloc_order));
27850 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27852 bool
27853 arm_frame_pointer_required (void)
27855 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27856 return true;
27858 /* If the function receives nonlocal gotos, it needs to save the frame
27859 pointer in the nonlocal_goto_save_area object. */
27860 if (cfun->has_nonlocal_label)
27861 return true;
27863 /* The frame pointer is required for non-leaf APCS frames. */
27864 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27865 return true;
27867 /* If we are probing the stack in the prologue, we will have a faulting
27868 instruction prior to the stack adjustment and this requires a frame
27869 pointer if we want to catch the exception using the EABI unwinder. */
27870 if (!IS_INTERRUPT (arm_current_func_type ())
27871 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27872 && arm_except_unwind_info (&global_options) == UI_TARGET
27873 && cfun->can_throw_non_call_exceptions)
27875 HOST_WIDE_INT size = get_frame_size ();
27877 /* That's irrelevant if there is no stack adjustment. */
27878 if (size <= 0)
27879 return false;
27881 /* That's relevant only if there is a stack probe. */
27882 if (crtl->is_leaf && !cfun->calls_alloca)
27884 /* We don't have the final size of the frame so adjust. */
27885 size += 32 * UNITS_PER_WORD;
27886 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27887 return true;
27889 else
27890 return true;
27893 return false;
27896 /* Only Thumb-1 lacks support for conditional execution, so return true
27897 unless the target is Thumb-1. */
27898 static bool
27899 arm_have_conditional_execution (void)
27901 return !TARGET_THUMB1;
27904 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27905 static HOST_WIDE_INT
27906 arm_vector_alignment (const_tree type)
27908 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27910 if (TARGET_AAPCS_BASED)
27911 align = MIN (align, 64);
27913 return align;
27916 static unsigned int
27917 arm_autovectorize_vector_sizes (void)
27919 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27922 static bool
27923 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27925 /* Vectors which aren't in packed structures will not be less aligned than
27926 the natural alignment of their element type, so this is safe. */
27927 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27928 return !is_packed;
27930 return default_builtin_vector_alignment_reachable (type, is_packed);
27933 static bool
27934 arm_builtin_support_vector_misalignment (machine_mode mode,
27935 const_tree type, int misalignment,
27936 bool is_packed)
27938 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27940 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27942 if (is_packed)
27943 return align == 1;
27945 /* If the misalignment is unknown, we should be able to handle the access
27946 so long as it is not to a member of a packed data structure. */
27947 if (misalignment == -1)
27948 return true;
27950 /* Return true if the misalignment is a multiple of the natural alignment
27951 of the vector's element type. This is probably always going to be
27952 true in practice, since we've already established that this isn't a
27953 packed access. */
27954 return ((misalignment % align) == 0);
27957 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27958 is_packed);
27961 static void
27962 arm_conditional_register_usage (void)
27964 int regno;
27966 if (TARGET_THUMB1 && optimize_size)
27968 /* When optimizing for size on Thumb-1, it's better not
27969 to use the HI regs, because of the overhead of
27970 stacking them. */
27971 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27972 fixed_regs[regno] = call_used_regs[regno] = 1;
27975 /* The link register can be clobbered by any branch insn,
27976 but we have no way to track that at present, so mark
27977 it as unavailable. */
27978 if (TARGET_THUMB1)
27979 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27981 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27983 /* VFPv3 registers are disabled when earlier VFP
27984 versions are selected due to the definition of
27985 LAST_VFP_REGNUM. */
27986 for (regno = FIRST_VFP_REGNUM;
27987 regno <= LAST_VFP_REGNUM; ++ regno)
27989 fixed_regs[regno] = 0;
27990 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27991 || regno >= FIRST_VFP_REGNUM + 32;
27995 if (TARGET_REALLY_IWMMXT)
27997 regno = FIRST_IWMMXT_GR_REGNUM;
27998 /* The 2002/10/09 revision of the XScale ABI has wCG0
27999 and wCG1 as call-preserved registers. The 2002/11/21
28000 revision changed this so that all wCG registers are
28001 scratch registers. */
28002 for (regno = FIRST_IWMMXT_GR_REGNUM;
28003 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28004 fixed_regs[regno] = 0;
28005 /* The XScale ABI has wR0 - wR9 as scratch registers,
28006 the rest as call-preserved registers. */
28007 for (regno = FIRST_IWMMXT_REGNUM;
28008 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28010 fixed_regs[regno] = 0;
28011 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28015 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28017 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28018 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28020 else if (TARGET_APCS_STACK)
28022 fixed_regs[10] = 1;
28023 call_used_regs[10] = 1;
28025 /* -mcaller-super-interworking reserves r11 for calls to
28026 _interwork_r11_call_via_rN(). Making the register global
28027 is an easy way of ensuring that it remains valid for all
28028 calls. */
28029 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28030 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28032 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28033 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28034 if (TARGET_CALLER_INTERWORKING)
28035 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28037 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28040 static reg_class_t
28041 arm_preferred_rename_class (reg_class_t rclass)
28043 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28044 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28045 so code size can be reduced. */
28046 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28047 return LO_REGS;
28048 else
28049 return NO_REGS;
28052 /* Compute the attribute "length" of insn "*push_multi".
28053 So this function MUST be kept in sync with that insn pattern. */
28055 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28057 int i, regno, hi_reg;
28058 int num_saves = XVECLEN (parallel_op, 0);
28060 /* ARM mode. */
28061 if (TARGET_ARM)
28062 return 4;
28063 /* Thumb1 mode. */
28064 if (TARGET_THUMB1)
28065 return 2;
28067 /* Thumb2 mode. */
28068 regno = REGNO (first_op);
28069 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
28070 list fits in 8 bits. Normally this means all registers in the list must
28071 be LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use
28072 the 32-bit encoding. The one exception is PUSH: LR, although in HI_REGS,
28073 can still be used with the 16-bit encoding. */
28074 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28075 for (i = 1; i < num_saves && !hi_reg; i++)
28077 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28078 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28081 if (!hi_reg)
28082 return 2;
28083 return 4;
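/* Examples of the length computation above (illustrative):
     ARM state:      always 4 bytes.
     Thumb-1 state:  always 2 bytes.
     Thumb-2 state:  push {r0-r7, lr}  -> 2  (LR is the allowed HI_REGS
                                             exception for PUSH)
                     push {r0, r8}     -> 4  (r8 forces the 32-bit
                                             encoding).  */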
28086 /* Compute the attribute "length" of an insn. Currently, this function is
28087 used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28088 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28089 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28090 is true if OPERANDS contains an insn that explicitly updates the base register. */
28093 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28095 /* ARM mode. */
28096 if (TARGET_ARM)
28097 return 4;
28098 /* Thumb1 mode. */
28099 if (TARGET_THUMB1)
28100 return 2;
28102 rtx parallel_op = operands[0];
28103 /* Initialize INDX to the index of the last element of the PARALLEL. */
28104 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28105 /* Initialize REGNO to the base register. */
28106 unsigned regno = REGNO (operands[1]);
28107 /* Skip the return and write-back patterns.
28108 We only need the register-pop patterns for the analysis below. */
28109 unsigned first_indx = 0;
28110 first_indx += return_pc ? 1 : 0;
28111 first_indx += write_back_p ? 1 : 0;
28113 /* A pop operation can be done through LDM or POP. If the base register is SP
28114 and write-back is used, then LDM is an alias of POP. */
28115 bool pop_p = (regno == SP_REGNUM && write_back_p);
28116 bool ldm_p = !pop_p;
28118 /* Check base register for LDM. */
28119 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28120 return 4;
28122 /* Check each register in the list. */
28123 for (; indx >= first_indx; indx--)
28125 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28126 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28127 comment in arm_attr_length_push_multi. */
28128 if (REGNO_REG_CLASS (regno) == HI_REGS
28129 && (regno != PC_REGNUM || ldm_p))
28130 return 4;
28133 return 2;
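/* Examples of the length computation above (illustrative, Thumb-2):
     pop  {r4, r5, pc}    -> 2  (POP form: SP base with write-back, and PC
                                is permitted in the 16-bit encoding)
     ldmia r8!, {r0-r3}   -> 4  (base register in HI_REGS)
     pop  {r4, r8}        -> 4  (a HI_REGS register other than PC).  */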
28136 /* Compute the number of instructions emitted by output_move_double. */
28138 arm_count_output_move_double_insns (rtx *operands)
28140 int count;
28141 rtx ops[2];
28142 /* output_move_double may modify the operands array, so call it
28143 here on a copy of the array. */
28144 ops[0] = operands[0];
28145 ops[1] = operands[1];
28146 output_move_double (ops, false, &count);
28147 return count;
28151 vfp3_const_double_for_fract_bits (rtx operand)
28153 REAL_VALUE_TYPE r0;
28155 if (!CONST_DOUBLE_P (operand))
28156 return 0;
28158 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28159 if (exact_real_inverse (DFmode, &r0)
28160 && !REAL_VALUE_NEGATIVE (r0))
28162 if (exact_real_truncate (DFmode, &r0))
28164 HOST_WIDE_INT value = real_to_integer (&r0);
28165 value = value & 0xffffffff;
28166 if ((value != 0) && ( (value & (value - 1)) == 0))
28168 int ret = exact_log2 (value);
28169 gcc_assert (IN_RANGE (ret, 0, 31));
28170 return ret;
28174 return 0;
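/* Worked example of the test above (illustrative): for the constant
   0.125, the exact reciprocal is 8.0, which truncates exactly to the
   integer 8, a power of two, so the function returns exact_log2 (8) == 3,
   i.e. three fractional bits.  A constant such as 0.3, whose reciprocal
   is not an exact power of two, yields 0.  */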
28177 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28178 log2 is in [1, 32], return that log2. Otherwise return -1.
28179 This is used in the patterns for vcvt.s32.f32 floating-point to
28180 fixed-point conversions. */
28183 vfp3_const_double_for_bits (rtx x)
28185 const REAL_VALUE_TYPE *r;
28187 if (!CONST_DOUBLE_P (x))
28188 return -1;
28190 r = CONST_DOUBLE_REAL_VALUE (x);
28192 if (REAL_VALUE_NEGATIVE (*r)
28193 || REAL_VALUE_ISNAN (*r)
28194 || REAL_VALUE_ISINF (*r)
28195 || !real_isinteger (r, SFmode))
28196 return -1;
28198 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28200 /* The exact_log2 above will have returned -1 if this is
28201 not an exact log2. */
28202 if (!IN_RANGE (hwint, 1, 32))
28203 return -1;
28205 return hwint;
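/* Worked examples for the function above (illustrative): 256.0 is a
   non-negative integer equal to 2^8, so the result is 8.  1.0 gives
   exact_log2 == 0, which is outside [1, 32], so the result is -1, as it
   is for 0.5 (not an integer) and for any negative, NaN or infinite
   value.  */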
28209 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28211 static void
28212 arm_pre_atomic_barrier (enum memmodel model)
28214 if (need_atomic_barrier_p (model, true))
28215 emit_insn (gen_memory_barrier ());
28218 static void
28219 arm_post_atomic_barrier (enum memmodel model)
28221 if (need_atomic_barrier_p (model, false))
28222 emit_insn (gen_memory_barrier ());
28225 /* Emit the load-exclusive and store-exclusive instructions.
28226 Use acquire and release versions if necessary. */
28228 static void
28229 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28231 rtx (*gen) (rtx, rtx);
28233 if (acq)
28235 switch (mode)
28237 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28238 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28239 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28240 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28241 default:
28242 gcc_unreachable ();
28245 else
28247 switch (mode)
28249 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28250 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28251 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28252 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28253 default:
28254 gcc_unreachable ();
28258 emit_insn (gen (rval, mem));
28261 static void
28262 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28263 rtx mem, bool rel)
28265 rtx (*gen) (rtx, rtx, rtx);
28267 if (rel)
28269 switch (mode)
28271 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28272 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28273 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28274 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28275 default:
28276 gcc_unreachable ();
28279 else
28281 switch (mode)
28283 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28284 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28285 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28286 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28287 default:
28288 gcc_unreachable ();
28292 emit_insn (gen (bval, rval, mem));
28295 /* Mark the previous jump instruction as unlikely. */
28297 static void
28298 emit_unlikely_jump (rtx insn)
28300 rtx_insn *jump = emit_jump_insn (insn);
28301 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28304 /* Expand a compare and swap pattern. */
28306 void
28307 arm_expand_compare_and_swap (rtx operands[])
28309 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28310 machine_mode mode;
28311 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28313 bval = operands[0];
28314 rval = operands[1];
28315 mem = operands[2];
28316 oldval = operands[3];
28317 newval = operands[4];
28318 is_weak = operands[5];
28319 mod_s = operands[6];
28320 mod_f = operands[7];
28321 mode = GET_MODE (mem);
28323 /* Normally the succ memory model must be stronger than fail, but in the
28324 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28325 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28327 if (TARGET_HAVE_LDACQ
28328 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28329 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28330 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28332 switch (mode)
28334 case E_QImode:
28335 case E_HImode:
28336 /* For narrow modes, we're going to perform the comparison in SImode,
28337 so do the zero-extension now. */
28338 rval = gen_reg_rtx (SImode);
28339 oldval = convert_modes (SImode, mode, oldval, true);
28340 /* FALLTHRU */
28342 case E_SImode:
28343 /* Force the value into a register if needed. We waited until after
28344 the zero-extension above to do this properly. */
28345 if (!arm_add_operand (oldval, SImode))
28346 oldval = force_reg (SImode, oldval);
28347 break;
28349 case E_DImode:
28350 if (!cmpdi_operand (oldval, mode))
28351 oldval = force_reg (mode, oldval);
28352 break;
28354 default:
28355 gcc_unreachable ();
28358 if (TARGET_THUMB1)
28360 switch (mode)
28362 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28363 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28364 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28365 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28366 default:
28367 gcc_unreachable ();
28370 else
28372 switch (mode)
28374 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28375 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28376 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28377 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28378 default:
28379 gcc_unreachable ();
28383 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28384 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28386 if (mode == QImode || mode == HImode)
28387 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28389 /* In all cases, we arrange for success to be signaled by Z set.
28390 This arrangement allows for the boolean result to be used directly
28391 in a subsequent branch, post optimization. For Thumb-1 targets, the
28392 boolean negation of the result is also stored in bval because the Thumb-1
28393 backend lacks dependency tracking for the CC flag, as flag-setting is not
28394 represented at the RTL level. */
28395 if (TARGET_THUMB1)
28396 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28397 else
28399 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28400 emit_insn (gen_rtx_SET (bval, x));
28404 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28405 another memory store between the load-exclusive and store-exclusive can
28406 reset the monitor from Exclusive to Open state. This means we must wait
28407 until after reload to split the pattern, lest we get a register spill in
28408 the middle of the atomic sequence. Success of the compare and swap is
28409 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28410 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28411 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28413 void
28414 arm_split_compare_and_swap (rtx operands[])
28416 rtx rval, mem, oldval, newval, neg_bval;
28417 machine_mode mode;
28418 enum memmodel mod_s, mod_f;
28419 bool is_weak;
28420 rtx_code_label *label1, *label2;
28421 rtx x, cond;
28423 rval = operands[1];
28424 mem = operands[2];
28425 oldval = operands[3];
28426 newval = operands[4];
28427 is_weak = (operands[5] != const0_rtx);
28428 mod_s = memmodel_from_int (INTVAL (operands[6]));
28429 mod_f = memmodel_from_int (INTVAL (operands[7]));
28430 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28431 mode = GET_MODE (mem);
28433 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28435 bool use_acquire = TARGET_HAVE_LDACQ
28436 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28437 || is_mm_release (mod_s));
28439 bool use_release = TARGET_HAVE_LDACQ
28440 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28441 || is_mm_acquire (mod_s));
28443 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28444 a full barrier is emitted after the store-release. */
28445 if (is_armv8_sync)
28446 use_acquire = false;
28448 /* Checks whether a barrier is needed and emits one accordingly. */
28449 if (!(use_acquire || use_release))
28450 arm_pre_atomic_barrier (mod_s);
28452 label1 = NULL;
28453 if (!is_weak)
28455 label1 = gen_label_rtx ();
28456 emit_label (label1);
28458 label2 = gen_label_rtx ();
28460 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28462 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28463 as required to communicate with arm_expand_compare_and_swap. */
28464 if (TARGET_32BIT)
28466 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28467 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28468 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28469 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28470 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28472 else
28474 emit_move_insn (neg_bval, const1_rtx);
28475 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28476 if (thumb1_cmpneg_operand (oldval, SImode))
28477 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28478 label2, cond));
28479 else
28480 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28483 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28485 /* Weak or strong, we want EQ to be true for success, so that we
28486 match the flags that we got from the compare above. */
28487 if (TARGET_32BIT)
28489 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28490 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28491 emit_insn (gen_rtx_SET (cond, x));
28494 if (!is_weak)
28496 /* Z is set to boolean value of !neg_bval, as required to communicate
28497 with arm_expand_compare_and_swap. */
28498 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28499 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28502 if (!is_mm_relaxed (mod_f))
28503 emit_label (label2);
28505 /* Checks whether a barrier is needed and emits one accordingly. */
28506 if (is_armv8_sync
28507 || !(use_acquire || use_release))
28508 arm_post_atomic_barrier (mod_s);
28510 if (is_mm_relaxed (mod_f))
28511 emit_label (label2);
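/* Shape of the 32-bit strong compare-and-swap sequence produced above
   (an illustrative sketch; exact registers, barriers and the
   acquire/release flavours of ldrex/strex depend on the memory model
   and architecture, and a weak CAS omits the retry branch):

     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Ldone            @ failure: Z clear
       strex   tmp, newval, [mem]
       cmp     tmp, #0
       bne     .Lretry
     .Ldone:                     @ Z set on success  */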
28514 /* Split an atomic operation pattern. Operation is given by CODE and is one
28515 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28516 operation). Operation is performed on the content at MEM and on VALUE
28517 following the memory model MODEL_RTX. The content at MEM before and after
28518 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28519 success of the operation is returned in COND. Using a scratch register or
28520 an operand register for these determines what result is returned for that
28521 pattern. */
28523 void
28524 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28525 rtx value, rtx model_rtx, rtx cond)
28527 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28528 machine_mode mode = GET_MODE (mem);
28529 machine_mode wmode = (mode == DImode ? DImode : SImode);
28530 rtx_code_label *label;
28531 bool all_low_regs, bind_old_new;
28532 rtx x;
28534 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28536 bool use_acquire = TARGET_HAVE_LDACQ
28537 && !(is_mm_relaxed (model) || is_mm_consume (model)
28538 || is_mm_release (model));
28540 bool use_release = TARGET_HAVE_LDACQ
28541 && !(is_mm_relaxed (model) || is_mm_consume (model)
28542 || is_mm_acquire (model));
28544 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28545 a full barrier is emitted after the store-release. */
28546 if (is_armv8_sync)
28547 use_acquire = false;
28549 /* Checks whether a barrier is needed and emits one accordingly. */
28550 if (!(use_acquire || use_release))
28551 arm_pre_atomic_barrier (model);
28553 label = gen_label_rtx ();
28554 emit_label (label);
28556 if (new_out)
28557 new_out = gen_lowpart (wmode, new_out);
28558 if (old_out)
28559 old_out = gen_lowpart (wmode, old_out);
28560 else
28561 old_out = new_out;
28562 value = simplify_gen_subreg (wmode, value, mode, 0);
28564 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28566 /* Does the operation require destination and first operand to use the same
28567 register? This is decided by register constraints of relevant insn
28568 patterns in thumb1.md. */
28569 gcc_assert (!new_out || REG_P (new_out));
28570 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28571 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28572 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28573 bind_old_new =
28574 (TARGET_THUMB1
28575 && code != SET
28576 && code != MINUS
28577 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28579 /* We want to return the old value while putting the result of the operation
28580 in the same register as the old value so copy the old value over to the
28581 destination register and use that register for the operation. */
28582 if (old_out && bind_old_new)
28584 emit_move_insn (new_out, old_out);
28585 old_out = new_out;
28588 switch (code)
28590 case SET:
28591 new_out = value;
28592 break;
28594 case NOT:
28595 x = gen_rtx_AND (wmode, old_out, value);
28596 emit_insn (gen_rtx_SET (new_out, x));
28597 x = gen_rtx_NOT (wmode, new_out);
28598 emit_insn (gen_rtx_SET (new_out, x));
28599 break;
28601 case MINUS:
28602 if (CONST_INT_P (value))
28604 value = GEN_INT (-INTVAL (value));
28605 code = PLUS;
28607 /* FALLTHRU */
28609 case PLUS:
28610 if (mode == DImode)
28612 /* DImode plus/minus need to clobber flags. */
28613 /* The adddi3 and subdi3 patterns are incorrectly written so that
28614 they require matching operands, even when we could easily support
28615 three operands. Thankfully, this can be fixed up post-splitting,
28616 as the individual add+adc patterns do accept three operands and
28617 post-reload cprop can make these moves go away. */
28618 emit_move_insn (new_out, old_out);
28619 if (code == PLUS)
28620 x = gen_adddi3 (new_out, new_out, value);
28621 else
28622 x = gen_subdi3 (new_out, new_out, value);
28623 emit_insn (x);
28624 break;
28626 /* FALLTHRU */
28628 default:
28629 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28630 emit_insn (gen_rtx_SET (new_out, x));
28631 break;
28634 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28635 use_release);
28637 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28638 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28640 /* Checks whether a barrier is needed and emits one accordingly. */
28641 if (is_armv8_sync
28642 || !(use_acquire || use_release))
28643 arm_post_atomic_barrier (model);
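/* A minimal sketch of the sequence built above, assuming an ARMv7 core
   without the acquire/release instructions and a sequentially-consistent
   model, e.g. for __atomic_fetch_add (&x, 1, __ATOMIC_SEQ_CST):

	dmb	ish		@ arm_pre_atomic_barrier
     1:	ldrex	r0, [rX]	@ old_out = *mem (load-exclusive)
	add	r1, r0, #1	@ new_out = old_out + value
	strex	r2, r1, [rX]	@ cond, 0 iff the store-exclusive succeeded
	cmp	r2, #0
	bne	1b		@ retry if another observer intervened
	dmb	ish		@ arm_post_atomic_barrier

   Register numbers are illustrative only; the barriers and the
   load/store flavours depend on the memory model and target ISA.  */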
28646 #define MAX_VECT_LEN 16
28648 struct expand_vec_perm_d
28650 rtx target, op0, op1;
28651 unsigned char perm[MAX_VECT_LEN];
28652 machine_mode vmode;
28653 unsigned char nelt;
28654 bool one_vector_p;
28655 bool testing_p;
28658 /* Generate a variable permutation. */
28660 static void
28661 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28663 machine_mode vmode = GET_MODE (target);
28664 bool one_vector_p = rtx_equal_p (op0, op1);
28666 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28667 gcc_checking_assert (GET_MODE (op0) == vmode);
28668 gcc_checking_assert (GET_MODE (op1) == vmode);
28669 gcc_checking_assert (GET_MODE (sel) == vmode);
28670 gcc_checking_assert (TARGET_NEON);
28672 if (one_vector_p)
28674 if (vmode == V8QImode)
28675 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28676 else
28677 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28679 else
28681 rtx pair;
28683 if (vmode == V8QImode)
28685 pair = gen_reg_rtx (V16QImode);
28686 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28687 pair = gen_lowpart (TImode, pair);
28688 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28690 else
28692 pair = gen_reg_rtx (OImode);
28693 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28694 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28699 void
28700 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28702 machine_mode vmode = GET_MODE (target);
28703 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28704 bool one_vector_p = rtx_equal_p (op0, op1);
28705 rtx rmask[MAX_VECT_LEN], mask;
28707 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28708 numbering of elements for big-endian, we must reverse the order. */
28709 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28711 /* The VTBL instruction does not use a modulo index, so we must take care
28712 of that ourselves. */
28713 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28714 for (i = 0; i < nelt; ++i)
28715 rmask[i] = mask;
28716 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28717 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28719 arm_expand_vec_perm_1 (target, op0, op1, sel);
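/* A small worked example of the masking above: for a single-vector
   V8QImode permutation, nelt is 8 and the mask is 7, so a selector lane
   holding 11 is reduced to 3 and selects element 3 of OP0, matching the
   modulo semantics VEC_PERM_EXPR requires.  Without the AND, an
   out-of-range index would make VTBL write zero to that lane instead of
   wrapping.  */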
28722 /* Map lane ordering between architectural lane order, and GCC lane order,
28723 taking into account ABI. See comment above output_move_neon for details. */
28725 static int
28726 neon_endian_lane_map (machine_mode mode, int lane)
28728 if (BYTES_BIG_ENDIAN)
28730 int nelems = GET_MODE_NUNITS (mode);
28731 /* Reverse lane order. */
28732 lane = (nelems - 1 - lane);
28733 /* Reverse D register order, to match ABI. */
28734 if (GET_MODE_SIZE (mode) == 16)
28735 lane = lane ^ (nelems / 2);
28737 return lane;
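/* Worked example (the map is the identity for little-endian): on a
   big-endian target, V4SImode occupies a 16-byte Q register, so lanes
   0, 1, 2, 3 map to 1, 0, 3, 2 -- the lane order is reversed and the
   two D-register halves are then swapped to match the ABI.  For
   V2SImode (a single D register) lanes 0, 1 simply map to 1, 0.  */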
28740 /* Some permutations index into pairs of vectors, this is a helper function
28741 to map indexes into those pairs of vectors. */
28743 static int
28744 neon_pair_endian_lane_map (machine_mode mode, int lane)
28746 int nelem = GET_MODE_NUNITS (mode);
28747 if (BYTES_BIG_ENDIAN)
28748 lane =
28749 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28750 return lane;
28753 /* Generate or test for an insn that supports a constant permutation. */
28755 /* Recognize patterns for the VUZP insns. */
28757 static bool
28758 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28760 unsigned int i, odd, mask, nelt = d->nelt;
28761 rtx out0, out1, in0, in1;
28762 rtx (*gen)(rtx, rtx, rtx, rtx);
28763 int first_elem;
28764 int swap_nelt;
28766 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28767 return false;
28769 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28770 big endian pattern on 64 bit vectors, so we correct for that. */
28771 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28772 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28774 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28776 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28777 odd = 0;
28778 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28779 odd = 1;
28780 else
28781 return false;
28782 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28784 for (i = 0; i < nelt; i++)
28786 unsigned elt =
28787 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28788 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28789 return false;
28792 /* Success! */
28793 if (d->testing_p)
28794 return true;
28796 switch (d->vmode)
28798 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28799 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28800 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28801 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28802 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28803 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28804 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28805 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28806 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28807 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28808 default:
28809 gcc_unreachable ();
28812 in0 = d->op0;
28813 in1 = d->op1;
28814 if (swap_nelt != 0)
28815 std::swap (in0, in1);
28817 out0 = d->target;
28818 out1 = gen_reg_rtx (d->vmode);
28819 if (odd)
28820 std::swap (out0, out1);
28822 emit_insn (gen (out0, in0, in1, out1));
28823 return true;
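/* Example selectors matched above (little-endian, two-operand
   V4SImode): { 0, 2, 4, 6 } is the "even" pattern and { 1, 3, 5, 7 }
   the "odd" one.  Applied to { a0 a1 a2 a3 } and { b0 b1 b2 b3 } they
   pick { a0 a2 b0 b2 } and { a1 a3 b1 b3 } respectively, i.e. the two
   de-interleaved results a single VUZP produces.  */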
28826 /* Recognize patterns for the VZIP insns. */
28828 static bool
28829 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28831 unsigned int i, high, mask, nelt = d->nelt;
28832 rtx out0, out1, in0, in1;
28833 rtx (*gen)(rtx, rtx, rtx, rtx);
28834 int first_elem;
28835 bool is_swapped;
28837 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28838 return false;
28840 is_swapped = BYTES_BIG_ENDIAN;
28842 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28844 high = nelt / 2;
28845 if (first_elem == neon_endian_lane_map (d->vmode, high))
28847 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28848 high = 0;
28849 else
28850 return false;
28851 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28853 for (i = 0; i < nelt / 2; i++)
28855 unsigned elt =
28856 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28857 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28858 != elt)
28859 return false;
28860 elt =
28861 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28862 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28863 != elt)
28864 return false;
28867 /* Success! */
28868 if (d->testing_p)
28869 return true;
28871 switch (d->vmode)
28873 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28874 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28875 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28876 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28877 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28878 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28879 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28880 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28881 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28882 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28883 default:
28884 gcc_unreachable ();
28887 in0 = d->op0;
28888 in1 = d->op1;
28889 if (is_swapped)
28890 std::swap (in0, in1);
28892 out0 = d->target;
28893 out1 = gen_reg_rtx (d->vmode);
28894 if (high)
28895 std::swap (out0, out1);
28897 emit_insn (gen (out0, in0, in1, out1));
28898 return true;
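/* Example selectors matched above (little-endian, two-operand
   V4SImode): { 0, 4, 1, 5 } interleaves the low halves of the inputs
   and { 2, 6, 3, 7 } the high halves, i.e. the two results of a single
   VZIP of { a0 a1 a2 a3 } with { b0 b1 b2 b3 }: { a0 b0 a1 b1 } and
   { a2 b2 a3 b3 }.  */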
28901 /* Recognize patterns for the VREV insns. */
28903 static bool
28904 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28906 unsigned int i, j, diff, nelt = d->nelt;
28907 rtx (*gen)(rtx, rtx);
28909 if (!d->one_vector_p)
28910 return false;
28912 diff = d->perm[0];
28913 switch (diff)
28915 case 7:
28916 switch (d->vmode)
28918 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28919 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28920 default:
28921 return false;
28923 break;
28924 case 3:
28925 switch (d->vmode)
28927 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28928 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28929 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28930 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28931 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28932 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28933 default:
28934 return false;
28936 break;
28937 case 1:
28938 switch (d->vmode)
28940 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28941 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28942 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28943 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28944 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28945 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28946 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28947 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28948 default:
28949 return false;
28951 break;
28952 default:
28953 return false;
28956 for (i = 0; i < nelt ; i += diff + 1)
28957 for (j = 0; j <= diff; j += 1)
28959 /* This is guaranteed to be true as the value of diff
28960 is 7, 3 or 1 and we should have enough elements in the
28961 queue to generate this. Getting a vector mask with a
28962 value of diff other than these values implies that
28963 something is wrong by the time we get here. */
28964 gcc_assert (i + j < nelt);
28965 if (d->perm[i + j] != i + diff - j)
28966 return false;
28969 /* Success! */
28970 if (d->testing_p)
28971 return true;
28973 emit_insn (gen (d->target, d->op0));
28974 return true;
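/* Example: for V8QImode the selector { 3, 2, 1, 0, 7, 6, 5, 4 } has
   d->perm[0] == 3, so it matches the diff == 3 case and becomes a
   VREV32 (bytes reversed within each 32-bit chunk), while
   { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7) becomes a VREV64.  */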
28977 /* Recognize patterns for the VTRN insns. */
28979 static bool
28980 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28982 unsigned int i, odd, mask, nelt = d->nelt;
28983 rtx out0, out1, in0, in1;
28984 rtx (*gen)(rtx, rtx, rtx, rtx);
28986 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28987 return false;
28989 /* Note that these are little-endian tests. Adjust for big-endian later. */
28990 if (d->perm[0] == 0)
28991 odd = 0;
28992 else if (d->perm[0] == 1)
28993 odd = 1;
28994 else
28995 return false;
28996 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28998 for (i = 0; i < nelt; i += 2)
29000 if (d->perm[i] != i + odd)
29001 return false;
29002 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29003 return false;
29006 /* Success! */
29007 if (d->testing_p)
29008 return true;
29010 switch (d->vmode)
29012 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29013 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29014 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29015 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29016 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29017 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29018 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29019 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29020 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29021 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29022 default:
29023 gcc_unreachable ();
29026 in0 = d->op0;
29027 in1 = d->op1;
29028 if (BYTES_BIG_ENDIAN)
29030 std::swap (in0, in1);
29031 odd = !odd;
29034 out0 = d->target;
29035 out1 = gen_reg_rtx (d->vmode);
29036 if (odd)
29037 std::swap (out0, out1);
29039 emit_insn (gen (out0, in0, in1, out1));
29040 return true;
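/* Example selectors matched above (little-endian, two-operand
   V4SImode): { 0, 4, 2, 6 } is the "even" pattern and { 1, 5, 3, 7 }
   the "odd" one.  From { a0 a1 a2 a3 } and { b0 b1 b2 b3 } they select
   { a0 b0 a2 b2 } and { a1 b1 a3 b3 }, the two outputs of a single
   VTRN.  */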
29043 /* Recognize patterns for the VEXT insns. */
29045 static bool
29046 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29048 unsigned int i, nelt = d->nelt;
29049 rtx (*gen) (rtx, rtx, rtx, rtx);
29050 rtx offset;
29052 unsigned int location;
29054 unsigned int next = d->perm[0] + 1;
29056 /* TODO: Handle GCC's numbering of elements for big-endian. */
29057 if (BYTES_BIG_ENDIAN)
29058 return false;
29060 /* Check if the extracted indexes are increasing by one. */
29061 for (i = 1; i < nelt; next++, i++)
29063 /* If we hit the most significant element of the 2nd vector in
29064 the previous iteration, no need to test further. */
29065 if (next == 2 * nelt)
29066 return false;
29068 /* If we are operating on only one vector: it could be a
29069 rotation. If there are only two elements of size < 64, let
29070 arm_evpc_neon_vrev catch it. */
29071 if (d->one_vector_p && (next == nelt))
29073 if ((nelt == 2) && (d->vmode != V2DImode))
29074 return false;
29075 else
29076 next = 0;
29079 if (d->perm[i] != next)
29080 return false;
29083 location = d->perm[0];
29085 switch (d->vmode)
29087 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29088 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29089 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29090 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29091 case E_V2SImode: gen = gen_neon_vextv2si; break;
29092 case E_V4SImode: gen = gen_neon_vextv4si; break;
29093 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29094 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29095 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29096 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29097 case E_V2DImode: gen = gen_neon_vextv2di; break;
29098 default:
29099 return false;
29102 /* Success! */
29103 if (d->testing_p)
29104 return true;
29106 offset = GEN_INT (location);
29107 emit_insn (gen (d->target, d->op0, d->op1, offset));
29108 return true;
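/* Example: for a two-operand V4SImode permutation the selector
   { 1, 2, 3, 4 } increases by one starting from element 1, so LOCATION
   is 1 and the whole permutation becomes a single VEXT.32 with
   offset #1, yielding { a1 a2 a3 b0 } from { a0 a1 a2 a3 } and
   { b0 b1 b2 b3 }.  */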
29111 /* The NEON VTBL instruction is a fully variable permutation that's even
29112 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29113 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29114 can do slightly better by expanding this as a constant where we don't
29115 have to apply a mask. */
29117 static bool
29118 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29120 rtx rperm[MAX_VECT_LEN], sel;
29121 machine_mode vmode = d->vmode;
29122 unsigned int i, nelt = d->nelt;
29124 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29125 numbering of elements for big-endian, we must reverse the order. */
29126 if (BYTES_BIG_ENDIAN)
29127 return false;
29129 if (d->testing_p)
29130 return true;
29132 /* Generic code will try constant permutation twice. Once with the
29133 original mode and again with the elements lowered to QImode.
29134 So wait and don't do the selector expansion ourselves. */
29135 if (vmode != V8QImode && vmode != V16QImode)
29136 return false;
29138 for (i = 0; i < nelt; ++i)
29139 rperm[i] = GEN_INT (d->perm[i]);
29140 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29141 sel = force_reg (vmode, sel);
29143 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29144 return true;
29147 static bool
29148 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29150 /* Check if the input mask matches vext before reordering the
29151 operands. */
29152 if (TARGET_NEON)
29153 if (arm_evpc_neon_vext (d))
29154 return true;
29156 /* The pattern matching functions above are written to look for a small
29157 number to begin the sequence (0, 1, N/2). If we begin with an index
29158 from the second operand, we can swap the operands. */
29159 if (d->perm[0] >= d->nelt)
29161 unsigned i, nelt = d->nelt;
29163 for (i = 0; i < nelt; ++i)
29164 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29166 std::swap (d->op0, d->op1);
29169 if (TARGET_NEON)
29171 if (arm_evpc_neon_vuzp (d))
29172 return true;
29173 if (arm_evpc_neon_vzip (d))
29174 return true;
29175 if (arm_evpc_neon_vrev (d))
29176 return true;
29177 if (arm_evpc_neon_vtrn (d))
29178 return true;
29179 return arm_evpc_neon_vtbl (d);
29181 return false;
29184 /* Expand a vec_perm_const pattern. */
29186 bool
29187 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29189 struct expand_vec_perm_d d;
29190 int i, nelt, which;
29192 d.target = target;
29193 d.op0 = op0;
29194 d.op1 = op1;
29196 d.vmode = GET_MODE (target);
29197 gcc_assert (VECTOR_MODE_P (d.vmode));
29198 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29199 d.testing_p = false;
29201 for (i = which = 0; i < nelt; ++i)
29203 rtx e = XVECEXP (sel, 0, i);
29204 int ei = INTVAL (e) & (2 * nelt - 1);
29205 which |= (ei < nelt ? 1 : 2);
29206 d.perm[i] = ei;
29209 switch (which)
29211 default:
29212 gcc_unreachable();
29214 case 3:
29215 d.one_vector_p = false;
29216 if (!rtx_equal_p (op0, op1))
29217 break;
29219 /* The elements of PERM do not suggest that only the first operand
29220 is used, but both operands are identical. Allow easier matching
29221 of the permutation by folding the permutation into the single
29222 input vector. */
29223 /* FALLTHRU */
29224 case 2:
29225 for (i = 0; i < nelt; ++i)
29226 d.perm[i] &= nelt - 1;
29227 d.op0 = op1;
29228 d.one_vector_p = true;
29229 break;
29231 case 1:
29232 d.op1 = op0;
29233 d.one_vector_p = true;
29234 break;
29237 return arm_expand_vec_perm_const_1 (&d);
29240 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29242 static bool
29243 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29244 const unsigned char *sel)
29246 struct expand_vec_perm_d d;
29247 unsigned int i, nelt, which;
29248 bool ret;
29250 d.vmode = vmode;
29251 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29252 d.testing_p = true;
29253 memcpy (d.perm, sel, nelt);
29255 /* Categorize the set of elements in the selector. */
29256 for (i = which = 0; i < nelt; ++i)
29258 unsigned char e = d.perm[i];
29259 gcc_assert (e < 2 * nelt);
29260 which |= (e < nelt ? 1 : 2);
29263 /* For all elements from second vector, fold the elements to first. */
29264 if (which == 2)
29265 for (i = 0; i < nelt; ++i)
29266 d.perm[i] -= nelt;
29268 /* Check whether the mask can be applied to the vector type. */
29269 d.one_vector_p = (which != 3);
29271 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29272 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29273 if (!d.one_vector_p)
29274 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29276 start_sequence ();
29277 ret = arm_expand_vec_perm_const_1 (&d);
29278 end_sequence ();
29280 return ret;
29283 bool
29284 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29286 /* If we are soft float and either have ldrd or the mode fits in a
29287 single word, then all auto increment forms are ok. */
29288 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29289 return true;
29291 switch (code)
29293 /* Post increment and Pre Decrement are supported for all
29294 instruction forms except for vector forms. */
29295 case ARM_POST_INC:
29296 case ARM_PRE_DEC:
29297 if (VECTOR_MODE_P (mode))
29299 if (code != ARM_PRE_DEC)
29300 return true;
29301 else
29302 return false;
29305 return true;
29307 case ARM_POST_DEC:
29308 case ARM_PRE_INC:
29309 /* Without LDRD and mode size greater than
29310 word size, there is no point in auto-incrementing
29311 because ldm and stm will not have these forms. */
29312 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29313 return false;
29315 /* Vector and floating point modes do not support
29316 these auto increment forms. */
29317 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29318 return false;
29320 return true;
29322 default:
29323 return false;
29327 return false;
29330 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29331 on ARM, since we know that shifts by negative amounts are no-ops.
29332 Additionally, the default expansion code is not available or suitable
29333 for post-reload insn splits (this can occur when the register allocator
29334 chooses not to do a shift in NEON).
29336 This function is used in both initial expand and post-reload splits, and
29337 handles all kinds of 64-bit shifts.
29339 Input requirements:
29340 - It is safe for the input and output to be the same register, but
29341 early-clobber rules apply for the shift amount and scratch registers.
29342 - Shift by register requires both scratch registers. In all other cases
29343 the scratch registers may be NULL.
29344 - Ashiftrt by a register also clobbers the CC register. */
29345 void
29346 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29347 rtx amount, rtx scratch1, rtx scratch2)
29349 rtx out_high = gen_highpart (SImode, out);
29350 rtx out_low = gen_lowpart (SImode, out);
29351 rtx in_high = gen_highpart (SImode, in);
29352 rtx in_low = gen_lowpart (SImode, in);
29354 /* Terminology:
29355 in = the register pair containing the input value.
29356 out = the destination register pair.
29357 up = the high- or low-part of each pair.
29358 down = the opposite part to "up".
29359 In a shift, we can consider bits to shift from "up"-stream to
29360 "down"-stream, so in a left-shift "up" is the low-part and "down"
29361 is the high-part of each register pair. */
29363 rtx out_up = code == ASHIFT ? out_low : out_high;
29364 rtx out_down = code == ASHIFT ? out_high : out_low;
29365 rtx in_up = code == ASHIFT ? in_low : in_high;
29366 rtx in_down = code == ASHIFT ? in_high : in_low;
29368 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29369 gcc_assert (out
29370 && (REG_P (out) || GET_CODE (out) == SUBREG)
29371 && GET_MODE (out) == DImode);
29372 gcc_assert (in
29373 && (REG_P (in) || GET_CODE (in) == SUBREG)
29374 && GET_MODE (in) == DImode);
29375 gcc_assert (amount
29376 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29377 && GET_MODE (amount) == SImode)
29378 || CONST_INT_P (amount)));
29379 gcc_assert (scratch1 == NULL
29380 || (GET_CODE (scratch1) == SCRATCH)
29381 || (GET_MODE (scratch1) == SImode
29382 && REG_P (scratch1)));
29383 gcc_assert (scratch2 == NULL
29384 || (GET_CODE (scratch2) == SCRATCH)
29385 || (GET_MODE (scratch2) == SImode
29386 && REG_P (scratch2)));
29387 gcc_assert (!REG_P (out) || !REG_P (amount)
29388 || !HARD_REGISTER_P (out)
29389 || (REGNO (out) != REGNO (amount)
29390 && REGNO (out) + 1 != REGNO (amount)));
29392 /* Macros to make following code more readable. */
29393 #define SUB_32(DEST,SRC) \
29394 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29395 #define RSB_32(DEST,SRC) \
29396 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29397 #define SUB_S_32(DEST,SRC) \
29398 gen_addsi3_compare0 ((DEST), (SRC), \
29399 GEN_INT (-32))
29400 #define SET(DEST,SRC) \
29401 gen_rtx_SET ((DEST), (SRC))
29402 #define SHIFT(CODE,SRC,AMOUNT) \
29403 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29404 #define LSHIFT(CODE,SRC,AMOUNT) \
29405 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29406 SImode, (SRC), (AMOUNT))
29407 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29408 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29409 SImode, (SRC), (AMOUNT))
29410 #define ORR(A,B) \
29411 gen_rtx_IOR (SImode, (A), (B))
29412 #define BRANCH(COND,LABEL) \
29413 gen_arm_cond_branch ((LABEL), \
29414 gen_rtx_ ## COND (CCmode, cc_reg, \
29415 const0_rtx), \
29416 cc_reg)
29418 /* Shifts by register and shifts by constant are handled separately. */
29419 if (CONST_INT_P (amount))
29421 /* We have a shift-by-constant. */
29423 /* First, handle out-of-range shift amounts.
28424 In both cases we try to match the result that an ARM instruction in a
29425 shift-by-register would give. This helps reduce execution
29426 differences between optimization levels, but it won't stop other
29427 parts of the compiler doing different things. This is "undefined
28428 behavior", in any case. */
29429 if (INTVAL (amount) <= 0)
29430 emit_insn (gen_movdi (out, in));
29431 else if (INTVAL (amount) >= 64)
29433 if (code == ASHIFTRT)
29435 rtx const31_rtx = GEN_INT (31);
29436 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29437 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29439 else
29440 emit_insn (gen_movdi (out, const0_rtx));
29443 /* Now handle valid shifts. */
29444 else if (INTVAL (amount) < 32)
29446 /* Shifts by a constant less than 32. */
29447 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29449 /* Clearing the out register in DImode first avoids lots
29450 of spilling and results in less stack usage.
29451 Later this redundant insn is completely removed.
29452 Do that only if "in" and "out" are different registers. */
29453 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29454 emit_insn (SET (out, const0_rtx));
29455 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29456 emit_insn (SET (out_down,
29457 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29458 out_down)));
29459 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29461 else
29463 /* Shifts by a constant greater than 31. */
29464 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29466 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29467 emit_insn (SET (out, const0_rtx));
29468 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29469 if (code == ASHIFTRT)
29470 emit_insn (gen_ashrsi3 (out_up, in_up,
29471 GEN_INT (31)));
29472 else
29473 emit_insn (SET (out_up, const0_rtx));
29476 else
29478 /* We have a shift-by-register. */
29479 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29481 /* This alternative requires the scratch registers. */
29482 gcc_assert (scratch1 && REG_P (scratch1));
29483 gcc_assert (scratch2 && REG_P (scratch2));
29485 /* We will need the values "amount-32" and "32-amount" later.
29486 Swapping them around now allows the later code to be more general. */
29487 switch (code)
29489 case ASHIFT:
29490 emit_insn (SUB_32 (scratch1, amount));
29491 emit_insn (RSB_32 (scratch2, amount));
29492 break;
29493 case ASHIFTRT:
29494 emit_insn (RSB_32 (scratch1, amount));
29495 /* Also set CC = amount > 32. */
29496 emit_insn (SUB_S_32 (scratch2, amount));
29497 break;
29498 case LSHIFTRT:
29499 emit_insn (RSB_32 (scratch1, amount));
29500 emit_insn (SUB_32 (scratch2, amount));
29501 break;
29502 default:
29503 gcc_unreachable ();
29506 /* Emit code like this:
29508 arithmetic-left:
29509 out_down = in_down << amount;
29510 out_down = (in_up << (amount - 32)) | out_down;
29511 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29512 out_up = in_up << amount;
29514 arithmetic-right:
29515 out_down = in_down >> amount;
29516 out_down = (in_up << (32 - amount)) | out_down;
29517 if (amount < 32)
29518 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29519 out_up = in_up << amount;
29521 logical-right:
29522 out_down = in_down >> amount;
29523 out_down = (in_up << (32 - amount)) | out_down;
29524 if (amount < 32)
29525 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29526 out_up = in_up << amount;
29528 The ARM and Thumb2 variants are the same but implemented slightly
29529 differently. If this were only called during expand we could just
29530 use the Thumb2 case and let combine do the right thing, but this
29531 can also be called from post-reload splitters. */
29533 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29535 if (!TARGET_THUMB2)
29537 /* Emit code for ARM mode. */
29538 emit_insn (SET (out_down,
29539 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29540 if (code == ASHIFTRT)
29542 rtx_code_label *done_label = gen_label_rtx ();
29543 emit_jump_insn (BRANCH (LT, done_label));
29544 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29545 out_down)));
29546 emit_label (done_label);
29548 else
29549 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29550 out_down)));
29552 else
29554 /* Emit code for Thumb2 mode.
29555 Thumb2 can't do shift and or in one insn. */
29556 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29557 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29559 if (code == ASHIFTRT)
29561 rtx_code_label *done_label = gen_label_rtx ();
29562 emit_jump_insn (BRANCH (LT, done_label));
29563 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29564 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29565 emit_label (done_label);
29567 else
29569 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29570 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29574 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29577 #undef SUB_32
29578 #undef RSB_32
29579 #undef SUB_S_32
29580 #undef SET
29581 #undef SHIFT
29582 #undef LSHIFT
29583 #undef REV_LSHIFT
29584 #undef ORR
29585 #undef BRANCH
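/* Illustrative expansions of the constant-shift cases above (pseudocode,
   not literal emitted RTL):

     x << 5  (amount < 32):	out_high  = in_high << 5;
				out_high |= (unsigned) in_low >> 27;
				out_low   = in_low << 5;

     x << 40 (amount >= 32):	out_high  = in_low << 8;
				out_low   = 0;

   For ASHIFTRT the roles of the two words are exchanged and, for
   amounts of 32 or more, the upper result word is filled with copies
   of the sign bit.  */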
29588 /* Returns true if the pattern is a valid symbolic address, which is either a
29589 symbol_ref or (symbol_ref + addend).
29591 According to the ARM ELF ABI, the initial addend of REL-type relocations
29592 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29593 literal field of the instruction as a 16-bit signed value in the range
29594 -32768 <= A < 32768. */
29596 bool
29597 arm_valid_symbolic_address_p (rtx addr)
29599 rtx xop0, xop1 = NULL_RTX;
29600 rtx tmp = addr;
29602 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29603 return true;
29605 /* (const (plus: symbol_ref const_int)) */
29606 if (GET_CODE (addr) == CONST)
29607 tmp = XEXP (addr, 0);
29609 if (GET_CODE (tmp) == PLUS)
29611 xop0 = XEXP (tmp, 0);
29612 xop1 = XEXP (tmp, 1);
29614 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29615 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29618 return false;
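/* Examples: (symbol_ref "x") and (const (plus (symbol_ref "x")
   (const_int 32767))) are accepted, while an addend of 32768 (or one
   below -32768) is rejected because it cannot be encoded in the signed
   16-bit immediate of a REL-type MOVW/MOVT relocation.  */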
29621 /* Returns true if COMPARISON is a valid comparison operation and puts
29622 the operands OP1 and OP2 into a form that is valid for it. */
29623 bool
29624 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29626 enum rtx_code code = GET_CODE (*comparison);
29627 int code_int;
29628 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29629 ? GET_MODE (*op2) : GET_MODE (*op1);
29631 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29633 if (code == UNEQ || code == LTGT)
29634 return false;
29636 code_int = (int)code;
29637 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29638 PUT_CODE (*comparison, (enum rtx_code)code_int);
29640 switch (mode)
29642 case E_SImode:
29643 if (!arm_add_operand (*op1, mode))
29644 *op1 = force_reg (mode, *op1);
29645 if (!arm_add_operand (*op2, mode))
29646 *op2 = force_reg (mode, *op2);
29647 return true;
29649 case E_DImode:
29650 if (!cmpdi_operand (*op1, mode))
29651 *op1 = force_reg (mode, *op1);
29652 if (!cmpdi_operand (*op2, mode))
29653 *op2 = force_reg (mode, *op2);
29654 return true;
29656 case E_HFmode:
29657 if (!TARGET_VFP_FP16INST)
29658 break;
29659 /* FP16 comparisons are done in SF mode. */
29660 mode = SFmode;
29661 *op1 = convert_to_mode (mode, *op1, 1);
29662 *op2 = convert_to_mode (mode, *op2, 1);
29663 /* Fall through. */
29664 case E_SFmode:
29665 case E_DFmode:
29666 if (!vfp_compare_operand (*op1, mode))
29667 *op1 = force_reg (mode, *op1);
29668 if (!vfp_compare_operand (*op2, mode))
29669 *op2 = force_reg (mode, *op2);
29670 return true;
29671 default:
29672 break;
29675 return false;
29679 /* Maximum number of instructions to set block of memory. */
29680 static int
29681 arm_block_set_max_insns (void)
29683 if (optimize_function_for_size_p (cfun))
29684 return 4;
29685 else
29686 return current_tune->max_insns_inline_memset;
29689 /* Return TRUE if it's profitable to set block of memory for
29690 non-vectorized case. VAL is the value to set the memory
29691 with. LENGTH is the number of bytes to set. ALIGN is the
29692 alignment of the destination memory in bytes. UNALIGNED_P
29693 is TRUE if we can only set the memory with instructions
29694 meeting alignment requirements. USE_STRD_P is TRUE if we
29695 can use strd to set the memory. */
29696 static bool
29697 arm_block_set_non_vect_profit_p (rtx val,
29698 unsigned HOST_WIDE_INT length,
29699 unsigned HOST_WIDE_INT align,
29700 bool unaligned_p, bool use_strd_p)
29702 int num = 0;
29703 /* For leftovers in bytes of 0-7, we can set the memory block using
29704 strb/strh/str with minimum instruction number. */
29705 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29707 if (unaligned_p)
29709 num = arm_const_inline_cost (SET, val);
29710 num += length / align + length % align;
29712 else if (use_strd_p)
29714 num = arm_const_double_inline_cost (val);
29715 num += (length >> 3) + leftover[length & 7];
29717 else
29719 num = arm_const_inline_cost (SET, val);
29720 num += (length >> 2) + leftover[length & 3];
29723 /* We may be able to combine last pair STRH/STRB into a single STR
29724 by shifting one byte back. */
29725 if (unaligned_access && length > 3 && (length & 3) == 3)
29726 num--;
29728 return (num <= arm_block_set_max_insns ());
29731 /* Return TRUE if it's profitable to set block of memory for
29732 vectorized case. LENGTH is the number of bytes to set.
29733 ALIGN is the alignment of destination memory in bytes.
29734 MODE is the vector mode used to set the memory. */
29735 static bool
29736 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29737 unsigned HOST_WIDE_INT align,
29738 machine_mode mode)
29740 int num;
29741 bool unaligned_p = ((align & 3) != 0);
29742 unsigned int nelt = GET_MODE_NUNITS (mode);
29744 /* Instruction loading constant value. */
29745 num = 1;
29746 /* Instructions storing the memory. */
29747 num += (length + nelt - 1) / nelt;
29748 /* Instructions adjusting the address expression. Only need to
29749 adjust the address expression if it's 4-byte aligned and the leftover
29750 bytes can only be stored by a misaligned store instruction. */
29751 if (!unaligned_p && (length & 3) != 0)
29752 num++;
29754 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29755 if (!unaligned_p && mode == V16QImode)
29756 num--;
29758 return (num <= arm_block_set_max_insns ());
29761 /* Set a block of memory using vectorization instructions for the
29762 unaligned case. We fill the first LENGTH bytes of the memory
29763 area starting from DSTBASE with byte constant VALUE. ALIGN is
29764 the alignment requirement of memory. Return TRUE if succeeded. */
29765 static bool
29766 arm_block_set_unaligned_vect (rtx dstbase,
29767 unsigned HOST_WIDE_INT length,
29768 unsigned HOST_WIDE_INT value,
29769 unsigned HOST_WIDE_INT align)
29771 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29772 rtx dst, mem;
29773 rtx val_elt, val_vec, reg;
29774 rtx rval[MAX_VECT_LEN];
29775 rtx (*gen_func) (rtx, rtx);
29776 machine_mode mode;
29777 unsigned HOST_WIDE_INT v = value;
29778 unsigned int offset = 0;
29779 gcc_assert ((align & 0x3) != 0);
29780 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29781 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29782 if (length >= nelt_v16)
29784 mode = V16QImode;
29785 gen_func = gen_movmisalignv16qi;
29787 else
29789 mode = V8QImode;
29790 gen_func = gen_movmisalignv8qi;
29792 nelt_mode = GET_MODE_NUNITS (mode);
29793 gcc_assert (length >= nelt_mode);
29794 /* Skip if it isn't profitable. */
29795 if (!arm_block_set_vect_profit_p (length, align, mode))
29796 return false;
29798 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29799 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29801 v = sext_hwi (v, BITS_PER_WORD);
29802 val_elt = GEN_INT (v);
29803 for (j = 0; j < nelt_mode; j++)
29804 rval[j] = val_elt;
29806 reg = gen_reg_rtx (mode);
29807 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29808 /* Emit instruction loading the constant value. */
29809 emit_move_insn (reg, val_vec);
29811 /* Handle nelt_mode bytes in a vector. */
29812 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29814 emit_insn ((*gen_func) (mem, reg));
29815 if (i + 2 * nelt_mode <= length)
29817 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29818 offset += nelt_mode;
29819 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29823 /* If at least nelt_v8 bytes are left over, we must be in
29824 V16QI mode. */
29825 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29827 /* Handle (8, 16) bytes leftover. */
29828 if (i + nelt_v8 < length)
29830 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29831 offset += length - i;
29832 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29834 /* We are shifting bytes back, set the alignment accordingly. */
29835 if ((length & 1) != 0 && align >= 2)
29836 set_mem_align (mem, BITS_PER_UNIT);
29838 emit_insn (gen_movmisalignv16qi (mem, reg));
29840 /* Handle (0, 8] bytes leftover. */
29841 else if (i < length && i + nelt_v8 >= length)
29843 if (mode == V16QImode)
29844 reg = gen_lowpart (V8QImode, reg);
29846 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29847 + (nelt_mode - nelt_v8))));
29848 offset += (length - i) + (nelt_mode - nelt_v8);
29849 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29851 /* We are shifting bytes back, set the alignment accordingly. */
29852 if ((length & 1) != 0 && align >= 2)
29853 set_mem_align (mem, BITS_PER_UNIT);
29855 emit_insn (gen_movmisalignv8qi (mem, reg));
29858 return true;
29861 /* Set a block of memory using vectorization instructions for the
29862 aligned case. We fill the first LENGTH bytes of the memory area
29863 starting from DSTBASE with byte constant VALUE. ALIGN is the
29864 alignment requirement of memory. Return TRUE if succeeded. */
29865 static bool
29866 arm_block_set_aligned_vect (rtx dstbase,
29867 unsigned HOST_WIDE_INT length,
29868 unsigned HOST_WIDE_INT value,
29869 unsigned HOST_WIDE_INT align)
29871 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29872 rtx dst, addr, mem;
29873 rtx val_elt, val_vec, reg;
29874 rtx rval[MAX_VECT_LEN];
29875 machine_mode mode;
29876 unsigned HOST_WIDE_INT v = value;
29877 unsigned int offset = 0;
29879 gcc_assert ((align & 0x3) == 0);
29880 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29881 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29882 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29883 mode = V16QImode;
29884 else
29885 mode = V8QImode;
29887 nelt_mode = GET_MODE_NUNITS (mode);
29888 gcc_assert (length >= nelt_mode);
29889 /* Skip if it isn't profitable. */
29890 if (!arm_block_set_vect_profit_p (length, align, mode))
29891 return false;
29893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29895 v = sext_hwi (v, BITS_PER_WORD);
29896 val_elt = GEN_INT (v);
29897 for (j = 0; j < nelt_mode; j++)
29898 rval[j] = val_elt;
29900 reg = gen_reg_rtx (mode);
29901 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29902 /* Emit instruction loading the constant value. */
29903 emit_move_insn (reg, val_vec);
29905 i = 0;
29906 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29907 if (mode == V16QImode)
29909 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29910 emit_insn (gen_movmisalignv16qi (mem, reg));
29911 i += nelt_mode;
29912 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29913 if (i + nelt_v8 < length && i + nelt_v16 > length)
29915 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29916 offset += length - nelt_mode;
29917 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29918 /* We are shifting bytes back, set the alignment accordingly. */
29919 if ((length & 0x3) == 0)
29920 set_mem_align (mem, BITS_PER_UNIT * 4);
29921 else if ((length & 0x1) == 0)
29922 set_mem_align (mem, BITS_PER_UNIT * 2);
29923 else
29924 set_mem_align (mem, BITS_PER_UNIT);
29926 emit_insn (gen_movmisalignv16qi (mem, reg));
29927 return true;
29929 /* Fall through for bytes leftover. */
29930 mode = V8QImode;
29931 nelt_mode = GET_MODE_NUNITS (mode);
29932 reg = gen_lowpart (V8QImode, reg);
29935 /* Handle 8 bytes in a vector. */
29936 for (; (i + nelt_mode <= length); i += nelt_mode)
29938 addr = plus_constant (Pmode, dst, i);
29939 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29940 emit_move_insn (mem, reg);
29943 /* Handle single word leftover by shifting 4 bytes back. We can
29944 use aligned access for this case. */
29945 if (i + UNITS_PER_WORD == length)
29947 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29948 offset += i - UNITS_PER_WORD;
29949 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29950 /* We are shifting 4 bytes back, set the alignment accordingly. */
29951 if (align > UNITS_PER_WORD)
29952 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29954 emit_move_insn (mem, reg);
29956 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29957 We have to use unaligned access for this case. */
29958 else if (i < length)
29960 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29961 offset += length - nelt_mode;
29962 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29963 /* We are shifting bytes back, set the alignment accordingly. */
29964 if ((length & 1) == 0)
29965 set_mem_align (mem, BITS_PER_UNIT * 2);
29966 else
29967 set_mem_align (mem, BITS_PER_UNIT);
29969 emit_insn (gen_movmisalignv8qi (mem, reg));
29972 return true;
29975 /* Set a block of memory using plain strh/strb instructions, only
29976 using instructions allowed by ALIGN on the processor. We fill the
29977 first LENGTH bytes of the memory area starting from DSTBASE
29978 with byte constant VALUE. ALIGN is the alignment requirement
29979 of memory. */
29980 static bool
29981 arm_block_set_unaligned_non_vect (rtx dstbase,
29982 unsigned HOST_WIDE_INT length,
29983 unsigned HOST_WIDE_INT value,
29984 unsigned HOST_WIDE_INT align)
29986 unsigned int i;
29987 rtx dst, addr, mem;
29988 rtx val_exp, val_reg, reg;
29989 machine_mode mode;
29990 HOST_WIDE_INT v = value;
29992 gcc_assert (align == 1 || align == 2);
29994 if (align == 2)
29995 v |= (value << BITS_PER_UNIT);
29997 v = sext_hwi (v, BITS_PER_WORD);
29998 val_exp = GEN_INT (v);
29999 /* Skip if it isn't profitable. */
30000 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30001 align, true, false))
30002 return false;
30004 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30005 mode = (align == 2 ? HImode : QImode);
30006 val_reg = force_reg (SImode, val_exp);
30007 reg = gen_lowpart (mode, val_reg);
30009 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30011 addr = plus_constant (Pmode, dst, i);
30012 mem = adjust_automodify_address (dstbase, mode, addr, i);
30013 emit_move_insn (mem, reg);
30016 /* Handle single byte leftover. */
30017 if (i + 1 == length)
30019 reg = gen_lowpart (QImode, val_reg);
30020 addr = plus_constant (Pmode, dst, i);
30021 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30022 emit_move_insn (mem, reg);
30023 i++;
30026 gcc_assert (i == length);
30027 return true;
30030 /* Set a block of memory using plain strd/str/strh/strb instructions,
30031 to permit unaligned copies on processors which support unaligned
30032 semantics for those instructions. We fill the first LENGTH bytes
30033 of the memory area starting from DSTBASE with byte constant VALUE.
30034 ALIGN is the alignment requirement of memory. */
30035 static bool
30036 arm_block_set_aligned_non_vect (rtx dstbase,
30037 unsigned HOST_WIDE_INT length,
30038 unsigned HOST_WIDE_INT value,
30039 unsigned HOST_WIDE_INT align)
30041 unsigned int i;
30042 rtx dst, addr, mem;
30043 rtx val_exp, val_reg, reg;
30044 unsigned HOST_WIDE_INT v;
30045 bool use_strd_p;
30047 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30048 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30050 v = (value | (value << 8) | (value << 16) | (value << 24));
30051 if (length < UNITS_PER_WORD)
30052 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30054 if (use_strd_p)
30055 v |= (v << BITS_PER_WORD);
30056 else
30057 v = sext_hwi (v, BITS_PER_WORD);
30059 val_exp = GEN_INT (v);
30060 /* Skip if it isn't profitable. */
30061 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30062 align, false, use_strd_p))
30064 if (!use_strd_p)
30065 return false;
30067 /* Try without strd. */
30068 v = (v >> BITS_PER_WORD);
30069 v = sext_hwi (v, BITS_PER_WORD);
30070 val_exp = GEN_INT (v);
30071 use_strd_p = false;
30072 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30073 align, false, use_strd_p))
30074 return false;
30077 i = 0;
30078 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30079 /* Handle double words using strd if possible. */
30080 if (use_strd_p)
30082 val_reg = force_reg (DImode, val_exp);
30083 reg = val_reg;
30084 for (; (i + 8 <= length); i += 8)
30086 addr = plus_constant (Pmode, dst, i);
30087 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30088 emit_move_insn (mem, reg);
30091 else
30092 val_reg = force_reg (SImode, val_exp);
30094 /* Handle words. */
30095 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30096 for (; (i + 4 <= length); i += 4)
30098 addr = plus_constant (Pmode, dst, i);
30099 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30100 if ((align & 3) == 0)
30101 emit_move_insn (mem, reg);
30102 else
30103 emit_insn (gen_unaligned_storesi (mem, reg));
30106 /* Merge last pair of STRH and STRB into a STR if possible. */
30107 if (unaligned_access && i > 0 && (i + 3) == length)
30109 addr = plus_constant (Pmode, dst, i - 1);
30110 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30111 /* We are shifting one byte back, set the alignment accordingly. */
30112 if ((align & 1) == 0)
30113 set_mem_align (mem, BITS_PER_UNIT);
30115 /* Most likely this is an unaligned access, and we can't tell at
30116 compilation time. */
30117 emit_insn (gen_unaligned_storesi (mem, reg));
30118 return true;
30121 /* Handle half word leftover. */
30122 if (i + 2 <= length)
30124 reg = gen_lowpart (HImode, val_reg);
30125 addr = plus_constant (Pmode, dst, i);
30126 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30127 if ((align & 1) == 0)
30128 emit_move_insn (mem, reg);
30129 else
30130 emit_insn (gen_unaligned_storehi (mem, reg));
30132 i += 2;
30135 /* Handle single byte leftover. */
30136 if (i + 1 == length)
30138 reg = gen_lowpart (QImode, val_reg);
30139 addr = plus_constant (Pmode, dst, i);
30140 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30141 emit_move_insn (mem, reg);
30144 return true;
30147 /* Set a block of memory using vectorization instructions for both
30148 aligned and unaligned cases. We fill the first LENGTH bytes of
30149 the memory area starting from DSTBASE with byte constant VALUE.
30150 ALIGN is the alignment requirement of memory. */
30151 static bool
30152 arm_block_set_vect (rtx dstbase,
30153 unsigned HOST_WIDE_INT length,
30154 unsigned HOST_WIDE_INT value,
30155 unsigned HOST_WIDE_INT align)
30157 /* Check whether we need to use unaligned store instruction. */
30158 if (((align & 3) != 0 || (length & 3) != 0)
30159 /* Check whether unaligned store instruction is available. */
30160 && (!unaligned_access || BYTES_BIG_ENDIAN))
30161 return false;
30163 if ((align & 3) == 0)
30164 return arm_block_set_aligned_vect (dstbase, length, value, align);
30165 else
30166 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30169 /* Expand a string store operation. First we try to do that by using
30170 vectorization instructions, then try with ARM unaligned access and
30171 double-word store if profitable. OPERANDS[0] is the destination,
30172 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30173 initialize the memory with, OPERANDS[3] is the known alignment of the
30174 destination. */
30175 bool
30176 arm_gen_setmem (rtx *operands)
30178 rtx dstbase = operands[0];
30179 unsigned HOST_WIDE_INT length;
30180 unsigned HOST_WIDE_INT value;
30181 unsigned HOST_WIDE_INT align;
30183 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30184 return false;
30186 length = UINTVAL (operands[1]);
30187 if (length > 64)
30188 return false;
30190 value = (UINTVAL (operands[2]) & 0xFF);
30191 align = UINTVAL (operands[3]);
30192 if (TARGET_NEON && length >= 8
30193 && current_tune->string_ops_prefer_neon
30194 && arm_block_set_vect (dstbase, length, value, align))
30195 return true;
30197 if (!unaligned_access && (align & 3) != 0)
30198 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30200 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
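/* A rough example of what the expansion above produces (the exact
   choice depends on the tuning): memset (p, 0xab, 12) on a word-aligned
   buffer replicates the byte to 0xabababab, loads that constant into a
   register once and emits three word stores at offsets 0, 4 and 8 via
   arm_block_set_aligned_non_vect; with NEON and string_ops_prefer_neon
   the same request may instead be handled by arm_block_set_vect.  */

/* Return true if the current tuning enables any form of macro fusion.  */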
30204 static bool
30205 arm_macro_fusion_p (void)
30207 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30210 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30211 for MOVW / MOVT macro fusion. */
30213 static bool
30214 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30216 /* We are trying to fuse
30217 movw imm / movt imm
30218 instructions as a group that gets scheduled together. */
30220 rtx set_dest = SET_DEST (curr_set);
30222 if (GET_MODE (set_dest) != SImode)
30223 return false;
30225 /* We are trying to match:
30226 prev (movw) == (set (reg r0) (const_int imm16))
30227 curr (movt) == (set (zero_extract (reg r0)
30228 (const_int 16)
30229 (const_int 16))
30230 (const_int imm16_1))
30232 prev (movw) == (set (reg r1)
30233 (high (symbol_ref ("SYM"))))
30234 curr (movt) == (set (reg r0)
30235 (lo_sum (reg r1)
30236 (symbol_ref ("SYM")))) */
30238 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30240 if (CONST_INT_P (SET_SRC (curr_set))
30241 && CONST_INT_P (SET_SRC (prev_set))
30242 && REG_P (XEXP (set_dest, 0))
30243 && REG_P (SET_DEST (prev_set))
30244 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30245 return true;
30248 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30249 && REG_P (SET_DEST (curr_set))
30250 && REG_P (SET_DEST (prev_set))
30251 && GET_CODE (SET_SRC (prev_set)) == HIGH
30252 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30253 return true;
30255 return false;
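/* In assembly terms the fusible pair is the usual two-instruction
   materialisation of a 32-bit immediate or address, e.g. (register
   chosen arbitrarily):

	movw	r0, #:lower16:sym
	movt	r0, #:upper16:sym

   Keeping the two instructions adjacent lets cores that fuse MOVW/MOVT
   treat them as a single operation.  */

/* Return true if the back-to-back instructions PREV and CURR should be
   kept together during scheduling so that they can be macro-fused.  */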
30258 static bool
30259 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30261 rtx prev_set = single_set (prev);
30262 rtx curr_set = single_set (curr);
30264 if (!prev_set
30265 || !curr_set)
30266 return false;
30268 if (any_condjump_p (curr))
30269 return false;
30271 if (!arm_macro_fusion_p ())
30272 return false;
30274 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30275 && aarch_crypto_can_dual_issue (prev, curr))
30276 return true;
30278 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30279 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30280 return true;
30282 return false;
30285 /* Return true iff the instruction fusion described by OP is enabled. */
30286 bool
30287 arm_fusion_enabled_p (tune_params::fuse_ops op)
30289 return current_tune->fusible_ops & op;
30292 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30293 scheduled for speculative execution. Reject the long-running division
30294 and square-root instructions. */
30296 static bool
30297 arm_sched_can_speculate_insn (rtx_insn *insn)
30299 switch (get_attr_type (insn))
30301 case TYPE_SDIV:
30302 case TYPE_UDIV:
30303 case TYPE_FDIVS:
30304 case TYPE_FDIVD:
30305 case TYPE_FSQRTS:
30306 case TYPE_FSQRTD:
30307 case TYPE_NEON_FP_SQRT_S:
30308 case TYPE_NEON_FP_SQRT_D:
30309 case TYPE_NEON_FP_SQRT_S_Q:
30310 case TYPE_NEON_FP_SQRT_D_Q:
30311 case TYPE_NEON_FP_DIV_S:
30312 case TYPE_NEON_FP_DIV_D:
30313 case TYPE_NEON_FP_DIV_S_Q:
30314 case TYPE_NEON_FP_DIV_D_Q:
30315 return false;
30316 default:
30317 return true;
30321 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30323 static unsigned HOST_WIDE_INT
30324 arm_asan_shadow_offset (void)
30326 return HOST_WIDE_INT_1U << 29;
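/* A sketch of the resulting mapping, assuming the usual AddressSanitizer
   shadow scale of 8: the shadow byte for application address A lives at
   (A >> 3) + 0x20000000 on this target.  The scaling itself is
   controlled by the sanitizer, not by this hook.  */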
30330 /* This is a temporary fix for PR60655. Ideally we need
30331 to handle most of these cases in the generic part but
30332 currently we reject minus (..) (sym_ref). We try to
30333 ameliorate the case with minus (sym_ref1) (sym_ref2)
30334 where they are in the same section. */
30336 static bool
30337 arm_const_not_ok_for_debug_p (rtx p)
30339 tree decl_op0 = NULL;
30340 tree decl_op1 = NULL;
30342 if (GET_CODE (p) == MINUS)
30344 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30346 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30347 if (decl_op1
30348 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30349 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30351 if ((VAR_P (decl_op1)
30352 || TREE_CODE (decl_op1) == CONST_DECL)
30353 && (VAR_P (decl_op0)
30354 || TREE_CODE (decl_op0) == CONST_DECL))
30355 return (get_variable_section (decl_op1, false)
30356 != get_variable_section (decl_op0, false));
30358 if (TREE_CODE (decl_op1) == LABEL_DECL
30359 && TREE_CODE (decl_op0) == LABEL_DECL)
30360 return (DECL_CONTEXT (decl_op1)
30361 != DECL_CONTEXT (decl_op0));
30364 return true;
30368 return false;
30371 /* Return TRUE if X is a reference to a value in a constant pool. */
30372 extern bool
30373 arm_is_constant_pool_ref (rtx x)
30375 return (MEM_P (x)
30376 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30377 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30380 /* Remember the last target of arm_set_current_function. */
30381 static GTY(()) tree arm_previous_fndecl;
30383 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30385 void
30386 save_restore_target_globals (tree new_tree)
30388 /* If we have a previous state, use it. */
30389 if (TREE_TARGET_GLOBALS (new_tree))
30390 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30391 else if (new_tree == target_option_default_node)
30392 restore_target_globals (&default_target_globals);
30393 else
30395 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30396 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30399 arm_option_params_internal ();
30402 /* Invalidate arm_previous_fndecl. */
30404 void
30405 arm_reset_previous_fndecl (void)
30407 arm_previous_fndecl = NULL_TREE;
30410 /* Establish appropriate back-end context for processing the function
30411 FNDECL. The argument might be NULL to indicate processing at top
30412 level, outside of any function scope. */
30414 static void
30415 arm_set_current_function (tree fndecl)
30417 if (!fndecl || fndecl == arm_previous_fndecl)
30418 return;
30420 tree old_tree = (arm_previous_fndecl
30421 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30422 : NULL_TREE);
30424 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30426 /* If current function has no attributes but previous one did,
30427 use the default node. */
30428 if (! new_tree && old_tree)
30429 new_tree = target_option_default_node;
30431 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30432 the default have been handled by save_restore_target_globals from
30433 arm_pragma_target_parse. */
30434 if (old_tree == new_tree)
30435 return;
30437 arm_previous_fndecl = fndecl;
30439 /* First set the target options. */
30440 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30442 save_restore_target_globals (new_tree);
30445 /* Implement TARGET_OPTION_PRINT. */
30447 static void
30448 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30450 int flags = ptr->x_target_flags;
30451 const char *fpu_name;
30453 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30454 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30456 fprintf (file, "%*sselected isa %s\n", indent, "",
30457 TARGET_THUMB2_P (flags) ? "thumb2" :
30458 TARGET_THUMB_P (flags) ? "thumb1" :
30459 "arm");
30461 if (ptr->x_arm_arch_string)
30462 fprintf (file, "%*sselected architecture %s\n", indent, "",
30463 ptr->x_arm_arch_string);
30465 if (ptr->x_arm_cpu_string)
30466 fprintf (file, "%*sselected CPU %s\n", indent, "",
30467 ptr->x_arm_cpu_string);
30469 if (ptr->x_arm_tune_string)
30470 fprintf (file, "%*sselected tune %s\n", indent, "",
30471 ptr->x_arm_tune_string);
30473 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30476 /* Hook to determine if one function can safely inline another. */
30478 static bool
30479 arm_can_inline_p (tree caller, tree callee)
30481 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30482 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30483 bool can_inline = true;
30485 struct cl_target_option *caller_opts
30486 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30487 : target_option_default_node);
30489 struct cl_target_option *callee_opts
30490 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30491 : target_option_default_node);
30493 if (callee_opts == caller_opts)
30494 return true;
30496 /* Callee's ISA features should be a subset of the caller's. */
30497 struct arm_build_target caller_target;
30498 struct arm_build_target callee_target;
30499 caller_target.isa = sbitmap_alloc (isa_num_bits);
30500 callee_target.isa = sbitmap_alloc (isa_num_bits);
30502 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30503 false);
30504 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30505 false);
30506 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30507 can_inline = false;
30509 sbitmap_free (caller_target.isa);
30510 sbitmap_free (callee_target.isa);
30512 /* It is OK to inline between different modes.
30513 A function with mode-specific instructions, e.g. using asm,
30514 must be explicitly protected with noinline. */
30515 return can_inline;
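/* Illustrative sketch of the rule above (the option strings are assumed, not
   taken from this file): a callee declared with
   __attribute__ ((target ("fpu=neon"))) advertises NEON ISA feature bits; if
   the caller was configured without NEON, callee_target.isa is not a subset
   of caller_target.isa and inlining is refused.  When both functions resolve
   to the same cl_target_option record the hook returns true immediately, and
   a plain ARM/Thumb mode difference never blocks inlining on its own.  */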
30518 /* Hook to fix a function's alignment when it is affected by a target attribute. */
30520 static void
30521 arm_relayout_function (tree fndecl)
30523 if (DECL_USER_ALIGN (fndecl))
30524 return;
30526 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30528 if (!callee_tree)
30529 callee_tree = target_option_default_node;
30531 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30532 SET_DECL_ALIGN
30533 (fndecl,
30534 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
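/* Illustrative effect (assuming the usual arm.h definition of
   FUNCTION_BOUNDARY_P, i.e. 16 bits for Thumb and 32 bits for ARM): a
   function carrying __attribute__ ((target ("thumb"))) in a translation
   unit compiled for ARM mode is re-laid out here with the smaller Thumb
   function boundary, unless the user forced an alignment explicitly.  */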
30537 /* Inner function to process the attribute((target(...))): take an argument and
30538 set the current options from that argument. If we have a list, recursively
30539 go over the list. */
30541 static bool
30542 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30544 if (TREE_CODE (args) == TREE_LIST)
30546 bool ret = true;
30548 for (; args; args = TREE_CHAIN (args))
30549 if (TREE_VALUE (args)
30550 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30551 ret = false;
30552 return ret;
30555 else if (TREE_CODE (args) != STRING_CST)
30557 error ("attribute %<target%> argument not a string");
30558 return false;
30561 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30562 char *q;
30564 while ((q = strtok (argstr, ",")) != NULL)
30566 while (ISSPACE (*q)) ++q;
30568 argstr = NULL;
30569 if (!strncmp (q, "thumb", 5))
30570 opts->x_target_flags |= MASK_THUMB;
30572 else if (!strncmp (q, "arm", 3))
30573 opts->x_target_flags &= ~MASK_THUMB;
30575 else if (!strncmp (q, "fpu=", 4))
30577 int fpu_index;
30578 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30579 &fpu_index, CL_TARGET))
30581 error ("invalid fpu for attribute(target(\"%s\"))", q);
30582 return false;
30584 if (fpu_index == TARGET_FPU_auto)
30586 /* This doesn't really make sense until we support
30587 general dynamic selection of the architecture and all
30588 sub-features. */
30589 sorry ("auto fpu selection not currently permitted here");
30590 return false;
30592 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30594 else
30596 error ("attribute(target(\"%s\")) is unknown", q);
30597 return false;
30601 return true;
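/* Illustrative example of the strings handled above (hypothetical user
   code, not part of this file):

     __attribute__ ((target ("thumb,fpu=vfpv4")))
     int fast_path (int x) { return x + 1; }

   arrives here as the STRING_CST "thumb,fpu=vfpv4"; the loop tokenizes it
   on ',' and sets MASK_THUMB and x_arm_fpu_index accordingly.  "vfpv4" is
   only an example -mfpu value.  */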
30604 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30606 tree
30607 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30608 struct gcc_options *opts_set)
30610 struct cl_target_option cl_opts;
30612 if (!arm_valid_target_attribute_rec (args, opts))
30613 return NULL_TREE;
30615 cl_target_option_save (&cl_opts, opts);
30616 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30617 arm_option_check_internal (opts);
30618 /* Do any overrides, such as global options arch=xxx. */
30619 arm_option_override_internal (opts, opts_set);
30621 return build_target_option_node (opts);
30624 static void
30625 add_attribute (const char * mode, tree *attributes)
30627 size_t len = strlen (mode);
30628 tree value = build_string (len, mode);
30630 TREE_TYPE (value) = build_array_type (char_type_node,
30631 build_index_type (size_int (len)));
30633 *attributes = tree_cons (get_identifier ("target"),
30634 build_tree_list (NULL_TREE, value),
30635 *attributes);
30638 /* For testing. Insert Thumb or ARM mode attributes on alternate functions. */
30640 static void
30641 arm_insert_attributes (tree fndecl, tree * attributes)
30643 const char *mode;
30645 if (! TARGET_FLIP_THUMB)
30646 return;
30648 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
30649 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30650 return;
30652 /* Nested definitions must inherit mode. */
30653 if (current_function_decl)
30655 mode = TARGET_THUMB ? "thumb" : "arm";
30656 add_attribute (mode, attributes);
30657 return;
30660 /* If there is already a setting don't change it. */
30661 if (lookup_attribute ("target", *attributes) != NULL)
30662 return;
30664 mode = thumb_flipper ? "thumb" : "arm";
30665 add_attribute (mode, attributes);
30667 thumb_flipper = !thumb_flipper;
30670 /* Hook to validate attribute((target("string"))). */
30672 static bool
30673 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30674 tree args, int ARG_UNUSED (flags))
30676 bool ret = true;
30677 struct gcc_options func_options;
30678 tree cur_tree, new_optimize;
30679 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30681 /* Get the optimization options of the current function. */
30682 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30684 /* If the function changed the optimization levels as well as setting target
30685 options, start with the optimizations specified. */
30686 if (!func_optimize)
30687 func_optimize = optimization_default_node;
30689 /* Init func_options. */
30690 memset (&func_options, 0, sizeof (func_options));
30691 init_options_struct (&func_options, NULL);
30692 lang_hooks.init_options_struct (&func_options);
30694 /* Initialize func_options to the defaults. */
30695 cl_optimization_restore (&func_options,
30696 TREE_OPTIMIZATION (func_optimize));
30698 cl_target_option_restore (&func_options,
30699 TREE_TARGET_OPTION (target_option_default_node));
30701 /* Set func_options flags with new target mode. */
30702 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30703 &global_options_set);
30705 if (cur_tree == NULL_TREE)
30706 ret = false;
30708 new_optimize = build_optimization_node (&func_options);
30710 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30712 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30714 finalize_options_struct (&func_options);
30716 return ret;
30719 /* Match an ISA feature bitmap to a named FPU. We always use the
30720 first entry that exactly matches the feature set, so that we
30721 effectively canonicalize the FPU name for the assembler. */
30722 static const char*
30723 arm_identify_fpu_from_isa (sbitmap isa)
30725 auto_sbitmap fpubits (isa_num_bits);
30726 auto_sbitmap cand_fpubits (isa_num_bits);
30728 bitmap_and (fpubits, isa, isa_all_fpubits);
30730 /* If there are no ISA feature bits relating to the FPU, we must be
30731 doing soft-float. */
30732 if (bitmap_empty_p (fpubits))
30733 return "softvfp";
30735 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30737 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30738 if (bitmap_equal_p (fpubits, cand_fpubits))
30739 return all_fpus[i].name;
30741 /* We must find an entry, or things have gone wrong. */
30742 gcc_unreachable ();
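/* Example: if the FPU-related bits of the active ISA exactly match the
   all_fpus[] entry named "vfpv3-d16", that canonical name is returned even
   when the user selected an equivalent spelling on the command line, and an
   ISA with no FPU bits at all reports "softvfp".  ("vfpv3-d16" is just an
   illustrative entry from the generated table.)  */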
30745 void
30746 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30749 fprintf (stream, "\t.syntax unified\n");
30751 if (TARGET_THUMB)
30753 if (is_called_in_ARM_mode (decl)
30754 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30755 && cfun->is_thunk))
30756 fprintf (stream, "\t.code 32\n");
30757 else if (TARGET_THUMB1)
30758 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30759 else
30760 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30762 else
30763 fprintf (stream, "\t.arm\n");
30765 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30766 (TARGET_SOFT_FLOAT
30767 ? "softvfp"
30768 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30770 if (TARGET_POKE_FUNCTION_NAME)
30771 arm_poke_function_name (stream, (const char *) name);
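/* Sketch of the directives emitted above for a Thumb-2, soft-float function
   (the exact sequence depends on the configuration):

       .syntax unified
       .thumb
       .thumb_func
       .fpu softvfp

   A Thumb-1 function gets ".code 16" instead of ".thumb", a function that is
   called in ARM mode gets ".code 32", and an ARM-mode function simply gets
   ".arm".  */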
30774 /* If MEM is in the form of [base+offset], extract the two parts
30775 of the address into BASE and OFFSET; otherwise return false
30776 after clearing BASE and OFFSET. */
30778 static bool
30779 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30781 rtx addr;
30783 gcc_assert (MEM_P (mem));
30785 addr = XEXP (mem, 0);
30787 /* Strip off const from addresses like (const (addr)). */
30788 if (GET_CODE (addr) == CONST)
30789 addr = XEXP (addr, 0);
30791 if (GET_CODE (addr) == REG)
30793 *base = addr;
30794 *offset = const0_rtx;
30795 return true;
30798 if (GET_CODE (addr) == PLUS
30799 && GET_CODE (XEXP (addr, 0)) == REG
30800 && CONST_INT_P (XEXP (addr, 1)))
30802 *base = XEXP (addr, 0);
30803 *offset = XEXP (addr, 1);
30804 return true;
30807 *base = NULL_RTX;
30808 *offset = NULL_RTX;
30810 return false;
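/* Examples of the address forms accepted above (register numbers are
   arbitrary):
     (mem (reg r3))                       -> *base = r3, *offset = (const_int 0)
     (mem (plus (reg r3) (const_int 8)))  -> *base = r3, *offset = (const_int 8)
   Anything else, e.g. an auto-increment address, clears BASE and OFFSET and
   returns false.  */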
30813 /* If INSN is a load or store whose address has the form [base+offset],
30814 extract the two parts into BASE and OFFSET. IS_LOAD is set
30815 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30816 otherwise return FALSE. */
30818 static bool
30819 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30821 rtx x, dest, src;
30823 gcc_assert (INSN_P (insn));
30824 x = PATTERN (insn);
30825 if (GET_CODE (x) != SET)
30826 return false;
30828 src = SET_SRC (x);
30829 dest = SET_DEST (x);
30830 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30832 *is_load = false;
30833 extract_base_offset_in_addr (dest, base, offset);
30835 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30837 *is_load = true;
30838 extract_base_offset_in_addr (src, base, offset);
30840 else
30841 return false;
30843 return (*base != NULL_RTX && *offset != NULL_RTX);
30846 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30848 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30849 and PRI are only calculated for these instructions. For other instructions,
30850 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30851 instruction fusion can be supported by returning different priorities.
30853 It's important that irrelevant instructions get the largest FUSION_PRI. */
30855 static void
30856 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30857 int *fusion_pri, int *pri)
30859 int tmp, off_val;
30860 bool is_load;
30861 rtx base, offset;
30863 gcc_assert (INSN_P (insn));
30865 tmp = max_pri - 1;
30866 if (!fusion_load_store (insn, &base, &offset, &is_load))
30868 *pri = tmp;
30869 *fusion_pri = tmp;
30870 return;
30873 /* Load goes first. */
30874 if (is_load)
30875 *fusion_pri = tmp - 1;
30876 else
30877 *fusion_pri = tmp - 2;
30879 tmp /= 2;
30881 /* INSN with smaller base register goes first. */
30882 tmp -= ((REGNO (base) & 0xff) << 20);
30884 /* INSN with smaller offset goes first. */
30885 off_val = (int)(INTVAL (offset));
30886 if (off_val >= 0)
30887 tmp -= (off_val & 0xfffff);
30888 else
30889 tmp += ((- off_val) & 0xfffff);
30891 *pri = tmp;
30892 return;
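/* Worked example for the scheme above (the instructions are assumed): given
   "ldr r4, [r1, #4]" and "ldr r5, [r1, #8]", both loads receive the same
   FUSION_PRI of max_pri - 2, so the scheduler keeps them as fusion
   candidates, while the smaller offset yields the larger PRI and therefore
   orders the first load ahead of the second.  Any insn that is not a simple
   load or store just gets max_pri - 1 for both values and stays out of the
   way.  */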
30896 /* Construct and return a PARALLEL RTX vector with elements numbering the
30897 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30898 the vector - from the perspective of the architecture. This does not
30899 line up with GCC's perspective on lane numbers, so we end up with
30900 different masks depending on our target endian-ness. The diagram
30901 below may help. We must draw the distinction when building masks
30902 which select one half of the vector. An instruction selecting
30903 architectural low-lanes for a big-endian target must be described using
30904 a mask selecting GCC high-lanes.
30906 Big-Endian Little-Endian
30908 GCC 0 1 2 3 3 2 1 0
30909 | x | x | x | x | | x | x | x | x |
30910 Architecture 3 2 1 0 3 2 1 0
30912 Low Mask: { 2, 3 } { 0, 1 }
30913 High Mask: { 0, 1 } { 2, 3 } */
30917 rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30919 int nunits = GET_MODE_NUNITS (mode);
30920 rtvec v = rtvec_alloc (nunits / 2);
30921 int high_base = nunits / 2;
30922 int low_base = 0;
30923 int base;
30924 rtx t1;
30925 int i;
30927 if (BYTES_BIG_ENDIAN)
30928 base = high ? low_base : high_base;
30929 else
30930 base = high ? high_base : low_base;
30932 for (i = 0; i < nunits / 2; i++)
30933 RTVEC_ELT (v, i) = GEN_INT (base + i);
30935 t1 = gen_rtx_PARALLEL (mode, v);
30936 return t1;
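/* Concrete instance of the diagram above: for V4SImode with HIGH == true
   this returns (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, because the
   architectural high half corresponds to GCC's low lane numbers there.  */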
30939 /* Check OP for validity as a PARALLEL RTX vector with elements
30940 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30941 from the perspective of the architecture. See the diagram above
30942 arm_simd_vect_par_cnst_half for more details. */
30944 bool
30945 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30946 bool high)
30948 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30949 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30950 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30951 int i = 0;
30953 if (!VECTOR_MODE_P (mode))
30954 return false;
30956 if (count_op != count_ideal)
30957 return false;
30959 for (i = 0; i < count_ideal; i++)
30961 rtx elt_op = XVECEXP (op, 0, i);
30962 rtx elt_ideal = XVECEXP (ideal, 0, i);
30964 if (!CONST_INT_P (elt_op)
30965 || INTVAL (elt_ideal) != INTVAL (elt_op))
30966 return false;
30968 return true;
30971 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30972 in Thumb1. */
30973 static bool
30974 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30975 const_tree)
30977 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30978 if (vcall_offset && TARGET_THUMB1)
30979 return false;
30981 /* Otherwise ok. */
30982 return true;
30985 /* Generate RTL for a conditional branch with rtx comparison CODE in
30986 mode CC_MODE. The destination of the unlikely conditional branch
30987 is LABEL_REF. */
30989 void
30990 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30991 rtx label_ref)
30993 rtx x;
30994 x = gen_rtx_fmt_ee (code, VOIDmode,
30995 gen_rtx_REG (cc_mode, CC_REGNUM),
30996 const0_rtx);
30998 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30999 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31000 pc_rtx);
31001 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
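/* The jump emitted above has the schematic shape
     (set (pc) (if_then_else (<code> (reg:CC_MODE CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))
   and emit_unlikely_jump marks the branch with a very low probability so
   that the fall-through path is treated as the hot one.  */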
31004 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31006 For pure-code sections there is no letter code for this attribute, so
31007 output all the section flags numerically when this is needed. */
31009 static bool
31010 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31013 if (flags & SECTION_ARM_PURECODE)
31015 *num = 0x20000000;
31017 if (!(flags & SECTION_DEBUG))
31018 *num |= 0x2;
31019 if (flags & SECTION_EXCLUDE)
31020 *num |= 0x80000000;
31021 if (flags & SECTION_WRITE)
31022 *num |= 0x1;
31023 if (flags & SECTION_CODE)
31024 *num |= 0x4;
31025 if (flags & SECTION_MERGE)
31026 *num |= 0x10;
31027 if (flags & SECTION_STRINGS)
31028 *num |= 0x20;
31029 if (flags & SECTION_TLS)
31030 *num |= 0x400;
31031 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31032 *num |= 0x200;
31034 return true;
31037 return false;
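/* Worked example of the encoding above: an allocatable, executable
   pure-code section with none of the other flags set yields
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together with
   the usual SHF_ALLOC and SHF_EXECINSTR bits, which the assembler then
   receives as a numeric flags field instead of a letter string.  */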
31040 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31042 If pure-code is passed as an option, make sure all functions are in
31043 sections that have the SHF_ARM_PURECODE attribute. */
31045 static section *
31046 arm_function_section (tree decl, enum node_frequency freq,
31047 bool startup, bool exit)
31049 const char * section_name;
31050 section * sec;
31052 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31053 return default_function_section (decl, freq, startup, exit);
31055 if (!target_pure_code)
31056 return default_function_section (decl, freq, startup, exit);
31059 section_name = DECL_SECTION_NAME (decl);
31061 /* If a function is not in a named section then it falls under the 'default'
31062 text section, also known as '.text'. We can preserve previous behavior as
31063 the default text section already has the SHF_ARM_PURECODE section
31064 attribute. */
31065 if (!section_name)
31067 section *default_sec = default_function_section (decl, freq, startup,
31068 exit);
31070 /* If default_sec is not null, then it must be a special section like for
31071 example .text.startup. We set the pure-code attribute and return the
31072 same section to preserve existing behavior. */
31073 if (default_sec)
31074 default_sec->common.flags |= SECTION_ARM_PURECODE;
31075 return default_sec;
31078 /* Otherwise look whether a section has already been created with
31079 'section_name'. */
31080 sec = get_named_section (decl, section_name, 0);
31081 if (!sec)
31082 /* If that is not the case, passing NULL as the section's name to
31083 'get_named_section' will create a section with the declaration's
31084 section name. */
31085 sec = get_named_section (decl, NULL, 0);
31087 /* Set the SHF_ARM_PURECODE attribute. */
31088 sec->common.flags |= SECTION_ARM_PURECODE;
31090 return sec;
31093 /* Implements the TARGET_SECTION_FLAGS hook.
31095 If DECL is a function declaration and pure-code is passed as an option
31096 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31097 section's name and RELOC indicates whether the declaration's initializer may
31098 contain runtime relocations. */
31100 static unsigned int
31101 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31103 unsigned int flags = default_section_type_flags (decl, name, reloc);
31105 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31106 flags |= SECTION_ARM_PURECODE;
31108 return flags;
31111 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31113 static void
31114 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31115 rtx op0, rtx op1,
31116 rtx *quot_p, rtx *rem_p)
31118 if (mode == SImode)
31119 gcc_assert (!TARGET_IDIV);
31121 scalar_int_mode libval_mode
31122 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31124 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31125 libval_mode,
31126 op0, GET_MODE (op0),
31127 op1, GET_MODE (op1));
31129 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31130 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31131 GET_MODE_SIZE (mode));
31133 gcc_assert (quotient);
31134 gcc_assert (remainder);
31136 *quot_p = quotient;
31137 *rem_p = remainder;
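/* Sketch of the convention relied on above: for SImode operands the helper
   (e.g. __aeabi_idivmod) is called with a DImode return value; the quotient
   is read back as the subreg at byte offset 0 and the remainder as the
   subreg at byte offset GET_MODE_SIZE (SImode) == 4, so a single library
   call produces both results.  */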
31140 /* This function checks for the availability of the coprocessor builtin passed
31141 in BUILTIN for the current target. Returns true if it is available and
31142 false otherwise. If a BUILTIN is passed for which this function has not
31143 been implemented, it will cause an internal compiler error (via gcc_unreachable). */
31145 bool
31146 arm_coproc_builtin_available (enum unspecv builtin)
31148 /* None of these builtins are available in Thumb mode if the target only
31149 supports Thumb-1. */
31150 if (TARGET_THUMB1)
31151 return false;
31153 switch (builtin)
31155 case VUNSPEC_CDP:
31156 case VUNSPEC_LDC:
31157 case VUNSPEC_LDCL:
31158 case VUNSPEC_STC:
31159 case VUNSPEC_STCL:
31160 case VUNSPEC_MCR:
31161 case VUNSPEC_MRC:
31162 if (arm_arch4)
31163 return true;
31164 break;
31165 case VUNSPEC_CDP2:
31166 case VUNSPEC_LDC2:
31167 case VUNSPEC_LDC2L:
31168 case VUNSPEC_STC2:
31169 case VUNSPEC_STC2L:
31170 case VUNSPEC_MCR2:
31171 case VUNSPEC_MRC2:
31172 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31173 ARMv8-{A,M}. */
31174 if (arm_arch5)
31175 return true;
31176 break;
31177 case VUNSPEC_MCRR:
31178 case VUNSPEC_MRRC:
31179 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31180 ARMv8-{A,M}. */
31181 if (arm_arch6 || arm_arch5te)
31182 return true;
31183 break;
31184 case VUNSPEC_MCRR2:
31185 case VUNSPEC_MRRC2:
31186 if (arm_arch6)
31187 return true;
31188 break;
31189 default:
31190 gcc_unreachable ();
31192 return false;
31195 /* This function returns true if OP is a valid memory operand for the ldc and
31196 stc coprocessor instructions and false otherwise. */
31198 bool
31199 arm_coproc_ldc_stc_legitimate_address (rtx op)
31201 HOST_WIDE_INT range;
31202 /* Has to be a memory operand. */
31203 if (!MEM_P (op))
31204 return false;
31206 op = XEXP (op, 0);
31208 /* We accept registers. */
31209 if (REG_P (op))
31210 return true;
31212 switch (GET_CODE (op))
31214 case PLUS:
31216 /* Or registers with an offset. */
31217 if (!REG_P (XEXP (op, 0)))
31218 return false;
31220 op = XEXP (op, 1);
31222 /* The offset must be an immediate though. */
31223 if (!CONST_INT_P (op))
31224 return false;
31226 range = INTVAL (op);
31228 /* Within the range of [-1020,1020]. */
31229 if (!IN_RANGE (range, -1020, 1020))
31230 return false;
31232 /* And a multiple of 4. */
31233 return (range % 4) == 0;
31235 case PRE_INC:
31236 case POST_INC:
31237 case PRE_DEC:
31238 case POST_DEC:
31239 return REG_P (XEXP (op, 0));
31240 default:
31241 gcc_unreachable ();
31243 return false;
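/* Examples of addresses the predicate above accepts or rejects (register
   choice is arbitrary):
     (mem (reg r2))                           accepted
     (mem (plus (reg r2) (const_int 1020)))   accepted: in range, multiple of 4
     (mem (plus (reg r2) (const_int 1022)))   rejected: not a multiple of 4
     (mem (post_inc (reg r2)))                accepted
   matching the offset forms the LDC/STC encodings can express.  */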
31246 #if CHECKING_P
31247 namespace selftest {
31249 /* Scan the static data tables generated by parsecpu.awk looking for
31250 potential issues with the data. We primarily check for
31251 inconsistencies in the option extensions at present (extensions
31252 that duplicate others but aren't marked as aliases). Furthermore,
31253 for correct canonicalization, later options must never be a subset
31254 of an earlier option. Any extension should also only specify other
31255 feature bits and never an architecture bit. The architecture is inferred
31256 from the declaration of the extension. */
31257 static void
31258 arm_test_cpu_arch_data (void)
31260 const arch_option *arch;
31261 const cpu_option *cpu;
31262 auto_sbitmap target_isa (isa_num_bits);
31263 auto_sbitmap isa1 (isa_num_bits);
31264 auto_sbitmap isa2 (isa_num_bits);
31266 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31268 const cpu_arch_extension *ext1, *ext2;
31270 if (arch->common.extensions == NULL)
31271 continue;
31273 arm_initialize_isa (target_isa, arch->common.isa_bits);
31275 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31277 if (ext1->alias)
31278 continue;
31280 arm_initialize_isa (isa1, ext1->isa_bits);
31281 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31283 if (ext2->alias || ext1->remove != ext2->remove)
31284 continue;
31286 arm_initialize_isa (isa2, ext2->isa_bits);
31287 /* If the option is a subset of the parent option, it doesn't
31288 add anything and so isn't useful. */
31289 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31291 /* If the extension specifies any architectural bits then
31292 disallow it. Extensions should only specify feature bits. */
31293 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31298 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31300 const cpu_arch_extension *ext1, *ext2;
31302 if (cpu->common.extensions == NULL)
31303 continue;
31305 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31307 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31309 if (ext1->alias)
31310 continue;
31312 arm_initialize_isa (isa1, ext1->isa_bits);
31313 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31315 if (ext2->alias || ext1->remove != ext2->remove)
31316 continue;
31318 arm_initialize_isa (isa2, ext2->isa_bits);
31319 /* If the option is a subset of the parent option, it doesn't
31320 add anything and so isn't useful. */
31321 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31323 /* If the extension specifies any architectural bits then
31324 disallow it. Extensions should only specify feature bits. */
31325 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31331 static void
31332 arm_run_selftests (void)
31334 arm_test_cpu_arch_data ();
31336 } /* Namespace selftest. */
31338 #undef TARGET_RUN_TARGET_SELFTESTS
31339 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31340 #endif /* CHECKING_P */
31342 struct gcc_target targetm = TARGET_INITIALIZER;
31344 #include "gt-arm.h"